Add offline policy builder and sacred eval gate
This commit is contained in:
parent
94eae8ceba
commit
6bba85fe9a
3 changed files with 141 additions and 0 deletions
69
syncpatch/run-sacred-evals
Normal file
69
syncpatch/run-sacred-evals
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
#!/usr/bin/env python3
from __future__ import annotations

import json
import subprocess
import sys
from pathlib import Path
|
||||
|
||||
ROOT = Path('/home/openclaw/.openclaw/workspace')
CONFIG = ROOT / 'configs' / 'sacred_eval_gate.json'
RESULT = ROOT / 'evals' / 'results' / 'regression_results.json'
RUNNER = ROOT / 'evals' / 'run_regression.py'

# Conservative gate thresholds used when CONFIG is missing or unreadable.
DEFAULT_CONFIG = {
    'min_route_accuracy_percent': 95.0,
    'min_memory_signal_accuracy_percent': 90.0,
    'max_timeouts': 0,
    'require_pass': True,
}


def load_config() -> dict:
    """Load the sacred-eval gate thresholds from CONFIG.

    Returns:
        The parsed JSON object from CONFIG when it exists and is valid
        JSON; otherwise a fresh copy of DEFAULT_CONFIG so the gate can
        still run with conservative defaults.
    """
    try:
        return json.loads(CONFIG.read_text(encoding='utf-8'))
    except (OSError, ValueError):
        # OSError: file missing/unreadable.  ValueError: malformed JSON
        # (json.JSONDecodeError is a ValueError subclass).  Anything
        # else (e.g. a typo-induced NameError) should surface, not be
        # silently replaced by defaults.
        return dict(DEFAULT_CONFIG)
|
||||
|
||||
|
||||
def run_regression() -> int:
    """Run the regression eval suite (RUNNER) as a subprocess.

    Returns:
        The runner's exit code (0 on success).

    On a nonzero exit the runner's captured stderr is forwarded to our
    own stderr, so the cause of a 'runner_failed' hold is not silently
    discarded.  stdout stays captured to keep this script's stdout a
    single machine-readable JSON line.
    """
    proc = subprocess.run(['python3', str(RUNNER)], capture_output=True, text=True)
    if proc.returncode != 0 and proc.stderr:
        sys.stderr.write(proc.stderr)
    return proc.returncode
|
||||
|
||||
|
||||
def _check_thresholds(cfg: dict, route: float, memory: float,
                      timeouts: int, passed: bool) -> list[str]:
    """Return the list of gate violations (empty when the gate passes)."""
    reasons: list[str] = []
    if route < float(cfg.get('min_route_accuracy_percent', 95.0)):
        reasons.append('route_accuracy_below_threshold')
    if memory < float(cfg.get('min_memory_signal_accuracy_percent', 90.0)):
        reasons.append('memory_accuracy_below_threshold')
    if timeouts > int(cfg.get('max_timeouts', 0)):
        reasons.append('timeouts_above_threshold')
    if bool(cfg.get('require_pass', True)) and not passed:
        reasons.append('regression_not_pass')
    return reasons


def main() -> int:
    """Run the sacred eval gate and print a one-line JSON decision.

    Returns:
        0 when every threshold is met (decision 'promote'), 1 otherwise
        (decision 'hold').
    """
    cfg = load_config()
    rc = run_regression()
    if rc != 0:
        print(json.dumps({'ok': False, 'decision': 'hold', 'reason': 'runner_failed', 'runner_rc': rc}, ensure_ascii=False))
        return 1
    # A runner that exits 0 but leaves no readable results file must
    # still produce a JSON 'hold' decision, not an uncaught traceback.
    try:
        data = json.loads(RESULT.read_text(encoding='utf-8'))
    except (OSError, ValueError):
        print(json.dumps({'ok': False, 'decision': 'hold', 'reason': 'results_unreadable'}, ensure_ascii=False))
        return 1
    # `or 0.0` / `or 0` guard against explicit nulls in the results JSON.
    route = float(data.get('route_accuracy_percent', 0.0) or 0.0)
    memory = float(data.get('memory_signal_accuracy_percent', 0.0) or 0.0)
    timeouts = int(data.get('timeouts', 0) or 0)
    passed = bool(data.get('pass'))
    reasons = _check_thresholds(cfg, route, memory, timeouts, passed)
    ok = not reasons
    print(json.dumps({
        'ok': ok,
        'decision': 'promote' if ok else 'hold',
        'reasons': reasons,
        'route_accuracy_percent': route,
        'memory_signal_accuracy_percent': memory,
        'timeouts': timeouts,
        'pass': passed,
    }, ensure_ascii=False))
    return 0 if ok else 1
|
||||
|
||||
|
||||
# Script entry point: propagate main()'s gate decision as the process
# exit status (0 = promote, 1 = hold).
if __name__ == '__main__':
    raise SystemExit(main())
|
||||
Loading…
Add table
Add a link
Reference in a new issue