From b1320a65f0bdd8ce4b12407bd5d619489a2e2cf5 Mon Sep 17 00:00:00 2001
From: Openclaw <openclaw@local>
Date: Sat, 21 Mar 2026 07:49:32 +0000
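Subject: [PATCH] Stabilize sacred gate and policy thresholds

run-sacred-evals:
* Add runner_timeout_seconds (180), runner_start (0) and runner_limit
  (14) to the config defaults in load_config().
* run_regression() now takes the config, passes --start/--limit to the
  runner when those keys are not None, applies the configured timeout
  to the subprocess, and returns (returncode, stdout, stderr) instead
  of only the return code.
* main() emits a 'hold' decision with reason 'runner_timeout' and
  exits 1 when the runner exceeds the timeout, and includes the first
  300 characters of the runner's stderr in the 'runner_failed' report.

train-policy-offline:
* classify_mode() treats clarifications as failures
  (failure_like = failure + clarification).
* 'prefer' now requires success >= 3 and success >= failure_like + 2
  (previously success >= max(2, failure + clarification)); the
  count >= 3 and avg_reward >= 3.0 conditions are unchanged.
* 'avoid' now requires failure_like >= 2, failure_like > success and
  avg_reward < 1.0 (previously failure > success and
  avg_reward < 0.5).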

---
 syncpatch/run-sacred-evals     | 29 ++++++++++++++++++++++++-----
 syncpatch/train-policy-offline |  5 +++--
 2 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/syncpatch/run-sacred-evals b/syncpatch/run-sacred-evals
index 8f9c769..826ab21 100644
--- a/syncpatch/run-sacred-evals
+++ b/syncpatch/run-sacred-evals
@@ -20,19 +20,38 @@ def load_config() -> dict:
             'min_memory_signal_accuracy_percent': 90.0,
             'max_timeouts': 0,
             'require_pass': True,
+            'runner_timeout_seconds': 180,
+            'runner_start': 0,
+            'runner_limit': 14,
         }
 
 
-def run_regression() -> int:
-    proc = subprocess.run(['python3', str(RUNNER)], capture_output=True, text=True)
-    return proc.returncode
+def run_regression(cfg: dict) -> tuple[int, str, str]:
+    cmd = ['python3', str(RUNNER)]
+    start = cfg.get('runner_start', None)
+    limit = cfg.get('runner_limit', None)
+    if start is not None:
+        cmd.extend(['--start', str(start)])
+    if limit is not None:
+        cmd.extend(['--limit', str(limit)])
+    proc = subprocess.run(
+        cmd,
+        capture_output=True,
+        text=True,
+        timeout=int(cfg.get('runner_timeout_seconds', 180) or 180),
+    )
+    return proc.returncode, proc.stdout, proc.stderr
 
 
 def main() -> int:
     cfg = load_config()
-    rc = run_regression()
+    try:
+        rc, stdout, stderr = run_regression(cfg)
+    except subprocess.TimeoutExpired:
+        print(json.dumps({'ok': False, 'decision': 'hold', 'reason': 'runner_timeout'}, ensure_ascii=False))
+        return 1
     if rc != 0:
-        print(json.dumps({'ok': False, 'decision': 'hold', 'reason': 'runner_failed', 'runner_rc': rc}, ensure_ascii=False))
+        print(json.dumps({'ok': False, 'decision': 'hold', 'reason': 'runner_failed', 'runner_rc': rc, 'stderr': (stderr or '')[:300]}, ensure_ascii=False))
         return 1
     data = json.loads(RESULT.read_text(encoding='utf-8'))
     route = float(data.get('route_accuracy_percent', 0.0) or 0.0)
diff --git a/syncpatch/train-policy-offline b/syncpatch/train-policy-offline
index bff7535..972e615 100644
--- a/syncpatch/train-policy-offline
+++ b/syncpatch/train-policy-offline
@@ -21,9 +21,10 @@ def classify_mode(row: dict) -> str:
     clar = int(row.get('clarification', 0) or 0)
     reward_sum = float(row.get('reward_sum', 0.0) or 0.0)
     avg_reward = reward_sum / count if count else 0.0
-    if count >= 3 and success >= max(2, failure + clar) and avg_reward >= 3.0:
+    failure_like = failure + clar
+    if count >= 3 and success >= 3 and success >= (failure_like + 2) and avg_reward >= 3.0:
         return 'prefer'
-    if count >= 3 and failure > success and avg_reward < 0.5:
+    if count >= 3 and failure_like >= 2 and failure_like > success and avg_reward < 1.0:
         return 'avoid'
     return 'observe'