From b1320a65f0bdd8ce4b12407bd5d619489a2e2cf5 Mon Sep 17 00:00:00 2001 From: Openclaw Date: Sat, 21 Mar 2026 07:49:32 +0000 Subject: [PATCH] Stabilize sacred gate and policy thresholds --- syncpatch/run-sacred-evals | 29 ++++++++++++++++++++++++----- syncpatch/train-policy-offline | 5 +++-- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/syncpatch/run-sacred-evals b/syncpatch/run-sacred-evals index 8f9c769..826ab21 100644 --- a/syncpatch/run-sacred-evals +++ b/syncpatch/run-sacred-evals @@ -20,19 +20,38 @@ def load_config() -> dict: 'min_memory_signal_accuracy_percent': 90.0, 'max_timeouts': 0, 'require_pass': True, + 'runner_timeout_seconds': 180, + 'runner_start': 0, + 'runner_limit': 14, } -def run_regression() -> int: - proc = subprocess.run(['python3', str(RUNNER)], capture_output=True, text=True) - return proc.returncode +def run_regression(cfg: dict) -> tuple[int, str, str]: + cmd = ['python3', str(RUNNER)] + start = cfg.get('runner_start', None) + limit = cfg.get('runner_limit', None) + if start is not None: + cmd.extend(['--start', str(start)]) + if limit is not None: + cmd.extend(['--limit', str(limit)]) + proc = subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=int(cfg.get('runner_timeout_seconds', 180) or 180), + ) + return proc.returncode, proc.stdout, proc.stderr def main() -> int: cfg = load_config() - rc = run_regression() + try: + rc, stdout, stderr = run_regression(cfg) + except subprocess.TimeoutExpired: + print(json.dumps({'ok': False, 'decision': 'hold', 'reason': 'runner_timeout'}, ensure_ascii=False)) + return 1 if rc != 0: - print(json.dumps({'ok': False, 'decision': 'hold', 'reason': 'runner_failed', 'runner_rc': rc}, ensure_ascii=False)) + print(json.dumps({'ok': False, 'decision': 'hold', 'reason': 'runner_failed', 'runner_rc': rc, 'stderr': (stderr or '')[:300]}, ensure_ascii=False)) return 1 data = json.loads(RESULT.read_text(encoding='utf-8')) route = float(data.get('route_accuracy_percent', 0.0) or 0.0) diff --git a/syncpatch/train-policy-offline b/syncpatch/train-policy-offline index bff7535..972e615 100644 --- a/syncpatch/train-policy-offline +++ b/syncpatch/train-policy-offline @@ -21,9 +21,10 @@ def classify_mode(row: dict) -> str: clar = int(row.get('clarification', 0) or 0) reward_sum = float(row.get('reward_sum', 0.0) or 0.0) avg_reward = reward_sum / count if count else 0.0 - if count >= 3 and success >= max(2, failure + clar) and avg_reward >= 3.0: + failure_like = failure + clar + if count >= 3 and success >= 3 and success >= (failure_like + 2) and avg_reward >= 3.0: return 'prefer' - if count >= 3 and failure > success and avg_reward < 0.5: + if count >= 3 and failure_like >= 2 and failure_like > success and avg_reward < 1.0: return 'avoid' return 'observe'