Stabilize sacred gate and policy thresholds

2026-03-21 07:49:32 +00:00 · 2026-03-21 07:49:32 +00:00 · b1320a65f0
commit b1320a65f0
parent 1e1bd0dc4a
2 changed files with 27 additions and 7 deletions
--- a/syncpatch/train-policy-offline
+++ b/syncpatch/train-policy-offline
@@ -21,9 +21,10 @@ def classify_mode(row: dict) -> str:
    clar = int(row.get('clarification', 0) or 0)
    reward_sum = float(row.get('reward_sum', 0.0) or 0.0)
    avg_reward = reward_sum / count if count else 0.0
-    if count >= 3 and success >= max(2, failure + clar) and avg_reward >= 3.0:
+    failure_like = failure + clar
+    if count >= 3 and success >= 3 and success >= (failure_like + 2) and avg_reward >= 3.0:
        return 'prefer'
-    if count >= 3 and failure > success and avg_reward < 0.5:
+    if count >= 3 and failure_like >= 2 and failure_like > success and avg_reward < 1.0:
        return 'avoid'
    return 'observe'