Add offline policy builder and sacred eval gate

2026-03-21 07:37:59 +00:00 · 2026-03-21 07:37:59 +00:00 · 6bba85fe9a
commit 6bba85fe9a
parent 94eae8ceba
3 changed files with 141 additions and 0 deletions
--- a/syncpatch/train-policy-offline
+++ b/syncpatch/train-policy-offline
@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import json
+from datetime import datetime, timezone
+from pathlib import Path
+
+# Workspace root; both files used by this script live in its ``data`` directory.
+ROOT = Path('/home/openclaw/.openclaw/workspace')
+# Input: the JSON statistics that main() reads.
+POLICY_STATS = ROOT.joinpath('data', 'policy_stats.json')
+# Output: the JSON candidate that main() writes.
+OUT_PATH = ROOT.joinpath('data', 'policy_candidate.json')
+
+
+def beta_mean(success: int, failure: int, alpha: float = 1.0, beta: float = 1.0) -> float:
+    """Return the smoothed success rate for the given outcome counts.
+
+    ``alpha`` and ``beta`` are pseudo-counts added to the successes and to
+    the failures respectively, so the result equals
+    ``(success + alpha) / (success + failure + alpha + beta)``. With the
+    defaults, zero observations yield 0.5.
+    """
+    smoothed_successes = success + alpha
+    smoothed_total = success + failure + alpha + beta
+    return smoothed_successes / smoothed_total
+
+
+def classify_mode(row: dict) -> str:
+    """Label one stats row as ``'prefer'``, ``'avoid'`` or ``'observe'``.
+
+    Missing or falsy fields count as zero. A row needs at least three
+    samples before it can leave ``'observe'``:
+
+    * ``'prefer'``: successes are at least 2 and at least the combined
+      failures and clarifications, and the average reward is >= 3.0.
+    * ``'avoid'``: failures outnumber successes and the average reward
+      is below 0.5.
+    """
+    def _int_field(name: str) -> int:
+        return int(row.get(name, 0) or 0)
+
+    samples = _int_field('count')
+    wins = _int_field('success')
+    losses = _int_field('failure')
+    clarifications = _int_field('clarification')
+    total_reward = float(row.get('reward_sum', 0.0) or 0.0)
+
+    if samples:
+        mean_reward = total_reward / samples
+    else:
+        mean_reward = 0.0
+
+    # Too little evidence for any verdict.
+    if samples < 3:
+        return 'observe'
+
+    if wins >= max(2, losses + clarifications) and mean_reward >= 3.0:
+        return 'prefer'
+    if losses > wins and mean_reward < 0.5:
+        return 'avoid'
+    return 'observe'
+
+
+def main() -> int:
+    """Build the policy candidate file from the recorded policy stats.
+
+    Loads ``POLICY_STATS``, summarises every row of its ``plans`` and
+    ``families`` buckets (counts, average reward, smoothed success rate and
+    the mode chosen by ``classify_mode``) and writes the result to
+    ``OUT_PATH``. Exactly one JSON status object is printed to stdout.
+
+    Returns:
+        0 on success; 1 when the stats cannot be read, do not have the
+        expected shape, or the candidate file cannot be written.
+    """
+    def fail(error: str) -> int:
+        # Every failure is reported in the same machine-readable form.
+        print(json.dumps({'ok': False, 'error': error}, ensure_ascii=False))
+        return 1
+
+    try:
+        stats = json.loads(POLICY_STATS.read_text(encoding='utf-8'))
+    except Exception as exc:
+        return fail(f'cannot_read_policy_stats: {exc}')
+    if not isinstance(stats, dict):
+        return fail(f'invalid_policy_stats: expected an object, got {type(stats).__name__}')
+
+    candidate = {
+        'generated_at': datetime.now(timezone.utc).isoformat(),
+        'plans': {},
+        'families': {},
+    }
+
+    for bucket in ('plans', 'families'):
+        rows = stats.get(bucket) or {}
+        if not isinstance(rows, dict):
+            return fail(f'invalid_policy_stats: {bucket} must be an object')
+        for key, row in rows.items():
+            if not isinstance(row, dict):
+                return fail(f'invalid_policy_stats: {bucket}.{key} must be an object')
+            try:
+                success = int(row.get('success', 0) or 0)
+                # Clarifications are counted together with failures here.
+                failure_like = int(row.get('failure', 0) or 0) + int(row.get('clarification', 0) or 0)
+                count = int(row.get('count', 0) or 0)
+                reward_sum = float(row.get('reward_sum', 0.0) or 0.0)
+                summary = {
+                    'count': count,
+                    'success': success,
+                    'failure_like': failure_like,
+                    'avg_reward': (reward_sum / count if count else 0.0),
+                    'beta_mean': beta_mean(success, failure_like),
+                    'mode': classify_mode(row),
+                }
+            except (TypeError, ValueError, ArithmeticError) as exc:
+                # Non-numeric fields, or counts that break the arithmetic
+                # (e.g. negative values zeroing a denominator).
+                return fail(f'invalid_policy_stats: {bucket}.{key}: {exc}')
+            candidate[bucket][key] = summary
+
+    payload = json.dumps(candidate, ensure_ascii=False, indent=2)
+    # Write to a sibling file and rename it into place so a reader never
+    # sees a half-written candidate.
+    tmp_path = OUT_PATH.with_name(OUT_PATH.name + '.tmp')
+    try:
+        OUT_PATH.parent.mkdir(parents=True, exist_ok=True)
+        tmp_path.write_text(payload, encoding='utf-8')
+        tmp_path.replace(OUT_PATH)
+    except OSError as exc:
+        try:
+            tmp_path.unlink()
+        except OSError:
+            pass  # Nothing to clean up, or cleanup is impossible as well.
+        return fail(f'cannot_write_policy_candidate: {exc}')
+
+    print(json.dumps({'ok': True, 'path': str(OUT_PATH), 'plans': len(candidate['plans']), 'families': len(candidate['families'])}, ensure_ascii=False))
+    return 0
+
+
+if __name__ == '__main__':
+    # Use main()'s return value as the process exit status.
+    exit_status = main()
+    raise SystemExit(exit_status)