openclaw-intelligence-core-.../syncpatch/train-policy-offline

#!/usr/bin/env python3
from __future__ import annotations

import json
from datetime import datetime, timezone
from pathlib import Path

# Workspace directory under which the policy data files live.
ROOT = Path('/home/openclaw/.openclaw/workspace')
# Input file: outcome counters that main() reads.
POLICY_STATS = ROOT.joinpath('data', 'policy_stats.json')
# Output file: the candidate policy that main() writes.
OUT_PATH = ROOT.joinpath('data', 'policy_candidate.json')


def beta_mean(success: int, failure: int, alpha: float = 1.0, beta: float = 1.0) -> float:
    """Return the mean of a Beta distribution updated with observed outcomes.

    Args:
        success: Number of successful outcomes.
        failure: Number of unsuccessful outcomes.
        alpha: Prior pseudo-count added to the successes.
        beta: Prior pseudo-count added to the failures.

    Returns:
        ``(success + alpha) / (success + failure + alpha + beta)``.
    """
    observed = success + failure
    numerator = success + alpha
    denominator = observed + alpha + beta
    return numerator / denominator


def classify_mode(row: dict) -> str:
    """Classify one stats row as ``'prefer'``, ``'avoid'`` or ``'observe'``.

    The row's ``count``, ``success``, ``failure``, ``clarification`` and
    ``reward_sum`` entries are read; missing or falsy entries count as zero.
    Clarifications are added to failures when weighing the row.

    Args:
        row: Mapping holding the counters for a single plan or family.

    Returns:
        ``'prefer'`` when there are at least 3 samples, at least 3 successes,
        successes lead failures-plus-clarifications by 2 or more, and the
        average reward is at least 3.0. ``'avoid'`` when there are at least
        3 samples, at least 2 failures-plus-clarifications that outnumber the
        successes, and the average reward is below 1.0. ``'observe'``
        otherwise.
    """
    def _counter(field: str) -> int:
        # `or 0` maps an explicit None (or any other falsy value) to zero.
        return int(row.get(field, 0) or 0)

    samples = _counter('count')
    wins = _counter('success')
    setbacks = _counter('failure') + _counter('clarification')
    total_reward = float(row.get('reward_sum', 0.0) or 0.0)
    mean_reward = total_reward / samples if samples else 0.0

    # Both non-default verdicts need at least three samples.
    if samples < 3:
        return 'observe'

    clearly_good = wins >= 3 and (wins - setbacks) >= 2 and mean_reward >= 3.0
    if clearly_good:
        return 'prefer'

    clearly_bad = setbacks >= 2 and setbacks > wins and mean_reward < 1.0
    return 'avoid' if clearly_bad else 'observe'


def main() -> int:
    """Build the policy candidate file from the accumulated policy stats.

    Reads ``POLICY_STATS``, derives summary figures for every entry of the
    ``plans`` and ``families`` buckets, and writes the result as JSON to
    ``OUT_PATH``. Exactly one JSON status line is printed to stdout:
    ``{'ok': True, ...}`` on success, ``{'ok': False, 'error': ...}`` otherwise.

    Returns:
        0 on success. 1 when the stats file cannot be read or parsed, when its
        content does not have the expected shape (top level, buckets and rows
        must be JSON objects with numeric counters), or when the candidate
        file cannot be written.
    """
    def _fail(message: str) -> int:
        # Single place that emits the machine-readable failure line.
        print(json.dumps({'ok': False, 'error': message}, ensure_ascii=False))
        return 1

    try:
        stats = json.loads(POLICY_STATS.read_text(encoding='utf-8'))
    except Exception as exc:
        return _fail(f'cannot_read_policy_stats: {exc}')

    # json.loads can legally return a list, scalar or None; without this check
    # the `.get` calls below would raise and bypass the JSON error contract.
    if not isinstance(stats, dict):
        return _fail(f'invalid_policy_stats: expected object, got {type(stats).__name__}')

    candidate = {
        'generated_at': datetime.now(timezone.utc).isoformat(),
        'plans': {},
        'families': {},
    }

    for bucket in ('plans', 'families'):
        # A missing or falsy bucket is treated as empty.
        rows = stats.get(bucket) or {}
        if not isinstance(rows, dict):
            return _fail(f'invalid_policy_stats: {bucket} must be an object, got {type(rows).__name__}')
        for key, row in rows.items():
            if not isinstance(row, dict):
                return _fail(f'invalid_policy_stats: {bucket}[{key!r}] must be an object, got {type(row).__name__}')
            try:
                success = int(row.get('success', 0) or 0)
                # Clarifications are counted together with failures.
                failure = int(row.get('failure', 0) or 0) + int(row.get('clarification', 0) or 0)
                count = int(row.get('count', 0) or 0)
                reward_sum = float(row.get('reward_sum', 0.0) or 0.0)
                entry = {
                    'count': count,
                    'success': success,
                    'failure_like': failure,
                    'avg_reward': (reward_sum / count if count else 0.0),
                    'alpha': success + 1.0,
                    'beta': failure + 1.0,
                    'beta_mean': beta_mean(success, failure),
                    'mode': classify_mode(row),
                }
            except (TypeError, ValueError, ArithmeticError) as exc:
                # Non-numeric or otherwise unusable counters in this row.
                return _fail(f'invalid_policy_stats: {bucket}[{key!r}]: {exc}')
            candidate[bucket][key] = entry

    payload = json.dumps(candidate, ensure_ascii=False, indent=2)
    try:
        OUT_PATH.parent.mkdir(parents=True, exist_ok=True)
        OUT_PATH.write_text(payload, encoding='utf-8')
    except OSError as exc:
        return _fail(f'cannot_write_policy_candidate: {exc}')

    print(json.dumps({'ok': True, 'path': str(OUT_PATH), 'plans': len(candidate['plans']), 'families': len(candidate['families'])}, ensure_ascii=False))
    return 0


# Script entry point: run main() and use its return value as the exit status.
if __name__ == '__main__':
    exit_status = main()
    raise SystemExit(exit_status)
Add offline policy builder and sacred eval gate 2026-03-21 07:37:59 +00:00			`#!/usr/bin/env python3`
			`from __future__ import annotations`

			`import json`
			`from datetime import datetime, timezone`
			`from pathlib import Path`

			`ROOT = Path('/home/openclaw/.openclaw/workspace')`
			`POLICY_STATS = ROOT / 'data' / 'policy_stats.json'`
			`OUT_PATH = ROOT / 'data' / 'policy_candidate.json'`


			`def beta_mean(success: int, failure: int, alpha: float = 1.0, beta: float = 1.0) -> float:`
			`return (success + alpha) / (success + failure + alpha + beta)`


			`def classify_mode(row: dict) -> str:`
			`count = int(row.get('count', 0) or 0)`
			`success = int(row.get('success', 0) or 0)`
			`failure = int(row.get('failure', 0) or 0)`
			`clar = int(row.get('clarification', 0) or 0)`
			`reward_sum = float(row.get('reward_sum', 0.0) or 0.0)`
			`avg_reward = reward_sum / count if count else 0.0`
Stabilize sacred gate and policy thresholds 2026-03-21 07:49:32 +00:00			`failure_like = failure + clar`
			`if count >= 3 and success >= 3 and success >= (failure_like + 2) and avg_reward >= 3.0:`
Add offline policy builder and sacred eval gate 2026-03-21 07:37:59 +00:00			`return 'prefer'`
Stabilize sacred gate and policy thresholds 2026-03-21 07:49:32 +00:00			`if count >= 3 and failure_like >= 2 and failure_like > success and avg_reward < 1.0:`
Add offline policy builder and sacred eval gate 2026-03-21 07:37:59 +00:00			`return 'avoid'`
			`return 'observe'`


			`def main() -> int:`
			`try:`
			`stats = json.loads(POLICY_STATS.read_text(encoding='utf-8'))`
			`except Exception as exc:`
			`print(json.dumps({'ok': False, 'error': f'cannot_read_policy_stats: {exc}'}, ensure_ascii=False))`
			`return 1`

			`candidate = {`
			`'generated_at': datetime.now(timezone.utc).isoformat(),`
			`'plans': {},`
			`'families': {},`
			`}`

			`for bucket in ('plans', 'families'):`
			`for key, row in (stats.get(bucket, {}) or {}).items():`
			`success = int(row.get('success', 0) or 0)`
			`failure = int(row.get('failure', 0) or 0) + int(row.get('clarification', 0) or 0)`
			`count = int(row.get('count', 0) or 0)`
			`reward_sum = float(row.get('reward_sum', 0.0) or 0.0)`
			`candidate[bucket][key] = {`
			`'count': count,`
			`'success': success,`
			`'failure_like': failure,`
			`'avg_reward': (reward_sum / count if count else 0.0),`
Add bandit priors for shadow meta-controller 2026-03-21 07:44:20 +00:00			`'alpha': success + 1.0,`
			`'beta': failure + 1.0,`
Add offline policy builder and sacred eval gate 2026-03-21 07:37:59 +00:00			`'beta_mean': beta_mean(success, failure),`
			`'mode': classify_mode(row),`
			`}`

			`OUT_PATH.parent.mkdir(parents=True, exist_ok=True)`
			`OUT_PATH.write_text(json.dumps(candidate, ensure_ascii=False, indent=2), encoding='utf-8')`
			`print(json.dumps({'ok': True, 'path': str(OUT_PATH), 'plans': len(candidate['plans']), 'families': len(candidate['families'])}, ensure_ascii=False))`
			`return 0`


			`if __name__ == '__main__':`
			`raise SystemExit(main())`