Add bandit priors for shadow meta-controller

2026-03-21 07:44:20 +00:00 · 2026-03-21 07:44:20 +00:00 · 1e1bd0dc4a
commit 1e1bd0dc4a
parent 6bba85fe9a
3 changed files with 90 additions and 1 deletion
--- a/syncpatch/meta_controller.py
+++ b/syncpatch/meta_controller.py
@@ -6,6 +6,7 @@ from typing import Any

 from tool_graph import build_tool_graph
 from uncertainty_model import estimate_uncertainty
+from bandit_policy import load_policy_candidate, apply_bandit_bias



@@ -37,6 +38,19 @@ def shadow_decision(message: str, analysis: dict[str, Any], family_candidates: l
        decision = 'run_plan'
        reason = 'weak_grounding_under_uncertainty'

+    chosen_plan = str(analysis.get('composition_reason') or 'single_tool')
+    policy = load_policy_candidate()
+    bandit = apply_bandit_bias(
+        base_decision=decision,
+        base_reason=reason,
+        chosen_plan=chosen_plan,
+        families=families,
+        uncertainty=uncertainty,
+        policy=policy,
+    )
+    decision = bandit.get('decision', decision)
+    reason = bandit.get('reason', reason)
+
    return {
        'ts': datetime.now(timezone.utc).isoformat(),
        'message': message,
@@ -47,7 +61,11 @@ def shadow_decision(message: str, analysis: dict[str, Any], family_candidates: l
        'uncertainty': uncertainty,
        'family_candidates': families,
        'normalized_task': f"{analysis.get('role','')}:{analysis.get('task_type','')}",
-        'chosen_plan': str(analysis.get('composition_reason') or 'single_tool'),
+        'chosen_plan': chosen_plan,
+        'policy_hint': {
+            'plan_prior': bandit.get('plan_prior', {}),
+            'family_priors': bandit.get('family_priors', []),
+        },
    }