Initial Phase A intelligence core
This commit is contained in:
commit
94eae8ceba
11 changed files with 4261 additions and 0 deletions
3608
syncpatch/agent-orchestrate
Normal file
3608
syncpatch/agent-orchestrate
Normal file
File diff suppressed because it is too large
Load diff
61
syncpatch/meta_controller.py
Normal file
61
syncpatch/meta_controller.py
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from tool_graph import build_tool_graph
|
||||
from uncertainty_model import estimate_uncertainty
|
||||
|
||||
|
||||
|
||||
def shadow_decision(message: str, analysis: dict[str, Any], family_candidates: list[dict[str, Any]] | None, tool_registry: dict[str, dict[str, Any]]) -> dict[str, Any]:
    """Compute a shadow-mode routing decision for *message* without executing it.

    Combines the tool graph, the uncertainty estimate and the analysis flags
    into one of: 'answer_direct', 'ask_clarification', 'use_memory_mode' or
    'run_plan', and returns a JSON-serializable decision row for shadow logging.

    Args:
        message: The raw user query.
        analysis: Planner analysis dict (tools, task_type, needs_* flags, ...).
        family_candidates: Optional ranked intent-family candidate dicts.
        tool_registry: Tool name -> registry info, fed to build_tool_graph.
    """
    graph = build_tool_graph(tool_registry)
    uncertainty = estimate_uncertainty(message, analysis, family_candidates)
    tools = list(analysis.get('tools') or [])
    families = [str((item or {}).get('family') or '') for item in (family_candidates or []) if (item or {}).get('family')]

    decision = 'answer_direct'
    reason = 'single_grounded_or_low_uncertainty'
    suggested_memory_mode = ''

    # BUG FIX: the last branch previously indexed tools[0] unconditionally; with
    # an empty tool list and medium uncertainty that raised IndexError. Resolve
    # the first tool's graph node up front, guarded against an empty plan.
    first_tool_node = graph.get(tools[0]) if tools else None

    if uncertainty['level'] == 'high' and 'ambiguous_access' in families:
        decision = 'ask_clarification'
        reason = 'ambiguous_service_access'
    elif analysis.get('needs_memory') and analysis.get('needs_setup_context'):
        decision = 'run_plan'
        reason = 'mixed_memory_plus_setup'
        suggested_memory_mode = 'setup'
    elif analysis.get('needs_memory'):
        decision = 'use_memory_mode'
        reason = 'memory_required'
        suggested_memory_mode = 'profile' if analysis.get('task_type') == 'memory' else 'preference'
    elif analysis.get('needs_setup_context') or len(tools) > 1:
        decision = 'run_plan'
        reason = 'evidence_required'
    elif uncertainty['level'] == 'medium' and first_tool_node is not None and first_tool_node.groundedness == 'weak':
        decision = 'run_plan'
        reason = 'weak_grounding_under_uncertainty'

    return {
        'ts': datetime.now(timezone.utc).isoformat(),
        'message': message,
        'decision': decision,
        'reason': reason,
        'suggested_memory_mode': suggested_memory_mode,
        'suggested_tools': tools,
        'uncertainty': uncertainty,
        'family_candidates': families,
        'normalized_task': f"{analysis.get('role','')}:{analysis.get('task_type','')}",
        'chosen_plan': str(analysis.get('composition_reason') or 'single_tool'),
    }
|
||||
|
||||
|
||||
|
||||
def log_shadow_decision(log_path, decision_row: dict[str, Any]) -> None:
    """Append one shadow decision row to *log_path* as a JSON line.

    Best-effort: any filesystem or serialization error is swallowed so that
    shadow logging can never break the caller.
    """
    try:
        log_path.parent.mkdir(parents=True, exist_ok=True)
        line = json.dumps(decision_row, ensure_ascii=False)
        with log_path.open('a', encoding='utf-8') as handle:
            handle.write(f'{line}\n')
    except Exception:
        # Deliberate best-effort: shadow logging must stay silent on failure.
        pass
|
||||
140
syncpatch/outcome_logging.py
Normal file
140
syncpatch/outcome_logging.py
Normal file
|
|
@ -0,0 +1,140 @@
|
|||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from trajectory_schema import build_trajectory_record
|
||||
from replay_buffer import append_replay_record, update_policy_stats
|
||||
from reward_signals import derive_cap_breaches, reward_row
|
||||
|
||||
|
||||
def log_intent_family_shadow(
    message: str,
    family_info: dict,
    before_tools: list[str],
    after_tools: list[str],
    *,
    log_path,
    collect_intent_families,
    service_hints,
) -> None:
    """Append a shadow row comparing tool choice before/after family routing.

    No-op when *family_info* carries no family. Best-effort: every error is
    swallowed so logging can never affect the main flow.
    """
    family = str((family_info or {}).get('family') or '')
    if not family:
        return
    try:
        log_path.parent.mkdir(parents=True, exist_ok=True)
        try:
            ranked = collect_intent_families(message, service_hints)
            candidates = [entry.get('family') for entry in ranked if entry.get('family')]
        except Exception:
            # Classifier failure: degrade to the single family we already know.
            candidates = [family]
        first_before = before_tools[0] if before_tools else ''
        first_after = after_tools[0] if after_tools else ''
        payload = {
            'ts': datetime.now(timezone.utc).isoformat(),
            'message': message,
            'family': family,
            'family_candidates': candidates,
            'before_tool': first_before,
            'after_tool': first_after,
            'overridden': (before_tools[:1] != after_tools[:1]),
        }
        with log_path.open('a', encoding='utf-8') as handle:
            handle.write(json.dumps(payload, ensure_ascii=False) + '\n')
    except Exception:
        # Deliberate best-effort: shadow logging is advisory only.
        pass
|
||||
|
||||
|
||||
def log_intent_composition(
    message: str,
    family_candidates: list[dict],
    analysis_before: dict,
    analysis_after: dict,
    composition: dict,
    *,
    log_path,
) -> None:
    """Append a JSONL row describing a composed tool plan (before vs after).

    Skipped entirely unless *composition* is marked 'composed'. Best-effort:
    any error during logging is swallowed.
    """
    if not composition or not composition.get('composed'):
        return
    try:
        log_path.parent.mkdir(parents=True, exist_ok=True)
        named = [entry.get('family') for entry in family_candidates if entry.get('family')]
        payload = {
            'ts': datetime.now(timezone.utc).isoformat(),
            'message': message,
            'family_candidates': named,
            'before_tools': list(analysis_before.get('tools') or []),
            'after_tools': list(analysis_after.get('tools') or []),
            'reason': composition.get('reason', ''),
            'policy': composition.get('policy', analysis_after.get('composition_policy', '')),
            'force_sequential': bool(analysis_after.get('force_sequential')),
        }
        with log_path.open('a', encoding='utf-8') as handle:
            handle.write(json.dumps(payload, ensure_ascii=False) + '\n')
    except Exception:
        # Deliberate best-effort logging.
        pass
|
||||
|
||||
|
||||
def record_task_outcome(
    message: str,
    analysis: dict,
    final_text: str,
    evidence_items: list[dict],
    *,
    status: str = 'success',
    error_text: str = '',
    log_path,
    classify_intent_family,
    collect_intent_families,
    service_hints,
    refresh_composition_policy_async,
) -> None:
    """Persist one completed task: outcome log row, replay record, policy stats.

    Best-effort end to end: any failure (filesystem, classifier, stats update)
    is swallowed so outcome recording can never break the caller.

    Args:
        message: The original user query.
        analysis: Planner analysis dict (role, task_type, tools, flags, ...).
        final_text: Final answer text; only its stripped length is recorded.
        evidence_items: Per-tool results; items may carry 'tool', 'grounded',
            'error' and 'output' keys.
        status: Outcome status label (default 'success').
        error_text: Optional error description, truncated to 300 chars.
        log_path: Path-like destination for the JSONL outcome log.
        classify_intent_family: Callable(message, service_hints) -> family dict.
        collect_intent_families: Callable(message, service_hints) -> candidates.
        service_hints: Opaque hints forwarded to both classifiers.
        refresh_composition_policy_async: Zero-arg callable invoked when the
            outcome involved a composed plan.
    """
    try:
        log_path.parent.mkdir(parents=True, exist_ok=True)
        grounded_items = [item for item in evidence_items if item.get('grounded')]
        # Candidate families are advisory; classifier failure degrades to [].
        try:
            candidates = [item.get('family') for item in collect_intent_families(message, service_hints) if item.get('family')]
        except Exception:
            candidates = []
        family = (classify_intent_family(message, service_hints) or {}).get('family', '')
        cap_breaches = derive_cap_breaches(error_text, analysis, evidence_items)
        row = {
            'ts': datetime.now(timezone.utc).isoformat(),
            'status': status,
            'message': message,
            'role': analysis.get('role'),
            'task_type': analysis.get('task_type'),
            'planned_tools': list(analysis.get('tools') or []),
            'used_tools': [item.get('tool') for item in evidence_items],
            'family': family,
            'family_candidates': candidates,
            'grounded_count': len(grounded_items),
            'evidence_count': len(evidence_items),
            'answer_len': len((final_text or '').strip()),
            'needs_memory': bool(analysis.get('needs_memory')),
            'needs_setup_context': bool(analysis.get('needs_setup_context')),
            'error_text': (error_text or '')[:300],
            'composition_reason': str(analysis.get('composition_reason') or ''),
            'composition_policy': str(analysis.get('composition_policy') or ''),
            'cap_breaches': cap_breaches,
        }
        # Reward fields (reward, cap_breaches, counters) extend/overwrite the row.
        reward_info = reward_row(status, analysis, evidence_items, final_text, cap_breaches=cap_breaches)
        row.update(reward_info)
        with log_path.open('a', encoding='utf-8') as f:
            f.write(json.dumps(row, ensure_ascii=False) + '\n')
        # Mirror the outcome into the replay buffer and aggregate policy stats,
        # reusing the same timestamp so the log row and record stay correlated.
        record = build_trajectory_record(
            message=message,
            analysis=analysis,
            final_text=final_text,
            evidence_items=evidence_items,
            status=status,
            family=family,
            family_candidates=candidates,
            error_text=error_text,
            ts=row['ts'],
            cap_breaches=cap_breaches,
        ).to_dict()
        record.update(reward_info)
        append_replay_record(record)
        update_policy_stats(record)
        # Composed-plan outcomes trigger a background policy refresh.
        if row.get('composition_reason') or row.get('composition_policy'):
            refresh_composition_policy_async()
    except Exception:
        # Deliberate best-effort: outcome recording must never raise.
        pass
|
||||
63
syncpatch/replay_buffer.py
Normal file
63
syncpatch/replay_buffer.py
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
# Default on-disk locations for the replay buffer and the aggregated policy
# stats; every public function below accepts an override argument.
DEFAULT_REPLAY_ROOT = Path('/home/openclaw/.openclaw/workspace/data/replay_buffer')
DEFAULT_POLICY_STATS = Path('/home/openclaw/.openclaw/workspace/data/policy_stats.json')
|
||||
|
||||
|
||||
|
||||
def _safe_slug(value: str) -> str:
|
||||
out = ''.join(ch if ch.isalnum() or ch in {'_', '-'} else '_' for ch in (value or 'unknown').strip().lower())
|
||||
return out[:80] or 'unknown'
|
||||
|
||||
|
||||
|
||||
def replay_path(record: dict[str, Any], replay_root: Path = DEFAULT_REPLAY_ROOT) -> Path:
    """Return the replay file for *record*: <root>/<YYYY-MM-DD>/<task>.jsonl.

    Falls back to the current UTC timestamp when the record has no 'ts'.
    """
    stamp = str(record.get('ts') or datetime.now(timezone.utc).isoformat())
    day_dir = stamp[:10]
    task_slug = _safe_slug(str(record.get('normalized_task') or 'unknown'))
    return replay_root / day_dir / f'{task_slug}.jsonl'
|
||||
|
||||
|
||||
|
||||
def append_replay_record(record: dict[str, Any], replay_root: Path = DEFAULT_REPLAY_ROOT) -> Path:
    """Append *record* as one JSON line to its replay file; return that path."""
    target = replay_path(record, replay_root)
    target.parent.mkdir(parents=True, exist_ok=True)
    line = json.dumps(record, ensure_ascii=False)
    with target.open('a', encoding='utf-8') as handle:
        handle.write(line + '\n')
    return target
|
||||
|
||||
|
||||
|
||||
def update_policy_stats(record: dict[str, Any], stats_path: Path = DEFAULT_POLICY_STATS) -> dict[str, Any]:
    """Fold one trajectory *record* into the aggregate policy-stats JSON file.

    Tallies count / success / failure / clarification and the reward sum per
    chosen plan and per intent family, stamps 'updated_at', rewrites the file
    and returns the updated structure. A missing or unreadable stats file
    starts fresh. NOTE(review): read-modify-write is not atomic — concurrent
    writers could lose updates; confirm single-writer usage.
    """
    stats_path.parent.mkdir(parents=True, exist_ok=True)
    try:
        data = json.loads(stats_path.read_text(encoding='utf-8'))
    except Exception:
        # First run or corrupt file: begin with an empty structure.
        data = {'plans': {}, 'families': {}, 'updated_at': ''}

    reward = float(record.get('reward') or 0.0)
    status = str(record.get('outcome_status') or '')
    buckets = (
        ('plans', str(record.get('chosen_plan') or 'single_tool')),
        ('families', str(record.get('family') or 'unknown')),
    )
    for bucket_name, key in buckets:
        bucket = data.setdefault(bucket_name, {})
        entry = bucket.setdefault(key, {'count': 0, 'success': 0, 'failure': 0, 'clarification': 0, 'reward_sum': 0.0})
        entry['count'] += 1
        entry['reward_sum'] += reward
        if status == 'success':
            entry['success'] += 1
        elif status == 'needs_clarification':
            entry['clarification'] += 1
        else:
            # Anything else (including an empty status) counts as failure.
            entry['failure'] += 1

    data['updated_at'] = datetime.now(timezone.utc).isoformat()
    stats_path.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding='utf-8')
    return data
|
||||
62
syncpatch/reward_signals.py
Normal file
62
syncpatch/reward_signals.py
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
|
||||
# Base reward per final task status: positive statuses encourage the policy,
# failure statuses penalize it (see compute_reward for the shaping on top).
BASE_REWARDS = {
    'success': 5.0,
    'needs_clarification': 1.0,
    'tool_output_unverified': -1.5,
    'tool_failed': -3.0,
    'no_result': -2.5,
}

# Additional penalty applied per detected cap breach; unlisted breach names
# fall back to -0.5 inside compute_reward.
CAP_BREACH_PENALTIES = {
    'daily_cap_exceeded': -1.0,
    'path_like_payload': -1.0,
}
|
||||
|
||||
|
||||
|
||||
def derive_cap_breaches(error_text: str, analysis: dict[str, Any], evidence_items: list[dict[str, Any]]) -> list[str]:
    """Scan error/output text (plus quarantine reasons) for cap-breach markers.

    Searches *error_text*, every evidence item's 'error', and the first 200
    characters of every item's 'output' (case-insensitively) for the known
    markers 'daily_cap_exceeded' and 'path_like_payload'; the daily-cap marker
    is also sought in the analysis quarantine reasons. Each breach is reported
    at most once.
    """
    fragments = [error_text or '']
    for item in evidence_items:
        fragments.append(str(item.get('error') or ''))
    for item in evidence_items:
        # Only the head of each output is scanned to bound the search.
        fragments.append(str(item.get('output') or '')[:200])
    haystack = ' '.join(fragments).lower()

    found: list[str] = []
    for marker in ('daily_cap_exceeded', 'path_like_payload'):
        if marker in haystack:
            found.append(marker)
    if 'daily_cap_exceeded' not in found:
        quarantine = analysis.get('quarantine_reason') or analysis.get('memory_quarantine_reason') or ''
        if 'daily_cap_exceeded' in str(quarantine).lower():
            found.append('daily_cap_exceeded')
    return found
|
||||
|
||||
|
||||
|
||||
def compute_reward(status: str, analysis: dict[str, Any], evidence_items: list[dict[str, Any]], final_text: str, *, cap_breaches: list[str] | None = None) -> float:
    """Score one task outcome: base status reward plus shaping terms.

    Bonuses: up to three grounded evidence items (+0.5 each) and a successful
    forced-sequential plan (+0.5). Penalties: a short ungrounded 'success'
    answer (-0.5) and every cap breach (per CAP_BREACH_PENALTIES, default -0.5
    for unlisted breach names). Unknown statuses start from 0.0.
    """
    grounded_total = sum(1 for item in evidence_items if item.get('grounded'))
    score = BASE_REWARDS.get(status, 0.0)
    if grounded_total:
        score += 0.5 * min(grounded_total, 3)
    if status == 'success' and analysis.get('force_sequential'):
        score += 0.5
    is_short_answer = bool(final_text) and len(final_text.strip()) < 24
    if is_short_answer and status == 'success' and grounded_total == 0:
        score -= 0.5
    for breach in (cap_breaches or []):
        score += CAP_BREACH_PENALTIES.get(breach, -0.5)
    return score
|
||||
|
||||
|
||||
|
||||
def reward_row(status: str, analysis: dict[str, Any], evidence_items: list[dict[str, Any]], final_text: str, *, cap_breaches: list[str] | None = None) -> dict[str, Any]:
    """Bundle the computed reward with its grounding/evidence counters."""
    breaches = list(cap_breaches or [])
    grounded_total = sum(1 for item in evidence_items if item.get('grounded'))
    return {
        'reward': compute_reward(status, analysis, evidence_items, final_text, cap_breaches=breaches),
        'cap_breaches': breaches,
        'grounded_count': grounded_total,
        'evidence_count': len(evidence_items),
    }
|
||||
99
syncpatch/tool_graph.py
Normal file
99
syncpatch/tool_graph.py
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, asdict
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass
class ToolNode:
    """One node in the tool graph: a tool plus its routing metadata.

    Instances are built by build_tool_graph; the kind-derived fields come from
    the KIND_TO_* tables and the classify_* helpers in this module.
    """

    name: str            # registry key for the tool
    kind: str            # 'action' | 'evidence' | 'final' (registry 'kind')
    description: str     # human-readable description from the registry
    input_schema: str    # coarse input label, see infer_input_schema
    output_schema: str   # coarse output label, from KIND_TO_OUTPUT
    effect_type: str     # side-effect class, from KIND_TO_EFFECT
    risk: str            # 'low' | 'medium' | 'high', from KIND_TO_RISK
    latency_class: str   # 'fast' | 'medium' | 'slow', see classify_latency
    groundedness: str    # 'weak' | 'medium' | 'strong', see classify_groundedness
    cost_class: str      # 'low' | 'medium' | 'high', see classify_cost

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict copy of this node (JSON-friendly)."""
        return asdict(self)
|
||||
|
||||
|
||||
# Kind-derived defaults used by build_tool_graph; unknown kinds fall back to
# 'answer_only' / 'medium' / 'opaque' respectively at the lookup sites.
KIND_TO_EFFECT = {
    'action': 'state_change',
    'evidence': 'evidence_only',
    'final': 'answer_only',
}

# Risk level per tool kind: actions mutate state and rank highest.
KIND_TO_RISK = {
    'action': 'high',
    'evidence': 'low',
    'final': 'medium',
}

# Output-schema label per tool kind.
KIND_TO_OUTPUT = {
    'action': 'action_result',
    'evidence': 'evidence_blob',
    'final': 'final_answer',
}
|
||||
|
||||
|
||||
def classify_latency(name: str, kind: str) -> str:
    """Bucket a tool into 'fast' / 'medium' / 'slow' expected latency."""
    # A few research-style tools are known to be slow regardless of kind.
    if name in ('url_research', 'web_research', 'ops_deep_analyze'):
        return 'slow'
    # Evidence tools are quick lookups; everything else defaults to medium.
    return 'fast' if kind == 'evidence' else 'medium'
|
||||
|
||||
|
||||
|
||||
def classify_cost(name: str, kind: str) -> str:
    """Bucket a tool into 'low' / 'medium' / 'high' expected cost."""
    # Research tools dominate cost regardless of their registry kind.
    if name in ('web_research', 'url_research', 'ops_deep_analyze'):
        return 'high'
    return 'medium' if kind == 'action' else 'low'
|
||||
|
||||
|
||||
|
||||
def classify_groundedness(name: str, kind: str) -> str:
    """Rate how verifiable a tool's output is: 'strong', 'medium' or 'weak'."""
    strong_names = {
        'setup_lookup', 'memory_profile', 'web_root_cause',
        'user_service_access_diagnose', 'light_status', 'emby_user_provision',
    }
    weak_names = {'general_answer', 'personal_assist', 'expert_write', 'expert_strategy'}
    # Evidence-kind tools and the known diagnostic tools are strongly grounded.
    if name in strong_names or kind == 'evidence':
        return 'strong'
    if name in weak_names:
        return 'weak'
    return 'medium'
|
||||
|
||||
|
||||
|
||||
def infer_input_schema(name: str) -> str:
    """Return the coarse input-schema label for tool *name*.

    Two tools take an extra routing argument; every other tool consumes the
    bare user query.
    """
    special = {
        'memory_profile': 'user_query+memory_mode',
        'setup_lookup': 'user_query+service_hint',
    }
    return special.get(name, 'user_query')
|
||||
|
||||
|
||||
|
||||
def build_tool_graph(tool_registry: dict[str, dict[str, Any]]) -> dict[str, ToolNode]:
    """Materialize one ToolNode per registry entry, keyed by tool name.

    A missing or falsy registry 'kind' defaults to 'final'; unknown kinds pick
    up the fallback values at each KIND_TO_* lookup. An empty/None registry
    yields an empty graph.
    """
    nodes: dict[str, ToolNode] = {}
    for tool_name, meta in (tool_registry or {}).items():
        tool_kind = str(meta.get('kind') or 'final')
        nodes[tool_name] = ToolNode(
            name=tool_name,
            kind=tool_kind,
            description=str(meta.get('description') or ''),
            input_schema=infer_input_schema(tool_name),
            output_schema=KIND_TO_OUTPUT.get(tool_kind, 'opaque'),
            effect_type=KIND_TO_EFFECT.get(tool_kind, 'answer_only'),
            risk=KIND_TO_RISK.get(tool_kind, 'medium'),
            latency_class=classify_latency(tool_name, tool_kind),
            groundedness=classify_groundedness(tool_name, tool_kind),
            cost_class=classify_cost(tool_name, tool_kind),
        )
    return nodes
|
||||
146
syncpatch/trajectory_schema.py
Normal file
146
syncpatch/trajectory_schema.py
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, asdict, field
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
import hashlib
|
||||
import json
|
||||
|
||||
|
||||
# Stored in every TrajectoryRecord; bump when the field set changes incompatibly.
SCHEMA_VERSION = 1
|
||||
|
||||
|
||||
@dataclass
class TrajectoryRecord:
    """One fully-described task trajectory, ready for JSONL replay storage.

    Built by build_trajectory_record; serialized via to_dict / to_json.
    """

    schema_version: int                  # see SCHEMA_VERSION
    trajectory_id: str                   # stable hash id, see make_trajectory_id
    ts: str                              # ISO-8601 UTC timestamp
    user_query: str                      # original user message
    normalized_task: str                 # "role:task_type" key
    role: str
    task_type: str
    family: str                          # classified intent family
    family_candidates: list[str] = field(default_factory=list)
    chosen_plan: str = ''                # composition reason, or 'single_tool'
    chosen_tools: list[str] = field(default_factory=list)   # planned tools
    used_tools: list[str] = field(default_factory=list)     # tools that produced evidence
    memory_mode: str = ''                # '', 'profile', 'setup' or 'preference'
    uncertainty: str = ''                # 'low' / 'medium' / 'high'
    grounded_count: int = 0
    evidence_count: int = 0
    answer_len: int = 0                  # length of the stripped final answer
    latency_ms: int | None = None
    verification_status: str = ''        # 'grounded' / 'unverified' or caller-supplied
    outcome_status: str = ''
    cap_breaches: list[str] = field(default_factory=list)
    user_feedback: str = ''
    error_text: str = ''                 # truncated to 300 chars by the builder
    composition_reason: str = ''
    composition_policy: str = ''
    needs_memory: bool = False
    needs_setup_context: bool = False
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict copy (JSON-serializable)."""
        return asdict(self)

    def to_json(self) -> str:
        """Serialize to one JSON string, preserving non-ASCII characters."""
        return json.dumps(self.to_dict(), ensure_ascii=False)
|
||||
|
||||
|
||||
|
||||
def utc_now_iso() -> str:
    """Current UTC time as an ISO-8601 string (carries the '+00:00' offset)."""
    now = datetime.now(timezone.utc)
    return now.isoformat()
|
||||
|
||||
|
||||
|
||||
def make_trajectory_id(message: str, ts: str, planned_tools: list[str], used_tools: list[str]) -> str:
    """Derive a stable 24-hex-char id from the query, timestamp and tool lists.

    None inputs are treated as empty; tool lists are joined with '|' before
    hashing, so the id is deterministic for identical inputs.
    """
    hasher = hashlib.sha256()
    for part in (
        message or '',
        ts or '',
        '|'.join(planned_tools or []),
        '|'.join(used_tools or []),
    ):
        hasher.update(part.encode('utf-8', errors='ignore'))
    return hasher.hexdigest()[:24]
|
||||
|
||||
|
||||
|
||||
def infer_memory_mode(task_type: str, analysis: dict[str, Any], used_tools: list[str]) -> str:
    """Guess which mode the memory_profile tool ran in; '' when it did not run."""
    if 'memory_profile' not in used_tools:
        return ''
    if task_type == 'memory':
        return 'profile'
    if analysis.get('needs_setup_context'):
        return 'setup'
    # needs_memory without setup context reads like a preference lookup;
    # anything else defaults to a plain profile read.
    return 'preference' if analysis.get('needs_memory') else 'profile'
|
||||
|
||||
|
||||
|
||||
def infer_uncertainty(status: str, analysis: dict[str, Any], evidence_items: list[dict[str, Any]]) -> str:
    """Collapse outcome status, confidence and grounding into low/medium/high."""
    # Hard failures dominate everything else.
    if status in {'tool_failed', 'no_result', 'tool_output_unverified'}:
        return 'high'
    if status == 'needs_clarification':
        return 'medium'
    confidence = float(analysis.get('confidence', 0.0) or 0.0)
    grounded_total = sum(1 for item in evidence_items if item.get('grounded'))
    # 'low' requires both very high confidence and at least one grounded item.
    if grounded_total >= 1 and confidence >= 0.93:
        return 'low'
    return 'medium' if confidence >= 0.8 else 'high'
|
||||
|
||||
|
||||
|
||||
def build_trajectory_record(
    *,
    message: str,
    analysis: dict[str, Any],
    final_text: str,
    evidence_items: list[dict[str, Any]],
    status: str,
    family: str,
    family_candidates: list[str],
    error_text: str = '',
    ts: str | None = None,
    latency_ms: int | None = None,
    verification_status: str = '',
    cap_breaches: list[str] | None = None,
    user_feedback: str = '',
) -> TrajectoryRecord:
    """Assemble a TrajectoryRecord from one completed task's raw artifacts.

    Keyword-only on purpose: the argument list is long and mostly same-typed.
    Derives the trajectory id, memory mode and uncertainty level from the
    inputs; *ts* defaults to the current UTC time and *error_text* is
    truncated to 300 characters.
    """
    ts = ts or utc_now_iso()
    planned_tools = list(analysis.get('tools') or [])
    # Only evidence items that actually name a tool count as "used".
    used_tools = [str(item.get('tool') or '') for item in evidence_items if item.get('tool')]
    task_type = str(analysis.get('task_type') or '')
    return TrajectoryRecord(
        schema_version=SCHEMA_VERSION,
        trajectory_id=make_trajectory_id(message, ts, planned_tools, used_tools),
        ts=ts,
        user_query=message,
        normalized_task=f"{analysis.get('role','')}:{task_type}",
        role=str(analysis.get('role') or ''),
        task_type=task_type,
        family=family,
        family_candidates=list(family_candidates or []),
        chosen_plan=str(analysis.get('composition_reason') or 'single_tool'),
        chosen_tools=planned_tools,
        used_tools=used_tools,
        memory_mode=infer_memory_mode(task_type, analysis, used_tools),
        uncertainty=infer_uncertainty(status, analysis, evidence_items),
        grounded_count=sum(1 for item in evidence_items if item.get('grounded')),
        evidence_count=len(evidence_items),
        answer_len=len((final_text or '').strip()),
        latency_ms=latency_ms,
        # Caller-supplied status wins; otherwise any grounded item => 'grounded'.
        verification_status=verification_status or ('grounded' if any(item.get('grounded') for item in evidence_items) else 'unverified'),
        outcome_status=status,
        cap_breaches=list(cap_breaches or []),
        user_feedback=user_feedback,
        error_text=(error_text or '')[:300],
        composition_reason=str(analysis.get('composition_reason') or ''),
        composition_policy=str(analysis.get('composition_policy') or ''),
        needs_memory=bool(analysis.get('needs_memory')),
        needs_setup_context=bool(analysis.get('needs_setup_context')),
        metadata={
            'force_sequential': bool(analysis.get('force_sequential')),
        },
    )
|
||||
37
syncpatch/uncertainty_model.py
Normal file
37
syncpatch/uncertainty_model.py
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
|
||||
|
||||
def estimate_uncertainty(message: str, analysis: dict[str, Any], family_candidates: list[dict[str, Any]] | None = None) -> dict[str, Any]:
    """Heuristic uncertainty estimate for a routing decision.

    Accumulates a score from candidate ambiguity, classifier confidence and a
    few message/analysis signals, then buckets it into low/medium/high.
    Returns {'level': str, 'score': float (rounded to 3), 'reasons': list[str]}.
    """
    named = [str((entry or {}).get('family') or '') for entry in (family_candidates or []) if (entry or {}).get('family')]
    confidence = float(analysis.get('confidence', 0.0) or 0.0)

    score = 0.0
    reasons: list[str] = []

    def add(points: float, why: str) -> None:
        nonlocal score
        score += points
        reasons.append(why)

    if len(set(named)) >= 2:
        add(0.35, 'multiple_family_candidates')
    if confidence < 0.75:
        add(0.4, 'low_confidence')
    elif confidence < 0.9:
        add(0.2, 'medium_confidence')
    if analysis.get('needs_memory') and analysis.get('needs_setup_context'):
        add(0.15, 'mixed_memory_and_setup')
    if 'http' in (message or '').lower() and analysis.get('task_type') not in {'summarize', 'research'}:
        add(0.1, 'url_in_non_research_query')
    if analysis.get('composition_reason'):
        add(0.1, 'composed_path')

    if score >= 0.65:
        level = 'high'
    elif score >= 0.3:
        level = 'medium'
    else:
        level = 'low'
    return {'level': level, 'score': round(score, 3), 'reasons': reasons}
|
||||
Loading…
Add table
Add a link
Reference in a new issue