Initial Phase A intelligence core
This commit is contained in:
commit
94eae8ceba
11 changed files with 4261 additions and 0 deletions
11
README.md
Normal file
11
README.md
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
# Openclaw Intelligence Core
|
||||||
|
|
||||||
|
This repository tracks the new intelligence-controller slice for Openclaw:
|
||||||
|
- typed trajectories
|
||||||
|
- reward signals
|
||||||
|
- replay buffer / policy stats
|
||||||
|
- typed tool graph
|
||||||
|
- uncertainty model
|
||||||
|
- shadow meta-controller
|
||||||
|
|
||||||
|
It is intentionally narrow: this repo is the source-of-truth for the new learning and controller layer, not the entire legacy Openclaw workspace.
|
||||||
25
docs/ARCHITECTURE_BOARD.md
Normal file
25
docs/ARCHITECTURE_BOARD.md
Normal file
|
|
@ -0,0 +1,25 @@
|
||||||
|
# Architecture Board
|
||||||
|
|
||||||
|
## Goal
|
||||||
|
Build a local-first Openclaw agent that becomes more intelligent over time through:
|
||||||
|
- typed memory
|
||||||
|
- typed tool graph
|
||||||
|
- trajectory logging
|
||||||
|
- reward signals
|
||||||
|
- shadow meta-controller
|
||||||
|
- offline policy learning
|
||||||
|
- sacred eval gates
|
||||||
|
|
||||||
|
## Hosts
|
||||||
|
- Mac Studio: hot-path inference
|
||||||
|
- openclaw: orchestration and live logging
|
||||||
|
- Unraid: offline learning, retrieval, replay, eval batch jobs
|
||||||
|
- Kimi: offline teacher only
|
||||||
|
|
||||||
|
## Phase A
|
||||||
|
1. Typed trajectory schema
|
||||||
|
2. Reward signals
|
||||||
|
3. Replay buffer + policy stats
|
||||||
|
4. Tool graph
|
||||||
|
5. Uncertainty model
|
||||||
|
6. Shadow meta-controller
|
||||||
9
scripts/deploy_to_openclaw.sh
Executable file
9
scripts/deploy_to_openclaw.sh
Executable file
|
|
@ -0,0 +1,9 @@
|
||||||
|
#!/usr/bin/env bash
# Deploy the Phase A intelligence-core modules to the openclaw host and
# verify that everything deployed still byte-compiles remotely.
set -euo pipefail

REMOTE="openclaw@openclaw.tailef61c0.ts.net"
REMOTE_BASE="/home/openclaw/.openclaw/workspace"

# Single source of truth for the library modules we ship; the scp loop and
# the remote py_compile check are both derived from this list so the two
# can never drift apart.
LIB_FILES=(trajectory_schema.py reward_signals.py replay_buffer.py tool_graph.py uncertainty_model.py meta_controller.py outcome_logging.py)

for f in "${LIB_FILES[@]}"; do
  scp "$PWD/syncpatch/$f" "$REMOTE:$REMOTE_BASE/lib/$f"
done

scp "$PWD/syncpatch/agent-orchestrate" "$REMOTE:$REMOTE_BASE/bin/agent-orchestrate"

# Build the remote py_compile argument list from the same file list.
COMPILE_ARGS=""
for f in "${LIB_FILES[@]}"; do
  COMPILE_ARGS+=" $REMOTE_BASE/lib/$f"
done
ssh "$REMOTE" "python3 -m py_compile$COMPILE_ARGS $REMOTE_BASE/bin/agent-orchestrate"
|
||||||
3608
syncpatch/agent-orchestrate
Normal file
3608
syncpatch/agent-orchestrate
Normal file
File diff suppressed because it is too large
Load diff
61
syncpatch/meta_controller.py
Normal file
61
syncpatch/meta_controller.py
Normal file
|
|
@ -0,0 +1,61 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from tool_graph import build_tool_graph
|
||||||
|
from uncertainty_model import estimate_uncertainty
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def shadow_decision(message: str, analysis: dict[str, Any], family_candidates: list[dict[str, Any]] | None, tool_registry: dict[str, dict[str, Any]]) -> dict[str, Any]:
    """Compute a shadow (non-binding) routing decision for *message*.

    Combines the typed tool graph, the uncertainty estimate, and the
    analysis flags into one of: 'answer_direct', 'ask_clarification',
    'use_memory_mode', or 'run_plan', and returns a loggable row with the
    decision, its reason, and the supporting signals.

    Fix: the original final branch indexed ``tools[0]`` without checking
    that any tool was planned, raising IndexError when ``tools`` is empty
    (the earlier ``len(tools) > 1`` branch only rules out multi-tool plans,
    not zero-tool ones).
    """
    graph = build_tool_graph(tool_registry)
    uncertainty = estimate_uncertainty(message, analysis, family_candidates)
    tools = list(analysis.get('tools') or [])
    families = [str((item or {}).get('family') or '') for item in (family_candidates or []) if (item or {}).get('family')]

    # Defaults: answer directly unless a rule below overrides.
    decision = 'answer_direct'
    reason = 'single_grounded_or_low_uncertainty'
    suggested_memory_mode = ''

    if uncertainty['level'] == 'high' and 'ambiguous_access' in families:
        decision = 'ask_clarification'
        reason = 'ambiguous_service_access'
    elif analysis.get('needs_memory') and analysis.get('needs_setup_context'):
        decision = 'run_plan'
        reason = 'mixed_memory_plus_setup'
        suggested_memory_mode = 'setup'
    elif analysis.get('needs_memory'):
        decision = 'use_memory_mode'
        reason = 'memory_required'
        suggested_memory_mode = 'profile' if analysis.get('task_type') == 'memory' else 'preference'
    elif analysis.get('needs_setup_context') or len(tools) > 1:
        decision = 'run_plan'
        reason = 'evidence_required'
    # Guard on `tools` first: a zero-tool plan previously crashed here.
    elif uncertainty['level'] == 'medium' and tools and (node := graph.get(tools[0])) is not None and node.groundedness == 'weak':
        decision = 'run_plan'
        reason = 'weak_grounding_under_uncertainty'

    return {
        'ts': datetime.now(timezone.utc).isoformat(),
        'message': message,
        'decision': decision,
        'reason': reason,
        'suggested_memory_mode': suggested_memory_mode,
        'suggested_tools': tools,
        'uncertainty': uncertainty,
        'family_candidates': families,
        'normalized_task': f"{analysis.get('role','')}:{analysis.get('task_type','')}",
        'chosen_plan': str(analysis.get('composition_reason') or 'single_tool'),
    }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def log_shadow_decision(log_path, decision_row: dict[str, Any]) -> None:
    """Append *decision_row* as one JSON line to *log_path*, best effort.

    Failures (unwritable path, serialization issues) are deliberately
    swallowed: shadow logging must never break the request hot path.
    """
    try:
        log_path.parent.mkdir(parents=True, exist_ok=True)
        line = json.dumps(decision_row, ensure_ascii=False)
        with log_path.open('a', encoding='utf-8') as handle:
            handle.write(line + '\n')
    except Exception:
        pass
|
||||||
140
syncpatch/outcome_logging.py
Normal file
140
syncpatch/outcome_logging.py
Normal file
|
|
@ -0,0 +1,140 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
from trajectory_schema import build_trajectory_record
|
||||||
|
from replay_buffer import append_replay_record, update_policy_stats
|
||||||
|
from reward_signals import derive_cap_breaches, reward_row
|
||||||
|
|
||||||
|
|
||||||
|
def log_intent_family_shadow(
    message: str,
    family_info: dict,
    before_tools: list[str],
    after_tools: list[str],
    *,
    log_path,
    collect_intent_families,
    service_hints,
) -> None:
    """Record one shadow row comparing the tool choice before/after family routing.

    Best effort: any I/O or classification failure is swallowed so logging
    can never interfere with serving the request.
    """
    primary = str((family_info or {}).get('family') or '')
    if not primary:
        return  # nothing to record without a classified family
    try:
        log_path.parent.mkdir(parents=True, exist_ok=True)
        try:
            ranked = collect_intent_families(message, service_hints)
            candidates = [entry.get('family') for entry in ranked if entry.get('family')]
        except Exception:
            candidates = [primary]  # fall back to the single known family
        first_before = before_tools[0] if before_tools else ''
        first_after = after_tools[0] if after_tools else ''
        row = {
            'ts': datetime.now(timezone.utc).isoformat(),
            'message': message,
            'family': primary,
            'family_candidates': candidates,
            'before_tool': first_before,
            'after_tool': first_after,
            'overridden': (before_tools[:1] != after_tools[:1]),
        }
        with log_path.open('a', encoding='utf-8') as handle:
            handle.write(json.dumps(row, ensure_ascii=False) + '\n')
    except Exception:
        pass
|
||||||
|
|
||||||
|
|
||||||
|
def log_intent_composition(
    message: str,
    family_candidates: list[dict],
    analysis_before: dict,
    analysis_after: dict,
    composition: dict,
    *,
    log_path,
) -> None:
    """Append one JSON line describing a tool-composition decision.

    Only rows where the planner actually composed tools are recorded.
    Best effort: all failures are swallowed.
    """
    if not composition or not composition.get('composed'):
        return
    try:
        log_path.parent.mkdir(parents=True, exist_ok=True)
        named_families = [entry.get('family') for entry in family_candidates if entry.get('family')]
        fallback_policy = analysis_after.get('composition_policy', '')
        row = {
            'ts': datetime.now(timezone.utc).isoformat(),
            'message': message,
            'family_candidates': named_families,
            'before_tools': list(analysis_before.get('tools') or []),
            'after_tools': list(analysis_after.get('tools') or []),
            'reason': composition.get('reason', ''),
            'policy': composition.get('policy', fallback_policy),
            'force_sequential': bool(analysis_after.get('force_sequential')),
        }
        with log_path.open('a', encoding='utf-8') as handle:
            handle.write(json.dumps(row, ensure_ascii=False) + '\n')
    except Exception:
        pass
|
||||||
|
|
||||||
|
|
||||||
|
def record_task_outcome(
    message: str,
    analysis: dict,
    final_text: str,
    evidence_items: list[dict],
    *,
    status: str = 'success',
    error_text: str = '',
    log_path,
    classify_intent_family,
    collect_intent_families,
    service_hints,
    refresh_composition_policy_async,
) -> None:
    """Persist one finished task: JSONL outcome row, replay record, policy stats.

    Best effort end to end: any failure is swallowed so outcome logging can
    never break the calling agent loop.
    """
    try:
        log_path.parent.mkdir(parents=True, exist_ok=True)
        grounded = [entry for entry in evidence_items if entry.get('grounded')]
        try:
            ranked = collect_intent_families(message, service_hints)
            candidates = [entry.get('family') for entry in ranked if entry.get('family')]
        except Exception:
            candidates = []
        family = (classify_intent_family(message, service_hints) or {}).get('family', '')
        cap_breaches = derive_cap_breaches(error_text, analysis, evidence_items)
        row = dict(
            ts=datetime.now(timezone.utc).isoformat(),
            status=status,
            message=message,
            role=analysis.get('role'),
            task_type=analysis.get('task_type'),
            planned_tools=list(analysis.get('tools') or []),
            used_tools=[entry.get('tool') for entry in evidence_items],
            family=family,
            family_candidates=candidates,
            grounded_count=len(grounded),
            evidence_count=len(evidence_items),
            answer_len=len((final_text or '').strip()),
            needs_memory=bool(analysis.get('needs_memory')),
            needs_setup_context=bool(analysis.get('needs_setup_context')),
            error_text=(error_text or '')[:300],
            composition_reason=str(analysis.get('composition_reason') or ''),
            composition_policy=str(analysis.get('composition_policy') or ''),
            cap_breaches=cap_breaches,
        )
        reward_info = reward_row(status, analysis, evidence_items, final_text, cap_breaches=cap_breaches)
        row.update(reward_info)
        with log_path.open('a', encoding='utf-8') as handle:
            handle.write(json.dumps(row, ensure_ascii=False) + '\n')
        trajectory = build_trajectory_record(
            message=message,
            analysis=analysis,
            final_text=final_text,
            evidence_items=evidence_items,
            status=status,
            family=family,
            family_candidates=candidates,
            error_text=error_text,
            ts=row['ts'],
            cap_breaches=cap_breaches,
        ).to_dict()
        trajectory.update(reward_info)
        append_replay_record(trajectory)
        update_policy_stats(trajectory)
        # A recorded composition decision may shift the learned policy.
        if row.get('composition_reason') or row.get('composition_policy'):
            refresh_composition_policy_async()
    except Exception:
        pass
|
||||||
63
syncpatch/replay_buffer.py
Normal file
63
syncpatch/replay_buffer.py
Normal file
|
|
@ -0,0 +1,63 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
# On-disk locations for the learning loop's persisted state.
# Replay records are sharded by day and normalized task under this root
# (see replay_path below).
DEFAULT_REPLAY_ROOT = Path('/home/openclaw/.openclaw/workspace/data/replay_buffer')
# Aggregated per-plan / per-family counters live in one JSON file.
DEFAULT_POLICY_STATS = Path('/home/openclaw/.openclaw/workspace/data/policy_stats.json')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_slug(value: str) -> str:
|
||||||
|
out = ''.join(ch if ch.isalnum() or ch in {'_', '-'} else '_' for ch in (value or 'unknown').strip().lower())
|
||||||
|
return out[:80] or 'unknown'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def replay_path(record: dict[str, Any], replay_root: Path = DEFAULT_REPLAY_ROOT) -> Path:
    """Map a trajectory *record* to its shard: <root>/<YYYY-MM-DD>/<task>.jsonl.

    The day comes from the record's ISO timestamp (now-UTC if missing); the
    file name is the slugified normalized task.
    """
    stamp = str(record.get('ts') or datetime.now(timezone.utc).isoformat())
    day_dir = replay_root / stamp[:10]
    slug = _safe_slug(str(record.get('normalized_task') or 'unknown'))
    return day_dir / f'{slug}.jsonl'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def append_replay_record(record: dict[str, Any], replay_root: Path = DEFAULT_REPLAY_ROOT) -> Path:
    """Append *record* as one JSON line to its shard; return the shard path."""
    shard = replay_path(record, replay_root)
    shard.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(record, ensure_ascii=False)
    with shard.open('a', encoding='utf-8') as handle:
        handle.write(payload + '\n')
    return shard
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def update_policy_stats(record: dict[str, Any], stats_path: Path = DEFAULT_POLICY_STATS) -> dict[str, Any]:
    """Fold one trajectory *record* into the aggregate policy-stats JSON file.

    Counters are kept per chosen plan and per intent family; statuses other
    than 'success' / 'needs_clarification' count as failures. Returns the
    updated stats mapping (also written back to *stats_path*).
    """
    stats_path.parent.mkdir(parents=True, exist_ok=True)
    try:
        stats = json.loads(stats_path.read_text(encoding='utf-8'))
    except Exception:
        # Missing or unreadable file: start from an empty aggregate.
        stats = {'plans': {}, 'families': {}, 'updated_at': ''}

    reward = float(record.get('reward') or 0.0)
    status = str(record.get('outcome_status') or '')
    buckets = (
        ('plans', str(record.get('chosen_plan') or 'single_tool')),
        ('families', str(record.get('family') or 'unknown')),
    )
    for bucket_name, key in buckets:
        bucket = stats.setdefault(bucket_name, {})
        counters = bucket.setdefault(key, {'count': 0, 'success': 0, 'failure': 0, 'clarification': 0, 'reward_sum': 0.0})
        counters['count'] += 1
        counters['reward_sum'] += reward
        if status == 'success':
            counters['success'] += 1
        elif status == 'needs_clarification':
            counters['clarification'] += 1
        else:
            counters['failure'] += 1

    stats['updated_at'] = datetime.now(timezone.utc).isoformat()
    stats_path.write_text(json.dumps(stats, ensure_ascii=False, indent=2), encoding='utf-8')
    return stats
|
||||||
62
syncpatch/reward_signals.py
Normal file
62
syncpatch/reward_signals.py
Normal file
|
|
@ -0,0 +1,62 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
# Base reward per terminal outcome status, the starting point of
# compute_reward (unknown statuses score 0.0 there).
BASE_REWARDS = {
    'success': 5.0,
    'needs_clarification': 1.0,
    'tool_output_unverified': -1.5,
    'tool_failed': -3.0,
    'no_result': -2.5,
}

# Additional penalty applied per detected cap breach (see
# derive_cap_breaches); breaches not listed here cost -0.5 in compute_reward.
CAP_BREACH_PENALTIES = {
    'daily_cap_exceeded': -1.0,
    'path_like_payload': -1.0,
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def derive_cap_breaches(error_text: str, analysis: dict[str, Any], evidence_items: list[dict[str, Any]]) -> list[str]:
    """Scan error/evidence text (and quarantine reasons) for known cap breaches.

    Returns a list drawn from {'daily_cap_exceeded', 'path_like_payload'};
    each breach appears at most once. Evidence outputs are only scanned in
    their first 200 characters, matching the original behavior.
    """
    fragments = [error_text or '']
    for entry in evidence_items:
        fragments.append(str(entry.get('error') or ''))
        fragments.append(str(entry.get('output') or '')[:200])
    haystack = ' '.join(fragments).lower()

    breaches: list[str] = []
    if 'daily_cap_exceeded' in haystack:
        breaches.append('daily_cap_exceeded')
    if 'path_like_payload' in haystack:
        breaches.append('path_like_payload')
    if 'daily_cap_exceeded' not in breaches:
        # The breach may surface only as a (memory) quarantine reason.
        quarantine = analysis.get('quarantine_reason') or analysis.get('memory_quarantine_reason') or ''
        if 'daily_cap_exceeded' in str(quarantine).lower():
            breaches.append('daily_cap_exceeded')
    return breaches
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def compute_reward(status: str, analysis: dict[str, Any], evidence_items: list[dict[str, Any]], final_text: str, *, cap_breaches: list[str] | None = None) -> float:
    """Scalar reward for one task outcome.

    Starts from the BASE_REWARDS entry for *status* (0.0 for unknown
    statuses), then applies:
      +0.5 per grounded evidence item (capped at 3 items)
      +0.5 for a successful forced-sequential plan
      -0.5 for a short (<24 chars), ungrounded 'success' answer
      per-breach penalties from CAP_BREACH_PENALTIES (-0.5 if unlisted)
    """
    score = BASE_REWARDS.get(status, 0.0)
    grounded_total = sum(1 for entry in evidence_items if entry.get('grounded'))
    if grounded_total:
        score += 0.5 * min(grounded_total, 3)
    if status == 'success' and analysis.get('force_sequential'):
        score += 0.5
    if final_text and len(final_text.strip()) < 24 and status == 'success' and grounded_total == 0:
        score -= 0.5
    for breach in (cap_breaches or []):
        score += CAP_BREACH_PENALTIES.get(breach, -0.5)
    return score
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def reward_row(status: str, analysis: dict[str, Any], evidence_items: list[dict[str, Any]], final_text: str, *, cap_breaches: list[str] | None = None) -> dict[str, Any]:
    """Bundle the computed reward with its supporting counters for logging."""
    breaches = list(cap_breaches or [])
    grounded_total = sum(1 for entry in evidence_items if entry.get('grounded'))
    return {
        'reward': compute_reward(status, analysis, evidence_items, final_text, cap_breaches=breaches),
        'cap_breaches': breaches,
        'grounded_count': grounded_total,
        'evidence_count': len(evidence_items),
    }
|
||||||
99
syncpatch/tool_graph.py
Normal file
99
syncpatch/tool_graph.py
Normal file
|
|
@ -0,0 +1,99 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass, asdict
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ToolNode:
    """Typed metadata describing one tool in the tool graph.

    Built by build_tool_graph() from the raw tool registry; the classifier
    fields are derived from the tool's name and kind.
    """

    name: str  # registry key of the tool
    kind: str  # 'action' | 'evidence' | 'final' (keys of the KIND_TO_* maps)
    description: str
    input_schema: str  # coarse input label, see infer_input_schema
    output_schema: str  # from KIND_TO_OUTPUT ('opaque' for unknown kinds)
    effect_type: str  # from KIND_TO_EFFECT
    risk: str  # from KIND_TO_RISK
    latency_class: str  # 'fast' / 'medium' / 'slow', see classify_latency
    groundedness: str  # 'strong' / 'medium' / 'weak', see classify_groundedness
    cost_class: str  # 'low' / 'medium' / 'high', see classify_cost

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of this node (dataclasses.asdict)."""
        return asdict(self)
|
||||||
|
|
||||||
|
|
||||||
|
# Per-kind defaults used by build_tool_graph; unknown kinds fall back there
# to 'answer_only' / 'medium' / 'opaque' respectively.
KIND_TO_EFFECT = {
    'action': 'state_change',
    'evidence': 'evidence_only',
    'final': 'answer_only',
}

KIND_TO_RISK = {
    'action': 'high',
    'evidence': 'low',
    'final': 'medium',
}

KIND_TO_OUTPUT = {
    'action': 'action_result',
    'evidence': 'evidence_blob',
    'final': 'final_answer',
}
|
||||||
|
|
||||||
|
|
||||||
|
def classify_latency(name: str, kind: str) -> str:
    """Bucket a tool into 'fast' / 'medium' / 'slow' expected latency."""
    slow_tools = ('url_research', 'web_research', 'ops_deep_analyze')
    if name in slow_tools:
        return 'slow'
    if kind == 'evidence':
        return 'fast'
    # Actions and everything else default to medium latency.
    return 'medium'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def classify_cost(name: str, kind: str) -> str:
    """Bucket a tool into 'low' / 'medium' / 'high' expected cost."""
    if name in ('web_research', 'url_research', 'ops_deep_analyze'):
        return 'high'
    return 'medium' if kind == 'action' else 'low'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def classify_groundedness(name: str, kind: str) -> str:
    """Classify how strongly a tool's output is grounded in evidence."""
    strong_names = {
        'setup_lookup', 'memory_profile', 'web_root_cause',
        'user_service_access_diagnose', 'light_status', 'emby_user_provision',
    }
    weak_names = {'general_answer', 'personal_assist', 'expert_write', 'expert_strategy'}
    if name in strong_names or kind == 'evidence':
        return 'strong'
    if name in weak_names:
        return 'weak'
    return 'medium'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def infer_input_schema(name: str) -> str:
    """Coarse input-shape label for a tool; default is the bare user query."""
    special = {
        'memory_profile': 'user_query+memory_mode',
        'setup_lookup': 'user_query+service_hint',
    }
    return special.get(name, 'user_query')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def build_tool_graph(tool_registry: dict[str, dict[str, Any]]) -> dict[str, ToolNode]:
    """Materialize one ToolNode per registry entry, deriving typed metadata.

    Missing/unknown 'kind' values are treated as 'final'; unknown kinds fall
    back to the defaults encoded in the KIND_TO_* maps.
    """
    nodes: dict[str, ToolNode] = {}
    for tool_name, meta in (tool_registry or {}).items():
        tool_kind = str(meta.get('kind') or 'final')
        nodes[tool_name] = ToolNode(
            name=tool_name,
            kind=tool_kind,
            description=str(meta.get('description') or ''),
            input_schema=infer_input_schema(tool_name),
            output_schema=KIND_TO_OUTPUT.get(tool_kind, 'opaque'),
            effect_type=KIND_TO_EFFECT.get(tool_kind, 'answer_only'),
            risk=KIND_TO_RISK.get(tool_kind, 'medium'),
            latency_class=classify_latency(tool_name, tool_kind),
            groundedness=classify_groundedness(tool_name, tool_kind),
            cost_class=classify_cost(tool_name, tool_kind),
        )
    return nodes
|
||||||
146
syncpatch/trajectory_schema.py
Normal file
146
syncpatch/trajectory_schema.py
Normal file
|
|
@ -0,0 +1,146 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass, asdict, field
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import Any
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
|
||||||
|
|
||||||
|
# Version stamped into every TrajectoryRecord; bump on incompatible changes.
SCHEMA_VERSION = 1
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class TrajectoryRecord:
    """One fully-typed record of a single agent task, suitable for JSONL replay.

    Produced by build_trajectory_record(); serialized via to_dict()/to_json().
    """

    schema_version: int  # SCHEMA_VERSION at write time
    trajectory_id: str  # 24-hex-char hash, see make_trajectory_id()
    ts: str  # ISO-8601 UTC timestamp
    user_query: str
    normalized_task: str  # "<role>:<task_type>"
    role: str
    task_type: str
    family: str  # primary intent family
    family_candidates: list[str] = field(default_factory=list)
    chosen_plan: str = ''  # composition reason, or 'single_tool'
    chosen_tools: list[str] = field(default_factory=list)  # planner's tool list
    used_tools: list[str] = field(default_factory=list)  # tools seen in evidence items
    memory_mode: str = ''  # '', 'profile', 'setup', 'preference' (see infer_memory_mode)
    uncertainty: str = ''  # 'low' / 'medium' / 'high' (see infer_uncertainty)
    grounded_count: int = 0
    evidence_count: int = 0
    answer_len: int = 0  # length of the stripped final answer
    latency_ms: int | None = None
    verification_status: str = ''  # 'grounded' / 'unverified' unless overridden
    outcome_status: str = ''
    cap_breaches: list[str] = field(default_factory=list)
    user_feedback: str = ''
    error_text: str = ''  # truncated to 300 chars by the builder
    composition_reason: str = ''
    composition_policy: str = ''
    needs_memory: bool = False
    needs_setup_context: bool = False
    metadata: dict[str, Any] = field(default_factory=dict)  # e.g. force_sequential flag

    def to_dict(self) -> dict[str, Any]:
        """Return this record as a plain dict (dataclasses.asdict)."""
        return asdict(self)

    def to_json(self) -> str:
        """Serialize to one JSON line (non-ASCII preserved)."""
        return json.dumps(self.to_dict(), ensure_ascii=False)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def utc_now_iso() -> str:
    """Current UTC time as a timezone-aware ISO-8601 string."""
    now = datetime.now(timezone.utc)
    return now.isoformat()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def make_trajectory_id(message: str, ts: str, planned_tools: list[str], used_tools: list[str]) -> str:
    """Deterministic 24-hex-char id derived from query, timestamp, and tools."""
    parts = (
        message or '',
        ts or '',
        '|'.join(planned_tools or []),
        '|'.join(used_tools or []),
    )
    digest = hashlib.sha256()
    for part in parts:
        digest.update(part.encode('utf-8', errors='ignore'))
    return digest.hexdigest()[:24]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def infer_memory_mode(task_type: str, analysis: dict[str, Any], used_tools: list[str]) -> str:
    """Best-effort reconstruction of the memory mode a trajectory ran under.

    Only meaningful when the 'memory_profile' tool was actually used;
    otherwise returns '' (no memory involved).
    """
    if 'memory_profile' not in used_tools:
        return ''
    if task_type == 'memory':
        return 'profile'
    if analysis.get('needs_setup_context'):
        return 'setup'
    if analysis.get('needs_memory'):
        return 'preference'
    return 'profile'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def infer_uncertainty(status: str, analysis: dict[str, Any], evidence_items: list[dict[str, Any]]) -> str:
    """Map outcome status, confidence, and grounding onto 'low'/'medium'/'high'."""
    if status in {'tool_failed', 'no_result', 'tool_output_unverified'}:
        return 'high'
    if status == 'needs_clarification':
        return 'medium'
    confidence = float(analysis.get('confidence', 0.0) or 0.0)
    grounded_total = sum(1 for entry in evidence_items if entry.get('grounded'))
    # High confidence with at least one grounded item is the only 'low' case.
    if confidence >= 0.93 and grounded_total >= 1:
        return 'low'
    return 'medium' if confidence >= 0.8 else 'high'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def build_trajectory_record(
    *,
    message: str,
    analysis: dict[str, Any],
    final_text: str,
    evidence_items: list[dict[str, Any]],
    status: str,
    family: str,
    family_candidates: list[str],
    error_text: str = '',
    ts: str | None = None,
    latency_ms: int | None = None,
    verification_status: str = '',
    cap_breaches: list[str] | None = None,
    user_feedback: str = '',
) -> TrajectoryRecord:
    """Assemble a TrajectoryRecord from a finished task's raw artifacts.

    Derives the id, memory mode, uncertainty level, and grounding counters;
    the timestamp defaults to now (UTC) when *ts* is not supplied.
    """
    stamp = ts or utc_now_iso()
    planned = list(analysis.get('tools') or [])
    used = [str(entry.get('tool') or '') for entry in evidence_items if entry.get('tool')]
    kind = str(analysis.get('task_type') or '')
    grounded_total = sum(1 for entry in evidence_items if entry.get('grounded'))
    if verification_status:
        verified = verification_status
    else:
        verified = 'grounded' if grounded_total else 'unverified'
    return TrajectoryRecord(
        schema_version=SCHEMA_VERSION,
        trajectory_id=make_trajectory_id(message, stamp, planned, used),
        ts=stamp,
        user_query=message,
        normalized_task=f"{analysis.get('role','')}:{kind}",
        role=str(analysis.get('role') or ''),
        task_type=kind,
        family=family,
        family_candidates=list(family_candidates or []),
        chosen_plan=str(analysis.get('composition_reason') or 'single_tool'),
        chosen_tools=planned,
        used_tools=used,
        memory_mode=infer_memory_mode(kind, analysis, used),
        uncertainty=infer_uncertainty(status, analysis, evidence_items),
        grounded_count=grounded_total,
        evidence_count=len(evidence_items),
        answer_len=len((final_text or '').strip()),
        latency_ms=latency_ms,
        verification_status=verified,
        outcome_status=status,
        cap_breaches=list(cap_breaches or []),
        user_feedback=user_feedback,
        error_text=(error_text or '')[:300],
        composition_reason=str(analysis.get('composition_reason') or ''),
        composition_policy=str(analysis.get('composition_policy') or ''),
        needs_memory=bool(analysis.get('needs_memory')),
        needs_setup_context=bool(analysis.get('needs_setup_context')),
        metadata={'force_sequential': bool(analysis.get('force_sequential'))},
    )
|
||||||
37
syncpatch/uncertainty_model.py
Normal file
37
syncpatch/uncertainty_model.py
Normal file
|
|
@ -0,0 +1,37 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def estimate_uncertainty(message: str, analysis: dict[str, Any], family_candidates: list[dict[str, Any]] | None = None) -> dict[str, Any]:
|
||||||
|
candidates = [str((item or {}).get('family') or '') for item in (family_candidates or []) if (item or {}).get('family')]
|
||||||
|
confidence = float(analysis.get('confidence', 0.0) or 0.0)
|
||||||
|
score = 0.0
|
||||||
|
reasons: list[str] = []
|
||||||
|
if len(set(candidates)) >= 2:
|
||||||
|
score += 0.35
|
||||||
|
reasons.append('multiple_family_candidates')
|
||||||
|
if confidence < 0.75:
|
||||||
|
score += 0.4
|
||||||
|
reasons.append('low_confidence')
|
||||||
|
elif confidence < 0.9:
|
||||||
|
score += 0.2
|
||||||
|
reasons.append('medium_confidence')
|
||||||
|
if analysis.get('needs_memory') and analysis.get('needs_setup_context'):
|
||||||
|
score += 0.15
|
||||||
|
reasons.append('mixed_memory_and_setup')
|
||||||
|
if 'http' in (message or '').lower() and analysis.get('task_type') not in {'summarize', 'research'}:
|
||||||
|
score += 0.1
|
||||||
|
reasons.append('url_in_non_research_query')
|
||||||
|
if analysis.get('composition_reason'):
|
||||||
|
score += 0.1
|
||||||
|
reasons.append('composed_path')
|
||||||
|
|
||||||
|
if score >= 0.65:
|
||||||
|
level = 'high'
|
||||||
|
elif score >= 0.3:
|
||||||
|
level = 'medium'
|
||||||
|
else:
|
||||||
|
level = 'low'
|
||||||
|
return {'level': level, 'score': round(score, 3), 'reasons': reasons}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue