Initial Phase A intelligence core

Openclaw 2026-03-21 07:34:09 +00:00
commit 94eae8ceba
11 changed files with 4261 additions and 0 deletions

README.md Normal file

@@ -0,0 +1,11 @@
# Openclaw Intelligence Core
This repository tracks the new intelligence-controller slice for Openclaw:
- typed trajectories
- reward signals
- replay buffer / policy stats
- typed tool graph
- uncertainty model
- shadow meta-controller
It is intentionally narrow: this repository is the source of truth for the new learning and controller layer, not the entire legacy Openclaw workspace.


@@ -0,0 +1,25 @@
# Architecture Board
## Goal
Build a local-first Openclaw agent that becomes more intelligent over time through:
- typed memory
- typed tool graph
- trajectory logging
- reward signals
- shadow meta-controller
- offline policy learning
- sacred eval gates
## Hosts
- Mac Studio: hot-path inference
- openclaw: orchestration and live logging
- Unraid: offline learning, retrieval, replay, eval batch jobs
- Kimi: offline teacher only
## Phase A
1. Typed trajectory schema
2. Reward signals
3. Replay buffer + policy stats
4. Tool graph
5. Uncertainty model
6. Shadow meta-controller

scripts/deploy_to_openclaw.sh Executable file

@@ -0,0 +1,9 @@
#!/usr/bin/env bash
set -euo pipefail
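# Push the Phase A modules to the openclaw host, then byte-compile everything
# remotely so a broken sync fails here instead of at agent runtime.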
REMOTE="openclaw@openclaw.tailef61c0.ts.net"
REMOTE_BASE="/home/openclaw/.openclaw/workspace"
for f in trajectory_schema.py reward_signals.py replay_buffer.py tool_graph.py uncertainty_model.py meta_controller.py outcome_logging.py; do
scp "$PWD/syncpatch/$f" "$REMOTE:$REMOTE_BASE/lib/$f"
done
scp "$PWD/syncpatch/agent-orchestrate" "$REMOTE:$REMOTE_BASE/bin/agent-orchestrate"
ssh "$REMOTE" "python3 -m py_compile $REMOTE_BASE/lib/trajectory_schema.py $REMOTE_BASE/lib/reward_signals.py $REMOTE_BASE/lib/replay_buffer.py $REMOTE_BASE/lib/tool_graph.py $REMOTE_BASE/lib/uncertainty_model.py $REMOTE_BASE/lib/meta_controller.py $REMOTE_BASE/lib/outcome_logging.py $REMOTE_BASE/bin/agent-orchestrate"

syncpatch/agent-orchestrate Normal file

File diff suppressed because it is too large.

syncpatch/meta_controller.py Normal file

@@ -0,0 +1,61 @@
from __future__ import annotations
import json
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from tool_graph import build_tool_graph
from uncertainty_model import estimate_uncertainty
def shadow_decision(message: str, analysis: dict[str, Any], family_candidates: list[dict[str, Any]] | None, tool_registry: dict[str, dict[str, Any]]) -> dict[str, Any]:
graph = build_tool_graph(tool_registry)
uncertainty = estimate_uncertainty(message, analysis, family_candidates)
tools = list(analysis.get('tools') or [])
families = [str((item or {}).get('family') or '') for item in (family_candidates or []) if (item or {}).get('family')]
decision = 'answer_direct'
reason = 'single_grounded_or_low_uncertainty'
suggested_memory_mode = ''
if uncertainty['level'] == 'high' and 'ambiguous_access' in families:
decision = 'ask_clarification'
reason = 'ambiguous_service_access'
elif analysis.get('needs_memory') and analysis.get('needs_setup_context'):
decision = 'run_plan'
reason = 'mixed_memory_plus_setup'
suggested_memory_mode = 'setup'
elif analysis.get('needs_memory'):
decision = 'use_memory_mode'
reason = 'memory_required'
suggested_memory_mode = 'profile' if analysis.get('task_type') == 'memory' else 'preference'
elif analysis.get('needs_setup_context') or len(tools) > 1:
decision = 'run_plan'
reason = 'evidence_required'
    elif uncertainty['level'] == 'medium' and tools and tools[0] in graph and graph[tools[0]].groundedness == 'weak':
decision = 'run_plan'
reason = 'weak_grounding_under_uncertainty'
return {
'ts': datetime.now(timezone.utc).isoformat(),
'message': message,
'decision': decision,
'reason': reason,
'suggested_memory_mode': suggested_memory_mode,
'suggested_tools': tools,
'uncertainty': uncertainty,
'family_candidates': families,
'normalized_task': f"{analysis.get('role','')}:{analysis.get('task_type','')}",
'chosen_plan': str(analysis.get('composition_reason') or 'single_tool'),
}
def log_shadow_decision(log_path: Path, decision_row: dict[str, Any]) -> None:
try:
log_path.parent.mkdir(parents=True, exist_ok=True)
with log_path.open('a', encoding='utf-8') as f:
f.write(json.dumps(decision_row, ensure_ascii=False) + '\n')
except Exception:
pass
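
For reference, a minimal sketch of exercising the shadow controller by hand; the registry entries and analysis fields are illustrative stand-ins for what agent-orchestrate supplies, not the live agent's values:

```python
# Hypothetical smoke test for shadow_decision; run next to the syncpatch modules.
from pathlib import Path
from meta_controller import shadow_decision, log_shadow_decision

tool_registry = {
    'setup_lookup': {'kind': 'evidence', 'description': 'Query setup docs'},
    'general_answer': {'kind': 'final', 'description': 'Answer directly'},
}
analysis = {
    'role': 'assistant',
    'task_type': 'setup',
    'tools': ['setup_lookup'],
    'confidence': 0.95,
}
row = shadow_decision('Which port does Emby listen on?', analysis, None, tool_registry)
log_shadow_decision(Path('/tmp/shadow_decisions.jsonl'), row)
print(row['decision'], row['uncertainty']['level'])  # answer_direct low
```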

syncpatch/outcome_logging.py Normal file

@@ -0,0 +1,140 @@
#!/usr/bin/env python3
from __future__ import annotations
import json
from datetime import datetime, timezone
from pathlib import Path
from trajectory_schema import build_trajectory_record
from replay_buffer import append_replay_record, update_policy_stats
from reward_signals import derive_cap_breaches, reward_row
def log_intent_family_shadow(
message: str,
family_info: dict,
before_tools: list[str],
after_tools: list[str],
*,
    log_path: Path,
collect_intent_families,
service_hints,
) -> None:
family = str((family_info or {}).get('family') or '')
if not family:
return
try:
log_path.parent.mkdir(parents=True, exist_ok=True)
try:
candidates = [item.get('family') for item in collect_intent_families(message, service_hints) if item.get('family')]
except Exception:
candidates = [family]
row = {
'ts': datetime.now(timezone.utc).isoformat(),
'message': message,
'family': family,
'family_candidates': candidates,
'before_tool': before_tools[0] if before_tools else '',
'after_tool': after_tools[0] if after_tools else '',
'overridden': (before_tools[:1] != after_tools[:1]),
}
with log_path.open('a', encoding='utf-8') as f:
f.write(json.dumps(row, ensure_ascii=False) + '\n')
except Exception:
pass
def log_intent_composition(
message: str,
family_candidates: list[dict],
analysis_before: dict,
analysis_after: dict,
composition: dict,
*,
    log_path: Path,
) -> None:
if not composition or not composition.get('composed'):
return
try:
log_path.parent.mkdir(parents=True, exist_ok=True)
row = {
'ts': datetime.now(timezone.utc).isoformat(),
'message': message,
'family_candidates': [item.get('family') for item in family_candidates if item.get('family')],
'before_tools': list(analysis_before.get('tools') or []),
'after_tools': list(analysis_after.get('tools') or []),
'reason': composition.get('reason', ''),
'policy': composition.get('policy', analysis_after.get('composition_policy', '')),
'force_sequential': bool(analysis_after.get('force_sequential')),
}
with log_path.open('a', encoding='utf-8') as f:
f.write(json.dumps(row, ensure_ascii=False) + '\n')
except Exception:
pass
def record_task_outcome(
message: str,
analysis: dict,
final_text: str,
evidence_items: list[dict],
*,
status: str = 'success',
error_text: str = '',
    log_path: Path,
classify_intent_family,
collect_intent_families,
service_hints,
refresh_composition_policy_async,
) -> None:
try:
log_path.parent.mkdir(parents=True, exist_ok=True)
grounded_items = [item for item in evidence_items if item.get('grounded')]
try:
candidates = [item.get('family') for item in collect_intent_families(message, service_hints) if item.get('family')]
except Exception:
candidates = []
family = (classify_intent_family(message, service_hints) or {}).get('family', '')
cap_breaches = derive_cap_breaches(error_text, analysis, evidence_items)
row = {
'ts': datetime.now(timezone.utc).isoformat(),
'status': status,
'message': message,
'role': analysis.get('role'),
'task_type': analysis.get('task_type'),
'planned_tools': list(analysis.get('tools') or []),
'used_tools': [item.get('tool') for item in evidence_items],
'family': family,
'family_candidates': candidates,
'grounded_count': len(grounded_items),
'evidence_count': len(evidence_items),
'answer_len': len((final_text or '').strip()),
'needs_memory': bool(analysis.get('needs_memory')),
'needs_setup_context': bool(analysis.get('needs_setup_context')),
'error_text': (error_text or '')[:300],
'composition_reason': str(analysis.get('composition_reason') or ''),
'composition_policy': str(analysis.get('composition_policy') or ''),
'cap_breaches': cap_breaches,
}
reward_info = reward_row(status, analysis, evidence_items, final_text, cap_breaches=cap_breaches)
row.update(reward_info)
with log_path.open('a', encoding='utf-8') as f:
f.write(json.dumps(row, ensure_ascii=False) + '\n')
record = build_trajectory_record(
message=message,
analysis=analysis,
final_text=final_text,
evidence_items=evidence_items,
status=status,
family=family,
family_candidates=candidates,
error_text=error_text,
ts=row['ts'],
cap_breaches=cap_breaches,
).to_dict()
record.update(reward_info)
append_replay_record(record)
update_policy_stats(record)
if row.get('composition_reason') or row.get('composition_policy'):
refresh_composition_policy_async()
except Exception:
pass
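
A minimal sketch of recording one finished task, assuming stub classifier hooks and a throwaway log path; in production these callables come from agent-orchestrate:

```python
# Hypothetical wiring; the lambdas stand in for the orchestrator's classifiers.
from pathlib import Path
from outcome_logging import record_task_outcome

record_task_outcome(
    'List the Emby users',
    {'role': 'assistant', 'task_type': 'setup', 'tools': ['setup_lookup']},
    'Two users: alice and bob.',
    [{'tool': 'setup_lookup', 'grounded': True, 'output': '...'}],
    status='success',
    log_path=Path('/tmp/task_outcomes.jsonl'),
    classify_intent_family=lambda msg, hints: {'family': 'service_admin'},
    collect_intent_families=lambda msg, hints: [{'family': 'service_admin'}],
    service_hints={},
    refresh_composition_policy_async=lambda: None,
)
# Caveat: append_replay_record targets DEFAULT_REPLAY_ROOT under /home/openclaw;
# on another host it may fail, and the outer try/except swallows that silently.
```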

syncpatch/replay_buffer.py Normal file

@@ -0,0 +1,63 @@
from __future__ import annotations
import json
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
DEFAULT_REPLAY_ROOT = Path('/home/openclaw/.openclaw/workspace/data/replay_buffer')
DEFAULT_POLICY_STATS = Path('/home/openclaw/.openclaw/workspace/data/policy_stats.json')
def _safe_slug(value: str) -> str:
out = ''.join(ch if ch.isalnum() or ch in {'_', '-'} else '_' for ch in (value or 'unknown').strip().lower())
return out[:80] or 'unknown'
def replay_path(record: dict[str, Any], replay_root: Path = DEFAULT_REPLAY_ROOT) -> Path:
ts = str(record.get('ts') or datetime.now(timezone.utc).isoformat())
day = ts[:10]
task = _safe_slug(str(record.get('normalized_task') or 'unknown'))
return replay_root / day / f'{task}.jsonl'
def append_replay_record(record: dict[str, Any], replay_root: Path = DEFAULT_REPLAY_ROOT) -> Path:
path = replay_path(record, replay_root)
path.parent.mkdir(parents=True, exist_ok=True)
with path.open('a', encoding='utf-8') as f:
f.write(json.dumps(record, ensure_ascii=False) + '\n')
return path
def update_policy_stats(record: dict[str, Any], stats_path: Path = DEFAULT_POLICY_STATS) -> dict[str, Any]:
stats_path.parent.mkdir(parents=True, exist_ok=True)
try:
data = json.loads(stats_path.read_text(encoding='utf-8'))
except Exception:
data = {'plans': {}, 'families': {}, 'updated_at': ''}
plan_key = str(record.get('chosen_plan') or 'single_tool')
family_key = str(record.get('family') or 'unknown')
reward = float(record.get('reward') or 0.0)
status = str(record.get('outcome_status') or '')
for bucket_name, key in [('plans', plan_key), ('families', family_key)]:
bucket = data.setdefault(bucket_name, {})
row = bucket.setdefault(key, {'count': 0, 'success': 0, 'failure': 0, 'clarification': 0, 'reward_sum': 0.0})
row['count'] += 1
row['reward_sum'] += reward
if status == 'success':
row['success'] += 1
elif status == 'needs_clarification':
row['clarification'] += 1
else:
row['failure'] += 1
data['updated_at'] = datetime.now(timezone.utc).isoformat()
stats_path.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding='utf-8')
return data
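
A minimal sketch of the buffer layout, assuming a throwaway root so nothing lands in the production paths; the record fields mirror trajectory_schema output:

```python
from pathlib import Path
from replay_buffer import append_replay_record, update_policy_stats

record = {
    'ts': '2026-03-21T07:34:09+00:00',
    'normalized_task': 'assistant:setup',
    'chosen_plan': 'single_tool',
    'family': 'service_admin',
    'reward': 5.5,
    'outcome_status': 'success',
}
path = append_replay_record(record, replay_root=Path('/tmp/replay_buffer'))
stats = update_policy_stats(record, stats_path=Path('/tmp/policy_stats.json'))
print(path)  # /tmp/replay_buffer/2026-03-21/assistant_setup.jsonl
print(stats['plans']['single_tool'])  # {'count': 1, 'success': 1, ...}
```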

syncpatch/reward_signals.py Normal file

@@ -0,0 +1,62 @@
from __future__ import annotations
from typing import Any
BASE_REWARDS = {
'success': 5.0,
'needs_clarification': 1.0,
'tool_output_unverified': -1.5,
'tool_failed': -3.0,
'no_result': -2.5,
}
CAP_BREACH_PENALTIES = {
'daily_cap_exceeded': -1.0,
'path_like_payload': -1.0,
}
def derive_cap_breaches(error_text: str, analysis: dict[str, Any], evidence_items: list[dict[str, Any]]) -> list[str]:
text = ' '.join(
[error_text or '']
+ [str(item.get('error') or '') for item in evidence_items]
+ [str(item.get('output') or '')[:200] for item in evidence_items]
).lower()
breaches: list[str] = []
if 'daily_cap_exceeded' in text:
breaches.append('daily_cap_exceeded')
if 'path_like_payload' in text:
breaches.append('path_like_payload')
if 'daily_cap_exceeded' not in breaches:
quarantine = analysis.get('quarantine_reason') or analysis.get('memory_quarantine_reason') or ''
if 'daily_cap_exceeded' in str(quarantine).lower():
breaches.append('daily_cap_exceeded')
return breaches
def compute_reward(status: str, analysis: dict[str, Any], evidence_items: list[dict[str, Any]], final_text: str, *, cap_breaches: list[str] | None = None) -> float:
reward = BASE_REWARDS.get(status, 0.0)
grounded = sum(1 for item in evidence_items if item.get('grounded'))
if grounded:
reward += min(grounded, 3) * 0.5
if analysis.get('force_sequential') and status == 'success':
reward += 0.5
if final_text and len(final_text.strip()) < 24 and status == 'success' and grounded == 0:
reward -= 0.5
for breach in cap_breaches or []:
reward += CAP_BREACH_PENALTIES.get(breach, -0.5)
return reward
def reward_row(status: str, analysis: dict[str, Any], evidence_items: list[dict[str, Any]], final_text: str, *, cap_breaches: list[str] | None = None) -> dict[str, Any]:
cap_breaches = list(cap_breaches or [])
return {
'reward': compute_reward(status, analysis, evidence_items, final_text, cap_breaches=cap_breaches),
'cap_breaches': cap_breaches,
'grounded_count': sum(1 for item in evidence_items if item.get('grounded')),
'evidence_count': len(evidence_items),
}
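
Two worked examples of the shaping above; the numbers follow directly from BASE_REWARDS, the 0.5-per-grounded-item bonus (capped at three items), and CAP_BREACH_PENALTIES. Inputs are illustrative:

```python
from reward_signals import compute_reward, reward_row

evidence = [{'tool': 'setup_lookup', 'grounded': True}]
# success (5.0) + one grounded item (0.5) = 5.5
print(compute_reward('success', {}, evidence, 'Two users: alice and bob.'))
# tool_failed (-3.0) + daily_cap_exceeded penalty (-1.0) = -4.0
print(compute_reward('tool_failed', {}, [], '', cap_breaches=['daily_cap_exceeded']))
print(reward_row('success', {}, evidence, 'Two users: alice and bob.'))
```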

syncpatch/tool_graph.py Normal file

@@ -0,0 +1,99 @@
from __future__ import annotations
from dataclasses import dataclass, asdict
from typing import Any
@dataclass
class ToolNode:
name: str
kind: str
description: str
input_schema: str
output_schema: str
effect_type: str
risk: str
latency_class: str
groundedness: str
cost_class: str
def to_dict(self) -> dict[str, Any]:
return asdict(self)
KIND_TO_EFFECT = {
'action': 'state_change',
'evidence': 'evidence_only',
'final': 'answer_only',
}
KIND_TO_RISK = {
'action': 'high',
'evidence': 'low',
'final': 'medium',
}
KIND_TO_OUTPUT = {
'action': 'action_result',
'evidence': 'evidence_blob',
'final': 'final_answer',
}
def classify_latency(name: str, kind: str) -> str:
if name in {'url_research', 'web_research', 'ops_deep_analyze'}:
return 'slow'
if kind == 'action':
return 'medium'
if kind == 'evidence':
return 'fast'
return 'medium'
def classify_cost(name: str, kind: str) -> str:
if name in {'web_research', 'url_research', 'ops_deep_analyze'}:
return 'high'
if kind == 'action':
return 'medium'
return 'low'
def classify_groundedness(name: str, kind: str) -> str:
if name in {'setup_lookup', 'memory_profile', 'web_root_cause', 'user_service_access_diagnose', 'light_status', 'emby_user_provision'}:
return 'strong'
if kind == 'evidence':
return 'strong'
if name in {'general_answer', 'personal_assist', 'expert_write', 'expert_strategy'}:
return 'weak'
return 'medium'
def infer_input_schema(name: str) -> str:
if name == 'memory_profile':
return 'user_query+memory_mode'
if name == 'setup_lookup':
return 'user_query+service_hint'
return 'user_query'
def build_tool_graph(tool_registry: dict[str, dict[str, Any]]) -> dict[str, ToolNode]:
graph: dict[str, ToolNode] = {}
for name, info in (tool_registry or {}).items():
kind = str(info.get('kind') or 'final')
graph[name] = ToolNode(
name=name,
kind=kind,
description=str(info.get('description') or ''),
input_schema=infer_input_schema(name),
output_schema=KIND_TO_OUTPUT.get(kind, 'opaque'),
effect_type=KIND_TO_EFFECT.get(kind, 'answer_only'),
risk=KIND_TO_RISK.get(kind, 'medium'),
latency_class=classify_latency(name, kind),
groundedness=classify_groundedness(name, kind),
cost_class=classify_cost(name, kind),
)
return graph
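
A minimal sketch of building the graph from a two-entry registry in the shape agent-orchestrate passes in (`{name: {'kind': ..., 'description': ...}}`); the entries are illustrative:

```python
from tool_graph import build_tool_graph

registry = {
    'setup_lookup': {'kind': 'evidence', 'description': 'Query setup docs'},
    'web_research': {'kind': 'evidence', 'description': 'Search the web'},
}
graph = build_tool_graph(registry)
node = graph['web_research']
# evidence kind, but also in the slow/high-cost name sets
print(node.groundedness, node.latency_class, node.cost_class)  # strong slow high
```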

syncpatch/trajectory_schema.py Normal file

@@ -0,0 +1,146 @@
from __future__ import annotations
from dataclasses import dataclass, asdict, field
from datetime import datetime, timezone
from typing import Any
import hashlib
import json
SCHEMA_VERSION = 1
@dataclass
class TrajectoryRecord:
schema_version: int
trajectory_id: str
ts: str
user_query: str
normalized_task: str
role: str
task_type: str
family: str
family_candidates: list[str] = field(default_factory=list)
chosen_plan: str = ''
chosen_tools: list[str] = field(default_factory=list)
used_tools: list[str] = field(default_factory=list)
memory_mode: str = ''
uncertainty: str = ''
grounded_count: int = 0
evidence_count: int = 0
answer_len: int = 0
latency_ms: int | None = None
verification_status: str = ''
outcome_status: str = ''
cap_breaches: list[str] = field(default_factory=list)
user_feedback: str = ''
error_text: str = ''
composition_reason: str = ''
composition_policy: str = ''
needs_memory: bool = False
needs_setup_context: bool = False
metadata: dict[str, Any] = field(default_factory=dict)
def to_dict(self) -> dict[str, Any]:
return asdict(self)
def to_json(self) -> str:
return json.dumps(self.to_dict(), ensure_ascii=False)
def utc_now_iso() -> str:
return datetime.now(timezone.utc).isoformat()
def make_trajectory_id(message: str, ts: str, planned_tools: list[str], used_tools: list[str]) -> str:
digest = hashlib.sha256()
digest.update((message or '').encode('utf-8', errors='ignore'))
digest.update((ts or '').encode('utf-8', errors='ignore'))
digest.update('|'.join(planned_tools or []).encode('utf-8', errors='ignore'))
digest.update('|'.join(used_tools or []).encode('utf-8', errors='ignore'))
return digest.hexdigest()[:24]
def infer_memory_mode(task_type: str, analysis: dict[str, Any], used_tools: list[str]) -> str:
if 'memory_profile' in used_tools:
if task_type == 'memory':
return 'profile'
if analysis.get('needs_setup_context'):
return 'setup'
if analysis.get('needs_memory'):
return 'preference'
return 'profile'
return ''
def infer_uncertainty(status: str, analysis: dict[str, Any], evidence_items: list[dict[str, Any]]) -> str:
confidence = float(analysis.get('confidence', 0.0) or 0.0)
grounded = sum(1 for item in evidence_items if item.get('grounded'))
if status in {'tool_failed', 'no_result', 'tool_output_unverified'}:
return 'high'
if status == 'needs_clarification':
return 'medium'
if confidence >= 0.93 and grounded >= 1:
return 'low'
if confidence >= 0.8:
return 'medium'
return 'high'
def build_trajectory_record(
*,
message: str,
analysis: dict[str, Any],
final_text: str,
evidence_items: list[dict[str, Any]],
status: str,
family: str,
family_candidates: list[str],
error_text: str = '',
ts: str | None = None,
latency_ms: int | None = None,
verification_status: str = '',
cap_breaches: list[str] | None = None,
user_feedback: str = '',
) -> TrajectoryRecord:
ts = ts or utc_now_iso()
planned_tools = list(analysis.get('tools') or [])
used_tools = [str(item.get('tool') or '') for item in evidence_items if item.get('tool')]
task_type = str(analysis.get('task_type') or '')
return TrajectoryRecord(
schema_version=SCHEMA_VERSION,
trajectory_id=make_trajectory_id(message, ts, planned_tools, used_tools),
ts=ts,
user_query=message,
normalized_task=f"{analysis.get('role','')}:{task_type}",
role=str(analysis.get('role') or ''),
task_type=task_type,
family=family,
family_candidates=list(family_candidates or []),
chosen_plan=str(analysis.get('composition_reason') or 'single_tool'),
chosen_tools=planned_tools,
used_tools=used_tools,
memory_mode=infer_memory_mode(task_type, analysis, used_tools),
uncertainty=infer_uncertainty(status, analysis, evidence_items),
grounded_count=sum(1 for item in evidence_items if item.get('grounded')),
evidence_count=len(evidence_items),
answer_len=len((final_text or '').strip()),
latency_ms=latency_ms,
verification_status=verification_status or ('grounded' if any(item.get('grounded') for item in evidence_items) else 'unverified'),
outcome_status=status,
cap_breaches=list(cap_breaches or []),
user_feedback=user_feedback,
error_text=(error_text or '')[:300],
composition_reason=str(analysis.get('composition_reason') or ''),
composition_policy=str(analysis.get('composition_policy') or ''),
needs_memory=bool(analysis.get('needs_memory')),
needs_setup_context=bool(analysis.get('needs_setup_context')),
metadata={
'force_sequential': bool(analysis.get('force_sequential')),
},
)
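
A minimal sketch of building one record by hand; the analysis and evidence dicts are illustrative stand-ins for orchestrator output:

```python
from trajectory_schema import build_trajectory_record

record = build_trajectory_record(
    message='List the Emby users',
    analysis={'role': 'assistant', 'task_type': 'setup',
              'tools': ['setup_lookup'], 'confidence': 0.95},
    final_text='Two users: alice and bob.',
    evidence_items=[{'tool': 'setup_lookup', 'grounded': True}],
    status='success',
    family='service_admin',
    family_candidates=['service_admin'],
)
# high confidence + grounded evidence => uncertainty 'low', verification 'grounded'
print(record.uncertainty, record.verification_status, record.trajectory_id)
```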

syncpatch/uncertainty_model.py Normal file

@@ -0,0 +1,37 @@
from __future__ import annotations
from typing import Any
def estimate_uncertainty(message: str, analysis: dict[str, Any], family_candidates: list[dict[str, Any]] | None = None) -> dict[str, Any]:
candidates = [str((item or {}).get('family') or '') for item in (family_candidates or []) if (item or {}).get('family')]
confidence = float(analysis.get('confidence', 0.0) or 0.0)
score = 0.0
reasons: list[str] = []
if len(set(candidates)) >= 2:
score += 0.35
reasons.append('multiple_family_candidates')
if confidence < 0.75:
score += 0.4
reasons.append('low_confidence')
elif confidence < 0.9:
score += 0.2
reasons.append('medium_confidence')
if analysis.get('needs_memory') and analysis.get('needs_setup_context'):
score += 0.15
reasons.append('mixed_memory_and_setup')
if 'http' in (message or '').lower() and analysis.get('task_type') not in {'summarize', 'research'}:
score += 0.1
reasons.append('url_in_non_research_query')
if analysis.get('composition_reason'):
score += 0.1
reasons.append('composed_path')
if score >= 0.65:
level = 'high'
elif score >= 0.3:
level = 'medium'
else:
level = 'low'
return {'level': level, 'score': round(score, 3), 'reasons': reasons}
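
A worked example of the scoring: two distinct family candidates (+0.35), confidence below 0.75 (+0.4), and mixed memory-plus-setup (+0.15) sum to 0.9, crossing the 0.65 'high' threshold. Inputs are illustrative:

```python
from uncertainty_model import estimate_uncertainty

analysis = {'confidence': 0.7, 'needs_memory': True, 'needs_setup_context': True}
candidates = [{'family': 'media_access'}, {'family': 'ambiguous_access'}]
print(estimate_uncertainty('can you get me into emby?', analysis, candidates))
# {'level': 'high', 'score': 0.9, 'reasons': ['multiple_family_candidates',
#  'low_confidence', 'mixed_memory_and_setup']}
```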