Initial Phase A intelligence core
This commit is contained in:
commit
94eae8ceba
11 changed files with 4261 additions and 0 deletions
11
README.md
Normal file
11
README.md
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
# Openclaw Intelligence Core
|
||||||
|
|
||||||
|
This repository tracks the new intelligence-controller slice for Openclaw:
|
||||||
|
- typed trajectories
|
||||||
|
- reward signals
|
||||||
|
- replay buffer / policy stats
|
||||||
|
- typed tool graph
|
||||||
|
- uncertainty model
|
||||||
|
- shadow meta-controller
|
||||||
|
|
||||||
|
It is intentionally narrow: this repo is the source-of-truth for the new learning and controller layer, not the entire legacy Openclaw workspace.
|
||||||
25
docs/ARCHITECTURE_BOARD.md
Normal file
25
docs/ARCHITECTURE_BOARD.md
Normal file
|
|
@ -0,0 +1,25 @@
|
||||||
|
# Architecture Board
|
||||||
|
|
||||||
|
## Goal
|
||||||
|
Build a local-first Openclaw agent that becomes more intelligent over time through:
|
||||||
|
- typed memory
|
||||||
|
- typed tool graph
|
||||||
|
- trajectory logging
|
||||||
|
- reward signals
|
||||||
|
- shadow meta-controller
|
||||||
|
- offline policy learning
|
||||||
|
- sacred eval gates
|
||||||
|
|
||||||
|
## Hosts
|
||||||
|
- Mac Studio: hot-path inference
|
||||||
|
- openclaw: orchestration and live logging
|
||||||
|
- Unraid: offline learning, retrieval, replay, eval batch jobs
|
||||||
|
- Kimi: offline teacher only
|
||||||
|
|
||||||
|
## Phase A
|
||||||
|
1. Typed trajectory schema
|
||||||
|
2. Reward signals
|
||||||
|
3. Replay buffer + policy stats
|
||||||
|
4. Tool graph
|
||||||
|
5. Uncertainty model
|
||||||
|
6. Shadow meta-controller
|
||||||
9
scripts/deploy_to_openclaw.sh
Executable file
9
scripts/deploy_to_openclaw.sh
Executable file
|
|
@ -0,0 +1,9 @@
|
||||||
|
#!/usr/bin/env bash
# Deploy the Phase A intelligence-core modules to the openclaw host and
# verify that everything deployed still byte-compiles remotely.
set -euo pipefail

REMOTE="openclaw@openclaw.tailef61c0.ts.net"
REMOTE_BASE="/home/openclaw/.openclaw/workspace"

# Single source of truth for the library modules we ship; the scp loop and
# the remote py_compile check are both derived from this list so the two
# can never drift apart.
LIB_FILES=(trajectory_schema.py reward_signals.py replay_buffer.py tool_graph.py uncertainty_model.py meta_controller.py outcome_logging.py)

for f in "${LIB_FILES[@]}"; do
  scp "$PWD/syncpatch/$f" "$REMOTE:$REMOTE_BASE/lib/$f"
done

scp "$PWD/syncpatch/agent-orchestrate" "$REMOTE:$REMOTE_BASE/bin/agent-orchestrate"

# Build the remote py_compile argument list from the same file list.
COMPILE_ARGS=""
for f in "${LIB_FILES[@]}"; do
  COMPILE_ARGS+=" $REMOTE_BASE/lib/$f"
done
ssh "$REMOTE" "python3 -m py_compile$COMPILE_ARGS $REMOTE_BASE/bin/agent-orchestrate"
|
||||||
3608
syncpatch/agent-orchestrate
Normal file
3608
syncpatch/agent-orchestrate
Normal file
File diff suppressed because it is too large
Load diff
61
syncpatch/meta_controller.py
Normal file
61
syncpatch/meta_controller.py
Normal file
|
|
@ -0,0 +1,61 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from tool_graph import build_tool_graph
|
||||||
|
from uncertainty_model import estimate_uncertainty
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def shadow_decision(message: str, analysis: dict[str, Any], family_candidates: list[dict[str, Any]] | None, tool_registry: dict[str, dict[str, Any]]) -> dict[str, Any]:
    """Compute a shadow (non-binding) routing decision for *message*.

    Combines the typed tool graph, the uncertainty estimate, and the
    analysis flags into one of: 'answer_direct', 'ask_clarification',
    'use_memory_mode', or 'run_plan', and returns a loggable row with the
    decision, its reason, and the supporting signals.

    Fix: the original final branch indexed ``tools[0]`` without checking
    that any tool was planned, raising IndexError when ``tools`` is empty
    (the earlier ``len(tools) > 1`` branch only rules out multi-tool plans,
    not zero-tool ones).
    """
    graph = build_tool_graph(tool_registry)
    uncertainty = estimate_uncertainty(message, analysis, family_candidates)
    tools = list(analysis.get('tools') or [])
    families = [str((item or {}).get('family') or '') for item in (family_candidates or []) if (item or {}).get('family')]

    # Defaults: answer directly unless a rule below overrides.
    decision = 'answer_direct'
    reason = 'single_grounded_or_low_uncertainty'
    suggested_memory_mode = ''

    if uncertainty['level'] == 'high' and 'ambiguous_access' in families:
        decision = 'ask_clarification'
        reason = 'ambiguous_service_access'
    elif analysis.get('needs_memory') and analysis.get('needs_setup_context'):
        decision = 'run_plan'
        reason = 'mixed_memory_plus_setup'
        suggested_memory_mode = 'setup'
    elif analysis.get('needs_memory'):
        decision = 'use_memory_mode'
        reason = 'memory_required'
        suggested_memory_mode = 'profile' if analysis.get('task_type') == 'memory' else 'preference'
    elif analysis.get('needs_setup_context') or len(tools) > 1:
        decision = 'run_plan'
        reason = 'evidence_required'
    # Guard on `tools` first: a zero-tool plan previously crashed here.
    elif uncertainty['level'] == 'medium' and tools and (node := graph.get(tools[0])) is not None and node.groundedness == 'weak':
        decision = 'run_plan'
        reason = 'weak_grounding_under_uncertainty'

    return {
        'ts': datetime.now(timezone.utc).isoformat(),
        'message': message,
        'decision': decision,
        'reason': reason,
        'suggested_memory_mode': suggested_memory_mode,
        'suggested_tools': tools,
        'uncertainty': uncertainty,
        'family_candidates': families,
        'normalized_task': f"{analysis.get('role','')}:{analysis.get('task_type','')}",
        'chosen_plan': str(analysis.get('composition_reason') or 'single_tool'),
    }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def log_shadow_decision(log_path, decision_row: dict[str, Any]) -> None:
    """Append *decision_row* as one JSON line to *log_path*, best effort.

    Failures (unwritable path, serialization issues) are deliberately
    swallowed: shadow logging must never break the request hot path.
    """
    try:
        log_path.parent.mkdir(parents=True, exist_ok=True)
        line = json.dumps(decision_row, ensure_ascii=False)
        with log_path.open('a', encoding='utf-8') as handle:
            handle.write(line + '\n')
    except Exception:
        pass
|
||||||
140
syncpatch/outcome_logging.py
Normal file
140
syncpatch/outcome_logging.py
Normal file
|
|
@ -0,0 +1,140 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
from trajectory_schema import build_trajectory_record
|
||||||
|
from replay_buffer import append_replay_record, update_policy_stats
|
||||||
|
from reward_signals import derive_cap_breaches, reward_row
|
||||||
|
|
||||||
|
|
||||||
|
def log_intent_family_shadow(
    message: str,
    family_info: dict,
    before_tools: list[str],
    after_tools: list[str],
    *,
    log_path,
    collect_intent_families,
    service_hints,
) -> None:
    """Record one shadow row comparing the tool choice before/after family routing.

    Best effort: any I/O or classification failure is swallowed so logging
    can never interfere with serving the request.
    """
    primary = str((family_info or {}).get('family') or '')
    if not primary:
        return  # nothing to record without a classified family
    try:
        log_path.parent.mkdir(parents=True, exist_ok=True)
        try:
            ranked = collect_intent_families(message, service_hints)
            candidates = [entry.get('family') for entry in ranked if entry.get('family')]
        except Exception:
            candidates = [primary]  # fall back to the single known family
        first_before = before_tools[0] if before_tools else ''
        first_after = after_tools[0] if after_tools else ''
        row = {
            'ts': datetime.now(timezone.utc).isoformat(),
            'message': message,
            'family': primary,
            'family_candidates': candidates,
            'before_tool': first_before,
            'after_tool': first_after,
            'overridden': (before_tools[:1] != after_tools[:1]),
        }
        with log_path.open('a', encoding='utf-8') as handle:
            handle.write(json.dumps(row, ensure_ascii=False) + '\n')
    except Exception:
        pass
|
||||||
|
|
||||||
|
|
||||||
|
def log_intent_composition(
    message: str,
    family_candidates: list[dict],
    analysis_before: dict,
    analysis_after: dict,
    composition: dict,
    *,
    log_path,
) -> None:
    """Append one JSON line describing a tool-composition decision.

    Only rows where the planner actually composed tools are recorded.
    Best effort: all failures are swallowed.
    """
    if not composition or not composition.get('composed'):
        return
    try:
        log_path.parent.mkdir(parents=True, exist_ok=True)
        named_families = [entry.get('family') for entry in family_candidates if entry.get('family')]
        fallback_policy = analysis_after.get('composition_policy', '')
        row = {
            'ts': datetime.now(timezone.utc).isoformat(),
            'message': message,
            'family_candidates': named_families,
            'before_tools': list(analysis_before.get('tools') or []),
            'after_tools': list(analysis_after.get('tools') or []),
            'reason': composition.get('reason', ''),
            'policy': composition.get('policy', fallback_policy),
            'force_sequential': bool(analysis_after.get('force_sequential')),
        }
        with log_path.open('a', encoding='utf-8') as handle:
            handle.write(json.dumps(row, ensure_ascii=False) + '\n')
    except Exception:
        pass
|
||||||
|
|
||||||
|
|
||||||
|
def record_task_outcome(
    message: str,
    analysis: dict,
    final_text: str,
    evidence_items: list[dict],
    *,
    status: str = 'success',
    error_text: str = '',
    log_path,
    classify_intent_family,
    collect_intent_families,
    service_hints,
    refresh_composition_policy_async,
) -> None:
    """Persist one finished task: JSONL outcome row, replay record, policy stats.

    Best effort end to end: any failure is swallowed so outcome logging can
    never break the calling agent loop.
    """
    try:
        log_path.parent.mkdir(parents=True, exist_ok=True)
        grounded = [entry for entry in evidence_items if entry.get('grounded')]
        try:
            ranked = collect_intent_families(message, service_hints)
            candidates = [entry.get('family') for entry in ranked if entry.get('family')]
        except Exception:
            candidates = []
        family = (classify_intent_family(message, service_hints) or {}).get('family', '')
        cap_breaches = derive_cap_breaches(error_text, analysis, evidence_items)
        row = dict(
            ts=datetime.now(timezone.utc).isoformat(),
            status=status,
            message=message,
            role=analysis.get('role'),
            task_type=analysis.get('task_type'),
            planned_tools=list(analysis.get('tools') or []),
            used_tools=[entry.get('tool') for entry in evidence_items],
            family=family,
            family_candidates=candidates,
            grounded_count=len(grounded),
            evidence_count=len(evidence_items),
            answer_len=len((final_text or '').strip()),
            needs_memory=bool(analysis.get('needs_memory')),
            needs_setup_context=bool(analysis.get('needs_setup_context')),
            error_text=(error_text or '')[:300],
            composition_reason=str(analysis.get('composition_reason') or ''),
            composition_policy=str(analysis.get('composition_policy') or ''),
            cap_breaches=cap_breaches,
        )
        reward_info = reward_row(status, analysis, evidence_items, final_text, cap_breaches=cap_breaches)
        row.update(reward_info)
        with log_path.open('a', encoding='utf-8') as handle:
            handle.write(json.dumps(row, ensure_ascii=False) + '\n')
        trajectory = build_trajectory_record(
            message=message,
            analysis=analysis,
            final_text=final_text,
            evidence_items=evidence_items,
            status=status,
            family=family,
            family_candidates=candidates,
            error_text=error_text,
            ts=row['ts'],
            cap_breaches=cap_breaches,
        ).to_dict()
        trajectory.update(reward_info)
        append_replay_record(trajectory)
        update_policy_stats(trajectory)
        # A recorded composition decision may shift the learned policy.
        if row.get('composition_reason') or row.get('composition_policy'):
            refresh_composition_policy_async()
    except Exception:
        pass
|
||||||
63
syncpatch/replay_buffer.py
Normal file
63
syncpatch/replay_buffer.py
Normal file
|
|
@ -0,0 +1,63 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
# On-disk locations for the learning loop's persisted state.
# Replay records are sharded by day and normalized task under this root
# (see replay_path below).
DEFAULT_REPLAY_ROOT = Path('/home/openclaw/.openclaw/workspace/data/replay_buffer')
# Aggregated per-plan / per-family counters live in one JSON file.
DEFAULT_POLICY_STATS = Path('/home/openclaw/.openclaw/workspace/data/policy_stats.json')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_slug(value: str) -> str:
|
||||||
|
out = ''.join(ch if ch.isalnum() or ch in {'_', '-'} else '_' for ch in (value or 'unknown').strip().lower())
|
||||||
|
return out[:80] or 'unknown'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def replay_path(record: dict[str, Any], replay_root: Path = DEFAULT_REPLAY_ROOT) -> Path:
    """Map a trajectory *record* to its shard: <root>/<YYYY-MM-DD>/<task>.jsonl.

    The day comes from the record's ISO timestamp (now-UTC if missing); the
    file name is the slugified normalized task.
    """
    stamp = str(record.get('ts') or datetime.now(timezone.utc).isoformat())
    day_dir = replay_root / stamp[:10]
    slug = _safe_slug(str(record.get('normalized_task') or 'unknown'))
    return day_dir / f'{slug}.jsonl'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def append_replay_record(record: dict[str, Any], replay_root: Path = DEFAULT_REPLAY_ROOT) -> Path:
    """Append *record* as one JSON line to its shard; return the shard path."""
    shard = replay_path(record, replay_root)
    shard.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(record, ensure_ascii=False)
    with shard.open('a', encoding='utf-8') as handle:
        handle.write(payload + '\n')
    return shard
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def update_policy_stats(record: dict[str, Any], stats_path: Path = DEFAULT_POLICY_STATS) -> dict[str, Any]:
    """Fold one trajectory *record* into the aggregate policy-stats JSON file.

    Counters are kept per chosen plan and per intent family; statuses other
    than 'success' / 'needs_clarification' count as failures. Returns the
    updated stats mapping (also written back to *stats_path*).
    """
    stats_path.parent.mkdir(parents=True, exist_ok=True)
    try:
        stats = json.loads(stats_path.read_text(encoding='utf-8'))
    except Exception:
        # Missing or unreadable file: start from an empty aggregate.
        stats = {'plans': {}, 'families': {}, 'updated_at': ''}

    reward = float(record.get('reward') or 0.0)
    status = str(record.get('outcome_status') or '')
    buckets = (
        ('plans', str(record.get('chosen_plan') or 'single_tool')),
        ('families', str(record.get('family') or 'unknown')),
    )
    for bucket_name, key in buckets:
        bucket = stats.setdefault(bucket_name, {})
        counters = bucket.setdefault(key, {'count': 0, 'success': 0, 'failure': 0, 'clarification': 0, 'reward_sum': 0.0})
        counters['count'] += 1
        counters['reward_sum'] += reward
        if status == 'success':
            counters['success'] += 1
        elif status == 'needs_clarification':
            counters['clarification'] += 1
        else:
            counters['failure'] += 1

    stats['updated_at'] = datetime.now(timezone.utc).isoformat()
    stats_path.write_text(json.dumps(stats, ensure_ascii=False, indent=2), encoding='utf-8')
    return stats
|
||||||
62
syncpatch/reward_signals.py
Normal file
62
syncpatch/reward_signals.py
Normal file
|
|
@ -0,0 +1,62 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
# Base reward per terminal outcome status, the starting point of
# compute_reward (unknown statuses score 0.0 there).
BASE_REWARDS = {
    'success': 5.0,
    'needs_clarification': 1.0,
    'tool_output_unverified': -1.5,
    'tool_failed': -3.0,
    'no_result': -2.5,
}

# Additional penalty applied per detected cap breach (see
# derive_cap_breaches); breaches not listed here cost -0.5 in compute_reward.
CAP_BREACH_PENALTIES = {
    'daily_cap_exceeded': -1.0,
    'path_like_payload': -1.0,
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def derive_cap_breaches(error_text: str, analysis: dict[str, Any], evidence_items: list[dict[str, Any]]) -> list[str]:
    """Scan error/evidence text (and quarantine reasons) for known cap breaches.

    Returns a list drawn from {'daily_cap_exceeded', 'path_like_payload'};
    each breach appears at most once. Evidence outputs are only scanned in
    their first 200 characters, matching the original behavior.
    """
    fragments = [error_text or '']
    for entry in evidence_items:
        fragments.append(str(entry.get('error') or ''))
        fragments.append(str(entry.get('output') or '')[:200])
    haystack = ' '.join(fragments).lower()

    breaches: list[str] = []
    if 'daily_cap_exceeded' in haystack:
        breaches.append('daily_cap_exceeded')
    if 'path_like_payload' in haystack:
        breaches.append('path_like_payload')
    if 'daily_cap_exceeded' not in breaches:
        # The breach may surface only as a (memory) quarantine reason.
        quarantine = analysis.get('quarantine_reason') or analysis.get('memory_quarantine_reason') or ''
        if 'daily_cap_exceeded' in str(quarantine).lower():
            breaches.append('daily_cap_exceeded')
    return breaches
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def compute_reward(status: str, analysis: dict[str, Any], evidence_items: list[dict[str, Any]], final_text: str, *, cap_breaches: list[str] | None = None) -> float:
    """Scalar reward for one task outcome.

    Starts from the BASE_REWARDS entry for *status* (0.0 for unknown
    statuses), then applies:
      +0.5 per grounded evidence item (capped at 3 items)
      +0.5 for a successful forced-sequential plan
      -0.5 for a short (<24 chars), ungrounded 'success' answer
      per-breach penalties from CAP_BREACH_PENALTIES (-0.5 if unlisted)
    """
    score = BASE_REWARDS.get(status, 0.0)
    grounded_total = sum(1 for entry in evidence_items if entry.get('grounded'))
    if grounded_total:
        score += 0.5 * min(grounded_total, 3)
    if status == 'success' and analysis.get('force_sequential'):
        score += 0.5
    if final_text and len(final_text.strip()) < 24 and status == 'success' and grounded_total == 0:
        score -= 0.5
    for breach in (cap_breaches or []):
        score += CAP_BREACH_PENALTIES.get(breach, -0.5)
    return score
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def reward_row(status: str, analysis: dict[str, Any], evidence_items: list[dict[str, Any]], final_text: str, *, cap_breaches: list[str] | None = None) -> dict[str, Any]:
    """Bundle the computed reward with its supporting counters for logging."""
    breaches = list(cap_breaches or [])
    grounded_total = sum(1 for entry in evidence_items if entry.get('grounded'))
    return {
        'reward': compute_reward(status, analysis, evidence_items, final_text, cap_breaches=breaches),
        'cap_breaches': breaches,
        'grounded_count': grounded_total,
        'evidence_count': len(evidence_items),
    }
|
||||||
99
syncpatch/tool_graph.py
Normal file
99
syncpatch/tool_graph.py
Normal file
|
|
@ -0,0 +1,99 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass, asdict
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ToolNode:
    """Typed metadata describing one tool in the tool graph.

    Built by build_tool_graph() from the raw tool registry; the classifier
    fields are derived from the tool's name and kind.
    """

    name: str  # registry key of the tool
    kind: str  # 'action' | 'evidence' | 'final' (keys of the KIND_TO_* maps)
    description: str
    input_schema: str  # coarse input label, see infer_input_schema
    output_schema: str  # from KIND_TO_OUTPUT ('opaque' for unknown kinds)
    effect_type: str  # from KIND_TO_EFFECT
    risk: str  # from KIND_TO_RISK
    latency_class: str  # 'fast' / 'medium' / 'slow', see classify_latency
    groundedness: str  # 'strong' / 'medium' / 'weak', see classify_groundedness
    cost_class: str  # 'low' / 'medium' / 'high', see classify_cost

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of this node (dataclasses.asdict)."""
        return asdict(self)
|
||||||
|
|
||||||
|
|
||||||
|
# Per-kind defaults used by build_tool_graph; unknown kinds fall back there
# to 'answer_only' / 'medium' / 'opaque' respectively.
KIND_TO_EFFECT = {
    'action': 'state_change',
    'evidence': 'evidence_only',
    'final': 'answer_only',
}

KIND_TO_RISK = {
    'action': 'high',
    'evidence': 'low',
    'final': 'medium',
}

KIND_TO_OUTPUT = {
    'action': 'action_result',
    'evidence': 'evidence_blob',
    'final': 'final_answer',
}
|
||||||
|
|
||||||
|
|
||||||
|
def classify_latency(name: str, kind: str) -> str:
    """Bucket a tool into 'fast' / 'medium' / 'slow' expected latency."""
    slow_tools = ('url_research', 'web_research', 'ops_deep_analyze')
    if name in slow_tools:
        return 'slow'
    if kind == 'evidence':
        return 'fast'
    # Actions and everything else default to medium latency.
    return 'medium'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def classify_cost(name: str, kind: str) -> str:
    """Bucket a tool into 'low' / 'medium' / 'high' expected cost."""
    if name in ('web_research', 'url_research', 'ops_deep_analyze'):
        return 'high'
    return 'medium' if kind == 'action' else 'low'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def classify_groundedness(name: str, kind: str) -> str:
    """Classify how strongly a tool's output is grounded in evidence."""
    strong_names = {
        'setup_lookup', 'memory_profile', 'web_root_cause',
        'user_service_access_diagnose', 'light_status', 'emby_user_provision',
    }
    weak_names = {'general_answer', 'personal_assist', 'expert_write', 'expert_strategy'}
    if name in strong_names or kind == 'evidence':
        return 'strong'
    if name in weak_names:
        return 'weak'
    return 'medium'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def infer_input_schema(name: str) -> str:
    """Coarse input-shape label for a tool; default is the bare user query."""
    special = {
        'memory_profile': 'user_query+memory_mode',
        'setup_lookup': 'user_query+service_hint',
    }
    return special.get(name, 'user_query')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def build_tool_graph(tool_registry: dict[str, dict[str, Any]]) -> dict[str, ToolNode]:
    """Materialize one ToolNode per registry entry, deriving typed metadata.

    Missing/unknown 'kind' values are treated as 'final'; unknown kinds fall
    back to the defaults encoded in the KIND_TO_* maps.
    """
    nodes: dict[str, ToolNode] = {}
    for tool_name, meta in (tool_registry or {}).items():
        tool_kind = str(meta.get('kind') or 'final')
        nodes[tool_name] = ToolNode(
            name=tool_name,
            kind=tool_kind,
            description=str(meta.get('description') or ''),
            input_schema=infer_input_schema(tool_name),
            output_schema=KIND_TO_OUTPUT.get(tool_kind, 'opaque'),
            effect_type=KIND_TO_EFFECT.get(tool_kind, 'answer_only'),
            risk=KIND_TO_RISK.get(tool_kind, 'medium'),
            latency_class=classify_latency(tool_name, tool_kind),
            groundedness=classify_groundedness(tool_name, tool_kind),
            cost_class=classify_cost(tool_name, tool_kind),
        )
    return nodes
|
||||||
146
syncpatch/trajectory_schema.py
Normal file
146
syncpatch/trajectory_schema.py
Normal file
|
|
@ -0,0 +1,146 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass, asdict, field
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import Any
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
|
||||||
|
|
||||||
|
# Version stamped into every TrajectoryRecord; bump on incompatible changes.
SCHEMA_VERSION = 1
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class TrajectoryRecord:
    """One fully-typed record of a single agent task, suitable for JSONL replay.

    Produced by build_trajectory_record(); serialized via to_dict()/to_json().
    """

    schema_version: int  # SCHEMA_VERSION at write time
    trajectory_id: str  # 24-hex-char hash, see make_trajectory_id()
    ts: str  # ISO-8601 UTC timestamp
    user_query: str
    normalized_task: str  # "<role>:<task_type>"
    role: str
    task_type: str
    family: str  # primary intent family
    family_candidates: list[str] = field(default_factory=list)
    chosen_plan: str = ''  # composition reason, or 'single_tool'
    chosen_tools: list[str] = field(default_factory=list)  # planner's tool list
    used_tools: list[str] = field(default_factory=list)  # tools seen in evidence items
    memory_mode: str = ''  # '', 'profile', 'setup', 'preference' (see infer_memory_mode)
    uncertainty: str = ''  # 'low' / 'medium' / 'high' (see infer_uncertainty)
    grounded_count: int = 0
    evidence_count: int = 0
    answer_len: int = 0  # length of the stripped final answer
    latency_ms: int | None = None
    verification_status: str = ''  # 'grounded' / 'unverified' unless overridden
    outcome_status: str = ''
    cap_breaches: list[str] = field(default_factory=list)
    user_feedback: str = ''
    error_text: str = ''  # truncated to 300 chars by the builder
    composition_reason: str = ''
    composition_policy: str = ''
    needs_memory: bool = False
    needs_setup_context: bool = False
    metadata: dict[str, Any] = field(default_factory=dict)  # e.g. force_sequential flag

    def to_dict(self) -> dict[str, Any]:
        """Return this record as a plain dict (dataclasses.asdict)."""
        return asdict(self)

    def to_json(self) -> str:
        """Serialize to one JSON line (non-ASCII preserved)."""
        return json.dumps(self.to_dict(), ensure_ascii=False)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def utc_now_iso() -> str:
    """Current UTC time as a timezone-aware ISO-8601 string."""
    now = datetime.now(timezone.utc)
    return now.isoformat()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def make_trajectory_id(message: str, ts: str, planned_tools: list[str], used_tools: list[str]) -> str:
    """Deterministic 24-hex-char id derived from query, timestamp, and tools."""
    parts = (
        message or '',
        ts or '',
        '|'.join(planned_tools or []),
        '|'.join(used_tools or []),
    )
    digest = hashlib.sha256()
    for part in parts:
        digest.update(part.encode('utf-8', errors='ignore'))
    return digest.hexdigest()[:24]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def infer_memory_mode(task_type: str, analysis: dict[str, Any], used_tools: list[str]) -> str:
    """Best-effort reconstruction of the memory mode a trajectory ran under.

    Only meaningful when the 'memory_profile' tool was actually used;
    otherwise returns '' (no memory involved).
    """
    if 'memory_profile' not in used_tools:
        return ''
    if task_type == 'memory':
        return 'profile'
    if analysis.get('needs_setup_context'):
        return 'setup'
    if analysis.get('needs_memory'):
        return 'preference'
    return 'profile'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def infer_uncertainty(status: str, analysis: dict[str, Any], evidence_items: list[dict[str, Any]]) -> str:
    """Map outcome status, confidence, and grounding onto 'low'/'medium'/'high'."""
    if status in {'tool_failed', 'no_result', 'tool_output_unverified'}:
        return 'high'
    if status == 'needs_clarification':
        return 'medium'
    confidence = float(analysis.get('confidence', 0.0) or 0.0)
    grounded_total = sum(1 for entry in evidence_items if entry.get('grounded'))
    # High confidence with at least one grounded item is the only 'low' case.
    if confidence >= 0.93 and grounded_total >= 1:
        return 'low'
    return 'medium' if confidence >= 0.8 else 'high'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def build_trajectory_record(
    *,
    message: str,
    analysis: dict[str, Any],
    final_text: str,
    evidence_items: list[dict[str, Any]],
    status: str,
    family: str,
    family_candidates: list[str],
    error_text: str = '',
    ts: str | None = None,
    latency_ms: int | None = None,
    verification_status: str = '',
    cap_breaches: list[str] | None = None,
    user_feedback: str = '',
) -> TrajectoryRecord:
    """Assemble a TrajectoryRecord from a finished task's raw artifacts.

    Derives the id, memory mode, uncertainty level, and grounding counters;
    the timestamp defaults to now (UTC) when *ts* is not supplied.
    """
    stamp = ts or utc_now_iso()
    planned = list(analysis.get('tools') or [])
    used = [str(entry.get('tool') or '') for entry in evidence_items if entry.get('tool')]
    kind = str(analysis.get('task_type') or '')
    grounded_total = sum(1 for entry in evidence_items if entry.get('grounded'))
    if verification_status:
        verified = verification_status
    else:
        verified = 'grounded' if grounded_total else 'unverified'
    return TrajectoryRecord(
        schema_version=SCHEMA_VERSION,
        trajectory_id=make_trajectory_id(message, stamp, planned, used),
        ts=stamp,
        user_query=message,
        normalized_task=f"{analysis.get('role','')}:{kind}",
        role=str(analysis.get('role') or ''),
        task_type=kind,
        family=family,
        family_candidates=list(family_candidates or []),
        chosen_plan=str(analysis.get('composition_reason') or 'single_tool'),
        chosen_tools=planned,
        used_tools=used,
        memory_mode=infer_memory_mode(kind, analysis, used),
        uncertainty=infer_uncertainty(status, analysis, evidence_items),
        grounded_count=grounded_total,
        evidence_count=len(evidence_items),
        answer_len=len((final_text or '').strip()),
        latency_ms=latency_ms,
        verification_status=verified,
        outcome_status=status,
        cap_breaches=list(cap_breaches or []),
        user_feedback=user_feedback,
        error_text=(error_text or '')[:300],
        composition_reason=str(analysis.get('composition_reason') or ''),
        composition_policy=str(analysis.get('composition_policy') or ''),
        needs_memory=bool(analysis.get('needs_memory')),
        needs_setup_context=bool(analysis.get('needs_setup_context')),
        metadata={'force_sequential': bool(analysis.get('force_sequential'))},
    )
|
||||||
37
syncpatch/uncertainty_model.py
Normal file
37
syncpatch/uncertainty_model.py
Normal file
|
|
@ -0,0 +1,37 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def estimate_uncertainty(message: str, analysis: dict[str, Any], family_candidates: list[dict[str, Any]] | None = None) -> dict[str, Any]:
|
||||||
|
candidates = [str((item or {}).get('family') or '') for item in (family_candidates or []) if (item or {}).get('family')]
|
||||||
|
confidence = float(analysis.get('confidence', 0.0) or 0.0)
|
||||||
|
score = 0.0
|
||||||
|
reasons: list[str] = []
|
||||||
|
if len(set(candidates)) >= 2:
|
||||||
|
score += 0.35
|
||||||
|
reasons.append('multiple_family_candidates')
|
||||||
|
if confidence < 0.75:
|
||||||
|
score += 0.4
|
||||||
|
reasons.append('low_confidence')
|
||||||
|
elif confidence < 0.9:
|
||||||
|
score += 0.2
|
||||||
|
reasons.append('medium_confidence')
|
||||||
|
if analysis.get('needs_memory') and analysis.get('needs_setup_context'):
|
||||||
|
score += 0.15
|
||||||
|
reasons.append('mixed_memory_and_setup')
|
||||||
|
if 'http' in (message or '').lower() and analysis.get('task_type') not in {'summarize', 'research'}:
|
||||||
|
score += 0.1
|
||||||
|
reasons.append('url_in_non_research_query')
|
||||||
|
if analysis.get('composition_reason'):
|
||||||
|
score += 0.1
|
||||||
|
reasons.append('composed_path')
|
||||||
|
|
||||||
|
if score >= 0.65:
|
||||||
|
level = 'high'
|
||||||
|
elif score >= 0.3:
|
||||||
|
level = 'medium'
|
||||||
|
else:
|
||||||
|
level = 'low'
|
||||||
|
return {'level': level, 'score': round(score, 3), 'reasons': reasons}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue