Initial Phase A intelligence core
This commit is contained in:
commit
94eae8ceba
11 changed files with 4261 additions and 0 deletions
3608
syncpatch/agent-orchestrate
Normal file
3608
syncpatch/agent-orchestrate
Normal file
File diff suppressed because it is too large
Load diff
61
syncpatch/meta_controller.py
Normal file
61
syncpatch/meta_controller.py
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from tool_graph import build_tool_graph
|
||||
from uncertainty_model import estimate_uncertainty
|
||||
|
||||
|
||||
|
||||
def shadow_decision(message: str, analysis: dict[str, Any], family_candidates: list[dict[str, Any]] | None, tool_registry: dict[str, dict[str, Any]]) -> dict[str, Any]:
    """Compute a shadow-mode routing decision for *message* without executing it.

    Combines the tool graph, the uncertainty estimate and the analysis flags
    into one of: 'answer_direct', 'ask_clarification', 'use_memory_mode' or
    'run_plan', and returns a JSON-serializable decision row for shadow logging.

    Args:
        message: The raw user query.
        analysis: Planner analysis dict (tools, task_type, needs_* flags, ...).
        family_candidates: Optional ranked intent-family candidate dicts.
        tool_registry: Tool name -> registry info, fed to build_tool_graph.
    """
    graph = build_tool_graph(tool_registry)
    uncertainty = estimate_uncertainty(message, analysis, family_candidates)
    tools = list(analysis.get('tools') or [])
    families = [str((item or {}).get('family') or '') for item in (family_candidates or []) if (item or {}).get('family')]

    decision = 'answer_direct'
    reason = 'single_grounded_or_low_uncertainty'
    suggested_memory_mode = ''

    # BUG FIX: the last branch previously indexed tools[0] unconditionally; with
    # an empty tool list and medium uncertainty that raised IndexError. Resolve
    # the first tool's graph node up front, guarded against an empty plan.
    first_tool_node = graph.get(tools[0]) if tools else None

    if uncertainty['level'] == 'high' and 'ambiguous_access' in families:
        decision = 'ask_clarification'
        reason = 'ambiguous_service_access'
    elif analysis.get('needs_memory') and analysis.get('needs_setup_context'):
        decision = 'run_plan'
        reason = 'mixed_memory_plus_setup'
        suggested_memory_mode = 'setup'
    elif analysis.get('needs_memory'):
        decision = 'use_memory_mode'
        reason = 'memory_required'
        suggested_memory_mode = 'profile' if analysis.get('task_type') == 'memory' else 'preference'
    elif analysis.get('needs_setup_context') or len(tools) > 1:
        decision = 'run_plan'
        reason = 'evidence_required'
    elif uncertainty['level'] == 'medium' and first_tool_node is not None and first_tool_node.groundedness == 'weak':
        decision = 'run_plan'
        reason = 'weak_grounding_under_uncertainty'

    return {
        'ts': datetime.now(timezone.utc).isoformat(),
        'message': message,
        'decision': decision,
        'reason': reason,
        'suggested_memory_mode': suggested_memory_mode,
        'suggested_tools': tools,
        'uncertainty': uncertainty,
        'family_candidates': families,
        'normalized_task': f"{analysis.get('role','')}:{analysis.get('task_type','')}",
        'chosen_plan': str(analysis.get('composition_reason') or 'single_tool'),
    }
|
||||
|
||||
|
||||
|
||||
def log_shadow_decision(log_path, decision_row: dict[str, Any]) -> None:
    """Append one shadow decision row to *log_path* as a JSON line.

    Best-effort: any filesystem or serialization error is swallowed so that
    shadow logging can never break the caller.
    """
    try:
        log_path.parent.mkdir(parents=True, exist_ok=True)
        line = json.dumps(decision_row, ensure_ascii=False)
        with log_path.open('a', encoding='utf-8') as handle:
            handle.write(f'{line}\n')
    except Exception:
        # Deliberate best-effort: shadow logging must stay silent on failure.
        pass
|
||||
140
syncpatch/outcome_logging.py
Normal file
140
syncpatch/outcome_logging.py
Normal file
|
|
@ -0,0 +1,140 @@
|
|||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from trajectory_schema import build_trajectory_record
|
||||
from replay_buffer import append_replay_record, update_policy_stats
|
||||
from reward_signals import derive_cap_breaches, reward_row
|
||||
|
||||
|
||||
def log_intent_family_shadow(
    message: str,
    family_info: dict,
    before_tools: list[str],
    after_tools: list[str],
    *,
    log_path,
    collect_intent_families,
    service_hints,
) -> None:
    """Append a shadow row comparing tool choice before/after family routing.

    No-op when *family_info* carries no family. Best-effort: every error is
    swallowed so logging can never affect the main flow.
    """
    family = str((family_info or {}).get('family') or '')
    if not family:
        return
    try:
        log_path.parent.mkdir(parents=True, exist_ok=True)
        try:
            ranked = collect_intent_families(message, service_hints)
            candidates = [entry.get('family') for entry in ranked if entry.get('family')]
        except Exception:
            # Classifier failure: degrade to the single family we already know.
            candidates = [family]
        first_before = before_tools[0] if before_tools else ''
        first_after = after_tools[0] if after_tools else ''
        payload = {
            'ts': datetime.now(timezone.utc).isoformat(),
            'message': message,
            'family': family,
            'family_candidates': candidates,
            'before_tool': first_before,
            'after_tool': first_after,
            'overridden': (before_tools[:1] != after_tools[:1]),
        }
        with log_path.open('a', encoding='utf-8') as handle:
            handle.write(json.dumps(payload, ensure_ascii=False) + '\n')
    except Exception:
        # Deliberate best-effort: shadow logging is advisory only.
        pass
|
||||
|
||||
|
||||
def log_intent_composition(
    message: str,
    family_candidates: list[dict],
    analysis_before: dict,
    analysis_after: dict,
    composition: dict,
    *,
    log_path,
) -> None:
    """Append a JSONL row describing a composed tool plan (before vs after).

    Skipped entirely unless *composition* is marked 'composed'. Best-effort:
    any error during logging is swallowed.
    """
    if not composition or not composition.get('composed'):
        return
    try:
        log_path.parent.mkdir(parents=True, exist_ok=True)
        named = [entry.get('family') for entry in family_candidates if entry.get('family')]
        payload = {
            'ts': datetime.now(timezone.utc).isoformat(),
            'message': message,
            'family_candidates': named,
            'before_tools': list(analysis_before.get('tools') or []),
            'after_tools': list(analysis_after.get('tools') or []),
            'reason': composition.get('reason', ''),
            'policy': composition.get('policy', analysis_after.get('composition_policy', '')),
            'force_sequential': bool(analysis_after.get('force_sequential')),
        }
        with log_path.open('a', encoding='utf-8') as handle:
            handle.write(json.dumps(payload, ensure_ascii=False) + '\n')
    except Exception:
        # Deliberate best-effort logging.
        pass
|
||||
|
||||
|
||||
def record_task_outcome(
    message: str,
    analysis: dict,
    final_text: str,
    evidence_items: list[dict],
    *,
    status: str = 'success',
    error_text: str = '',
    log_path,
    classify_intent_family,
    collect_intent_families,
    service_hints,
    refresh_composition_policy_async,
) -> None:
    """Persist one completed task: outcome log row, replay record, policy stats.

    Best-effort end to end: any failure (filesystem, classifier, stats update)
    is swallowed so outcome recording can never break the caller.

    Args:
        message: The original user query.
        analysis: Planner analysis dict (role, task_type, tools, flags, ...).
        final_text: Final answer text; only its stripped length is recorded.
        evidence_items: Per-tool results; items may carry 'tool', 'grounded',
            'error' and 'output' keys.
        status: Outcome status label (default 'success').
        error_text: Optional error description, truncated to 300 chars.
        log_path: Path-like destination for the JSONL outcome log.
        classify_intent_family: Callable(message, service_hints) -> family dict.
        collect_intent_families: Callable(message, service_hints) -> candidates.
        service_hints: Opaque hints forwarded to both classifiers.
        refresh_composition_policy_async: Zero-arg callable invoked when the
            outcome involved a composed plan.
    """
    try:
        log_path.parent.mkdir(parents=True, exist_ok=True)
        grounded_items = [item for item in evidence_items if item.get('grounded')]
        # Candidate families are advisory; classifier failure degrades to [].
        try:
            candidates = [item.get('family') for item in collect_intent_families(message, service_hints) if item.get('family')]
        except Exception:
            candidates = []
        family = (classify_intent_family(message, service_hints) or {}).get('family', '')
        cap_breaches = derive_cap_breaches(error_text, analysis, evidence_items)
        row = {
            'ts': datetime.now(timezone.utc).isoformat(),
            'status': status,
            'message': message,
            'role': analysis.get('role'),
            'task_type': analysis.get('task_type'),
            'planned_tools': list(analysis.get('tools') or []),
            'used_tools': [item.get('tool') for item in evidence_items],
            'family': family,
            'family_candidates': candidates,
            'grounded_count': len(grounded_items),
            'evidence_count': len(evidence_items),
            'answer_len': len((final_text or '').strip()),
            'needs_memory': bool(analysis.get('needs_memory')),
            'needs_setup_context': bool(analysis.get('needs_setup_context')),
            'error_text': (error_text or '')[:300],
            'composition_reason': str(analysis.get('composition_reason') or ''),
            'composition_policy': str(analysis.get('composition_policy') or ''),
            'cap_breaches': cap_breaches,
        }
        # Reward fields (reward, cap_breaches, counters) extend/overwrite the row.
        reward_info = reward_row(status, analysis, evidence_items, final_text, cap_breaches=cap_breaches)
        row.update(reward_info)
        with log_path.open('a', encoding='utf-8') as f:
            f.write(json.dumps(row, ensure_ascii=False) + '\n')
        # Mirror the outcome into the replay buffer and aggregate policy stats,
        # reusing the same timestamp so the log row and record stay correlated.
        record = build_trajectory_record(
            message=message,
            analysis=analysis,
            final_text=final_text,
            evidence_items=evidence_items,
            status=status,
            family=family,
            family_candidates=candidates,
            error_text=error_text,
            ts=row['ts'],
            cap_breaches=cap_breaches,
        ).to_dict()
        record.update(reward_info)
        append_replay_record(record)
        update_policy_stats(record)
        # Composed-plan outcomes trigger a background policy refresh.
        if row.get('composition_reason') or row.get('composition_policy'):
            refresh_composition_policy_async()
    except Exception:
        # Deliberate best-effort: outcome recording must never raise.
        pass
|
||||
63
syncpatch/replay_buffer.py
Normal file
63
syncpatch/replay_buffer.py
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
# Default on-disk locations for the replay buffer and the aggregated policy
# stats; every public function below accepts an override argument.
DEFAULT_REPLAY_ROOT = Path('/home/openclaw/.openclaw/workspace/data/replay_buffer')
DEFAULT_POLICY_STATS = Path('/home/openclaw/.openclaw/workspace/data/policy_stats.json')
|
||||
|
||||
|
||||
|
||||
def _safe_slug(value: str) -> str:
|
||||
out = ''.join(ch if ch.isalnum() or ch in {'_', '-'} else '_' for ch in (value or 'unknown').strip().lower())
|
||||
return out[:80] or 'unknown'
|
||||
|
||||
|
||||
|
||||
def replay_path(record: dict[str, Any], replay_root: Path = DEFAULT_REPLAY_ROOT) -> Path:
    """Return the replay file for *record*: <root>/<YYYY-MM-DD>/<task>.jsonl.

    Falls back to the current UTC timestamp when the record has no 'ts'.
    """
    stamp = str(record.get('ts') or datetime.now(timezone.utc).isoformat())
    day_dir = stamp[:10]
    task_slug = _safe_slug(str(record.get('normalized_task') or 'unknown'))
    return replay_root / day_dir / f'{task_slug}.jsonl'
|
||||
|
||||
|
||||
|
||||
def append_replay_record(record: dict[str, Any], replay_root: Path = DEFAULT_REPLAY_ROOT) -> Path:
    """Append *record* as one JSON line to its replay file; return that path."""
    target = replay_path(record, replay_root)
    target.parent.mkdir(parents=True, exist_ok=True)
    line = json.dumps(record, ensure_ascii=False)
    with target.open('a', encoding='utf-8') as handle:
        handle.write(line + '\n')
    return target
|
||||
|
||||
|
||||
|
||||
def update_policy_stats(record: dict[str, Any], stats_path: Path = DEFAULT_POLICY_STATS) -> dict[str, Any]:
    """Fold one trajectory *record* into the aggregate policy-stats JSON file.

    Tallies count / success / failure / clarification and the reward sum per
    chosen plan and per intent family, stamps 'updated_at', rewrites the file
    and returns the updated structure. A missing or unreadable stats file
    starts fresh. NOTE(review): read-modify-write is not atomic — concurrent
    writers could lose updates; confirm single-writer usage.
    """
    stats_path.parent.mkdir(parents=True, exist_ok=True)
    try:
        data = json.loads(stats_path.read_text(encoding='utf-8'))
    except Exception:
        # First run or corrupt file: begin with an empty structure.
        data = {'plans': {}, 'families': {}, 'updated_at': ''}

    reward = float(record.get('reward') or 0.0)
    status = str(record.get('outcome_status') or '')
    buckets = (
        ('plans', str(record.get('chosen_plan') or 'single_tool')),
        ('families', str(record.get('family') or 'unknown')),
    )
    for bucket_name, key in buckets:
        bucket = data.setdefault(bucket_name, {})
        entry = bucket.setdefault(key, {'count': 0, 'success': 0, 'failure': 0, 'clarification': 0, 'reward_sum': 0.0})
        entry['count'] += 1
        entry['reward_sum'] += reward
        if status == 'success':
            entry['success'] += 1
        elif status == 'needs_clarification':
            entry['clarification'] += 1
        else:
            # Anything else (including an empty status) counts as failure.
            entry['failure'] += 1

    data['updated_at'] = datetime.now(timezone.utc).isoformat()
    stats_path.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding='utf-8')
    return data
|
||||
62
syncpatch/reward_signals.py
Normal file
62
syncpatch/reward_signals.py
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
|
||||
# Base reward per final task status: positive statuses encourage the policy,
# failure statuses penalize it (see compute_reward for the shaping on top).
BASE_REWARDS = {
    'success': 5.0,
    'needs_clarification': 1.0,
    'tool_output_unverified': -1.5,
    'tool_failed': -3.0,
    'no_result': -2.5,
}

# Additional penalty applied per detected cap breach; unlisted breach names
# fall back to -0.5 inside compute_reward.
CAP_BREACH_PENALTIES = {
    'daily_cap_exceeded': -1.0,
    'path_like_payload': -1.0,
}
|
||||
|
||||
|
||||
|
||||
def derive_cap_breaches(error_text: str, analysis: dict[str, Any], evidence_items: list[dict[str, Any]]) -> list[str]:
    """Scan error/output text (plus quarantine reasons) for cap-breach markers.

    Searches *error_text*, every evidence item's 'error', and the first 200
    characters of every item's 'output' (case-insensitively) for the known
    markers 'daily_cap_exceeded' and 'path_like_payload'; the daily-cap marker
    is also sought in the analysis quarantine reasons. Each breach is reported
    at most once.
    """
    fragments = [error_text or '']
    for item in evidence_items:
        fragments.append(str(item.get('error') or ''))
    for item in evidence_items:
        # Only the head of each output is scanned to bound the search.
        fragments.append(str(item.get('output') or '')[:200])
    haystack = ' '.join(fragments).lower()

    found: list[str] = []
    for marker in ('daily_cap_exceeded', 'path_like_payload'):
        if marker in haystack:
            found.append(marker)
    if 'daily_cap_exceeded' not in found:
        quarantine = analysis.get('quarantine_reason') or analysis.get('memory_quarantine_reason') or ''
        if 'daily_cap_exceeded' in str(quarantine).lower():
            found.append('daily_cap_exceeded')
    return found
|
||||
|
||||
|
||||
|
||||
def compute_reward(status: str, analysis: dict[str, Any], evidence_items: list[dict[str, Any]], final_text: str, *, cap_breaches: list[str] | None = None) -> float:
    """Score one task outcome: base status reward plus shaping terms.

    Bonuses: up to three grounded evidence items (+0.5 each) and a successful
    forced-sequential plan (+0.5). Penalties: a short ungrounded 'success'
    answer (-0.5) and every cap breach (per CAP_BREACH_PENALTIES, default -0.5
    for unlisted breach names). Unknown statuses start from 0.0.
    """
    grounded_total = sum(1 for item in evidence_items if item.get('grounded'))
    score = BASE_REWARDS.get(status, 0.0)
    if grounded_total:
        score += 0.5 * min(grounded_total, 3)
    if status == 'success' and analysis.get('force_sequential'):
        score += 0.5
    is_short_answer = bool(final_text) and len(final_text.strip()) < 24
    if is_short_answer and status == 'success' and grounded_total == 0:
        score -= 0.5
    for breach in (cap_breaches or []):
        score += CAP_BREACH_PENALTIES.get(breach, -0.5)
    return score
|
||||
|
||||
|
||||
|
||||
def reward_row(status: str, analysis: dict[str, Any], evidence_items: list[dict[str, Any]], final_text: str, *, cap_breaches: list[str] | None = None) -> dict[str, Any]:
    """Bundle the computed reward with its grounding/evidence counters."""
    breaches = list(cap_breaches or [])
    grounded_total = sum(1 for item in evidence_items if item.get('grounded'))
    return {
        'reward': compute_reward(status, analysis, evidence_items, final_text, cap_breaches=breaches),
        'cap_breaches': breaches,
        'grounded_count': grounded_total,
        'evidence_count': len(evidence_items),
    }
|
||||
99
syncpatch/tool_graph.py
Normal file
99
syncpatch/tool_graph.py
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, asdict
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass
class ToolNode:
    """One node in the tool graph: a tool plus its routing metadata.

    Instances are built by build_tool_graph; the kind-derived fields come from
    the KIND_TO_* tables and the classify_* helpers in this module.
    """

    name: str            # registry key for the tool
    kind: str            # 'action' | 'evidence' | 'final' (registry 'kind')
    description: str     # human-readable description from the registry
    input_schema: str    # coarse input label, see infer_input_schema
    output_schema: str   # coarse output label, from KIND_TO_OUTPUT
    effect_type: str     # side-effect class, from KIND_TO_EFFECT
    risk: str            # 'low' | 'medium' | 'high', from KIND_TO_RISK
    latency_class: str   # 'fast' | 'medium' | 'slow', see classify_latency
    groundedness: str    # 'weak' | 'medium' | 'strong', see classify_groundedness
    cost_class: str      # 'low' | 'medium' | 'high', see classify_cost

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict copy of this node (JSON-friendly)."""
        return asdict(self)
|
||||
|
||||
|
||||
# Kind-derived defaults used by build_tool_graph; unknown kinds fall back to
# 'answer_only' / 'medium' / 'opaque' respectively at the lookup sites.
KIND_TO_EFFECT = {
    'action': 'state_change',
    'evidence': 'evidence_only',
    'final': 'answer_only',
}

# Risk level per tool kind: actions mutate state and rank highest.
KIND_TO_RISK = {
    'action': 'high',
    'evidence': 'low',
    'final': 'medium',
}

# Output-schema label per tool kind.
KIND_TO_OUTPUT = {
    'action': 'action_result',
    'evidence': 'evidence_blob',
    'final': 'final_answer',
}
|
||||
|
||||
|
||||
def classify_latency(name: str, kind: str) -> str:
    """Bucket a tool into 'fast' / 'medium' / 'slow' expected latency."""
    # A few research-style tools are known to be slow regardless of kind.
    if name in ('url_research', 'web_research', 'ops_deep_analyze'):
        return 'slow'
    # Evidence tools are quick lookups; everything else defaults to medium.
    return 'fast' if kind == 'evidence' else 'medium'
|
||||
|
||||
|
||||
|
||||
def classify_cost(name: str, kind: str) -> str:
    """Bucket a tool into 'low' / 'medium' / 'high' expected cost."""
    # Research tools dominate cost regardless of their registry kind.
    if name in ('web_research', 'url_research', 'ops_deep_analyze'):
        return 'high'
    return 'medium' if kind == 'action' else 'low'
|
||||
|
||||
|
||||
|
||||
def classify_groundedness(name: str, kind: str) -> str:
    """Rate how verifiable a tool's output is: 'strong', 'medium' or 'weak'."""
    strong_names = {
        'setup_lookup', 'memory_profile', 'web_root_cause',
        'user_service_access_diagnose', 'light_status', 'emby_user_provision',
    }
    weak_names = {'general_answer', 'personal_assist', 'expert_write', 'expert_strategy'}
    # Evidence-kind tools and the known diagnostic tools are strongly grounded.
    if name in strong_names or kind == 'evidence':
        return 'strong'
    if name in weak_names:
        return 'weak'
    return 'medium'
|
||||
|
||||
|
||||
|
||||
def infer_input_schema(name: str) -> str:
    """Return the coarse input-schema label for tool *name*.

    Two tools take an extra routing argument; every other tool consumes the
    bare user query.
    """
    special = {
        'memory_profile': 'user_query+memory_mode',
        'setup_lookup': 'user_query+service_hint',
    }
    return special.get(name, 'user_query')
|
||||
|
||||
|
||||
|
||||
def build_tool_graph(tool_registry: dict[str, dict[str, Any]]) -> dict[str, ToolNode]:
    """Materialize one ToolNode per registry entry, keyed by tool name.

    A missing or falsy registry 'kind' defaults to 'final'; unknown kinds pick
    up the fallback values at each KIND_TO_* lookup. An empty/None registry
    yields an empty graph.
    """
    nodes: dict[str, ToolNode] = {}
    for tool_name, meta in (tool_registry or {}).items():
        tool_kind = str(meta.get('kind') or 'final')
        nodes[tool_name] = ToolNode(
            name=tool_name,
            kind=tool_kind,
            description=str(meta.get('description') or ''),
            input_schema=infer_input_schema(tool_name),
            output_schema=KIND_TO_OUTPUT.get(tool_kind, 'opaque'),
            effect_type=KIND_TO_EFFECT.get(tool_kind, 'answer_only'),
            risk=KIND_TO_RISK.get(tool_kind, 'medium'),
            latency_class=classify_latency(tool_name, tool_kind),
            groundedness=classify_groundedness(tool_name, tool_kind),
            cost_class=classify_cost(tool_name, tool_kind),
        )
    return nodes
|
||||
146
syncpatch/trajectory_schema.py
Normal file
146
syncpatch/trajectory_schema.py
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, asdict, field
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
import hashlib
|
||||
import json
|
||||
|
||||
|
||||
# Stored in every TrajectoryRecord; bump when the field set changes incompatibly.
SCHEMA_VERSION = 1
|
||||
|
||||
|
||||
@dataclass
class TrajectoryRecord:
    """One fully-described task trajectory, ready for JSONL replay storage.

    Built by build_trajectory_record; serialized via to_dict / to_json.
    """

    schema_version: int                  # see SCHEMA_VERSION
    trajectory_id: str                   # stable hash id, see make_trajectory_id
    ts: str                              # ISO-8601 UTC timestamp
    user_query: str                      # original user message
    normalized_task: str                 # "role:task_type" key
    role: str
    task_type: str
    family: str                          # classified intent family
    family_candidates: list[str] = field(default_factory=list)
    chosen_plan: str = ''                # composition reason, or 'single_tool'
    chosen_tools: list[str] = field(default_factory=list)   # planned tools
    used_tools: list[str] = field(default_factory=list)     # tools that produced evidence
    memory_mode: str = ''                # '', 'profile', 'setup' or 'preference'
    uncertainty: str = ''                # 'low' / 'medium' / 'high'
    grounded_count: int = 0
    evidence_count: int = 0
    answer_len: int = 0                  # length of the stripped final answer
    latency_ms: int | None = None
    verification_status: str = ''        # 'grounded' / 'unverified' or caller-supplied
    outcome_status: str = ''
    cap_breaches: list[str] = field(default_factory=list)
    user_feedback: str = ''
    error_text: str = ''                 # truncated to 300 chars by the builder
    composition_reason: str = ''
    composition_policy: str = ''
    needs_memory: bool = False
    needs_setup_context: bool = False
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict copy (JSON-serializable)."""
        return asdict(self)

    def to_json(self) -> str:
        """Serialize to one JSON string, preserving non-ASCII characters."""
        return json.dumps(self.to_dict(), ensure_ascii=False)
|
||||
|
||||
|
||||
|
||||
def utc_now_iso() -> str:
    """Current UTC time as an ISO-8601 string (carries the '+00:00' offset)."""
    now = datetime.now(timezone.utc)
    return now.isoformat()
|
||||
|
||||
|
||||
|
||||
def make_trajectory_id(message: str, ts: str, planned_tools: list[str], used_tools: list[str]) -> str:
    """Derive a stable 24-hex-char id from the query, timestamp and tool lists.

    None inputs are treated as empty; tool lists are joined with '|' before
    hashing, so the id is deterministic for identical inputs.
    """
    hasher = hashlib.sha256()
    for part in (
        message or '',
        ts or '',
        '|'.join(planned_tools or []),
        '|'.join(used_tools or []),
    ):
        hasher.update(part.encode('utf-8', errors='ignore'))
    return hasher.hexdigest()[:24]
|
||||
|
||||
|
||||
|
||||
def infer_memory_mode(task_type: str, analysis: dict[str, Any], used_tools: list[str]) -> str:
    """Guess which mode the memory_profile tool ran in; '' when it did not run."""
    if 'memory_profile' not in used_tools:
        return ''
    if task_type == 'memory':
        return 'profile'
    if analysis.get('needs_setup_context'):
        return 'setup'
    # needs_memory without setup context reads like a preference lookup;
    # anything else defaults to a plain profile read.
    return 'preference' if analysis.get('needs_memory') else 'profile'
|
||||
|
||||
|
||||
|
||||
def infer_uncertainty(status: str, analysis: dict[str, Any], evidence_items: list[dict[str, Any]]) -> str:
    """Collapse outcome status, confidence and grounding into low/medium/high."""
    # Hard failures dominate everything else.
    if status in {'tool_failed', 'no_result', 'tool_output_unverified'}:
        return 'high'
    if status == 'needs_clarification':
        return 'medium'
    confidence = float(analysis.get('confidence', 0.0) or 0.0)
    grounded_total = sum(1 for item in evidence_items if item.get('grounded'))
    # 'low' requires both very high confidence and at least one grounded item.
    if grounded_total >= 1 and confidence >= 0.93:
        return 'low'
    return 'medium' if confidence >= 0.8 else 'high'
|
||||
|
||||
|
||||
|
||||
def build_trajectory_record(
    *,
    message: str,
    analysis: dict[str, Any],
    final_text: str,
    evidence_items: list[dict[str, Any]],
    status: str,
    family: str,
    family_candidates: list[str],
    error_text: str = '',
    ts: str | None = None,
    latency_ms: int | None = None,
    verification_status: str = '',
    cap_breaches: list[str] | None = None,
    user_feedback: str = '',
) -> TrajectoryRecord:
    """Assemble a TrajectoryRecord from one completed task's raw artifacts.

    Keyword-only on purpose: the argument list is long and mostly same-typed.
    Derives the trajectory id, memory mode and uncertainty level from the
    inputs; *ts* defaults to the current UTC time and *error_text* is
    truncated to 300 characters.
    """
    ts = ts or utc_now_iso()
    planned_tools = list(analysis.get('tools') or [])
    # Only evidence items that actually name a tool count as "used".
    used_tools = [str(item.get('tool') or '') for item in evidence_items if item.get('tool')]
    task_type = str(analysis.get('task_type') or '')
    return TrajectoryRecord(
        schema_version=SCHEMA_VERSION,
        trajectory_id=make_trajectory_id(message, ts, planned_tools, used_tools),
        ts=ts,
        user_query=message,
        normalized_task=f"{analysis.get('role','')}:{task_type}",
        role=str(analysis.get('role') or ''),
        task_type=task_type,
        family=family,
        family_candidates=list(family_candidates or []),
        chosen_plan=str(analysis.get('composition_reason') or 'single_tool'),
        chosen_tools=planned_tools,
        used_tools=used_tools,
        memory_mode=infer_memory_mode(task_type, analysis, used_tools),
        uncertainty=infer_uncertainty(status, analysis, evidence_items),
        grounded_count=sum(1 for item in evidence_items if item.get('grounded')),
        evidence_count=len(evidence_items),
        answer_len=len((final_text or '').strip()),
        latency_ms=latency_ms,
        # Caller-supplied status wins; otherwise any grounded item => 'grounded'.
        verification_status=verification_status or ('grounded' if any(item.get('grounded') for item in evidence_items) else 'unverified'),
        outcome_status=status,
        cap_breaches=list(cap_breaches or []),
        user_feedback=user_feedback,
        error_text=(error_text or '')[:300],
        composition_reason=str(analysis.get('composition_reason') or ''),
        composition_policy=str(analysis.get('composition_policy') or ''),
        needs_memory=bool(analysis.get('needs_memory')),
        needs_setup_context=bool(analysis.get('needs_setup_context')),
        metadata={
            'force_sequential': bool(analysis.get('force_sequential')),
        },
    )
|
||||
37
syncpatch/uncertainty_model.py
Normal file
37
syncpatch/uncertainty_model.py
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
|
||||
|
||||
def estimate_uncertainty(message: str, analysis: dict[str, Any], family_candidates: list[dict[str, Any]] | None = None) -> dict[str, Any]:
    """Heuristic uncertainty estimate for a routing decision.

    Accumulates a score from candidate ambiguity, classifier confidence and a
    few message/analysis signals, then buckets it into low/medium/high.
    Returns {'level': str, 'score': float (rounded to 3), 'reasons': list[str]}.
    """
    named = [str((entry or {}).get('family') or '') for entry in (family_candidates or []) if (entry or {}).get('family')]
    confidence = float(analysis.get('confidence', 0.0) or 0.0)

    score = 0.0
    reasons: list[str] = []

    def add(points: float, why: str) -> None:
        nonlocal score
        score += points
        reasons.append(why)

    if len(set(named)) >= 2:
        add(0.35, 'multiple_family_candidates')
    if confidence < 0.75:
        add(0.4, 'low_confidence')
    elif confidence < 0.9:
        add(0.2, 'medium_confidence')
    if analysis.get('needs_memory') and analysis.get('needs_setup_context'):
        add(0.15, 'mixed_memory_and_setup')
    if 'http' in (message or '').lower() and analysis.get('task_type') not in {'summarize', 'research'}:
        add(0.1, 'url_in_non_research_query')
    if analysis.get('composition_reason'):
        add(0.1, 'composed_path')

    if score >= 0.65:
        level = 'high'
    elif score >= 0.3:
        level = 'medium'
    else:
        level = 'low'
    return {'level': level, 'score': round(score, 3), 'reasons': reasons}
|
||||
Loading…
Add table
Add a link
Reference in a new issue