Make sacred gate a hard-bounded wrapper
This commit is contained in:
parent
b1320a65f0
commit
93f21c5588
2 changed files with 44 additions and 11 deletions
|
|
@ -2,5 +2,10 @@
|
||||||
"min_route_accuracy_percent": 95.0,
|
"min_route_accuracy_percent": 95.0,
|
||||||
"min_memory_signal_accuracy_percent": 90.0,
|
"min_memory_signal_accuracy_percent": 90.0,
|
||||||
"max_timeouts": 0,
|
"max_timeouts": 0,
|
||||||
"require_pass": true
|
"require_pass": true,
|
||||||
|
"runner_timeout_seconds": 90,
|
||||||
|
"runner_start": 0,
|
||||||
|
"runner_limit": 5,
|
||||||
|
"runner_per_case_timeout": 8,
|
||||||
|
"runner_progress_every": 5
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,10 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
|
import os
|
||||||
|
import signal
|
||||||
import subprocess
|
import subprocess
|
||||||
|
from datetime import datetime, timezone
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
ROOT = Path('/home/openclaw/.openclaw/workspace')
|
ROOT = Path('/home/openclaw/.openclaw/workspace')
|
||||||
|
|
@ -20,40 +23,64 @@ def load_config() -> dict:
|
||||||
'min_memory_signal_accuracy_percent': 90.0,
|
'min_memory_signal_accuracy_percent': 90.0,
|
||||||
'max_timeouts': 0,
|
'max_timeouts': 0,
|
||||||
'require_pass': True,
|
'require_pass': True,
|
||||||
'runner_timeout_seconds': 180,
|
'runner_timeout_seconds': 90,
|
||||||
'runner_start': 0,
|
'runner_start': 0,
|
||||||
'runner_limit': 14,
|
'runner_limit': 5,
|
||||||
|
'runner_per_case_timeout': 8,
|
||||||
|
'runner_progress_every': 5,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def run_regression(cfg: dict) -> tuple[int, str, str]:
|
def gate_output_path() -> Path:
    """Build the result-file path for a single gate run.

    The file name embeds the current UTC time at one-second resolution
    (``sacred_gate_YYYYmmddTHHMMSSZ.json``) and lives under
    ``ROOT/evals/results``. Nothing is created on disk here.
    """
    stamp = datetime.now(timezone.utc).strftime('%Y%m%dT%H%M%SZ')
    results_dir = ROOT / 'evals' / 'results'
    return results_dir / f'sacred_gate_{stamp}.json'
|
||||||
|
|
||||||
|
|
||||||
|
def _terminate_process_group(proc: subprocess.Popen, grace_seconds: float = 5.0) -> None:
    """Stop *proc* and its process group, escalating SIGTERM -> SIGKILL.

    After each signal the child is given ``grace_seconds`` to exit; the
    ``communicate`` call both drains the pipes and reaps the child so no
    zombie is left behind. The total wait is bounded by two grace periods.
    """
    for sig in (signal.SIGTERM, signal.SIGKILL):
        try:
            # The child was started with start_new_session=True, so its pid
            # is also its process-group id.
            os.killpg(proc.pid, sig)
        except OSError:
            # Group already gone or not signalable: fall back to the direct
            # child (a no-op if it has already been reaped).
            proc.kill()
        try:
            proc.communicate(timeout=grace_seconds)
        except subprocess.TimeoutExpired:
            continue
        return


def run_regression(cfg: dict) -> tuple[int, str, str, Path]:
    """Run the regression runner as a time-bounded subprocess.

    Optional ``runner_*`` keys in *cfg* are forwarded as command-line flags
    when they are not ``None``. The runner is started in its own session so
    that the whole process group can be signalled if it has to be stopped.

    Returns:
        ``(returncode, stdout, stderr, output_path)`` where ``output_path``
        is the path passed to the runner via ``--output``.

    Raises:
        subprocess.TimeoutExpired: the runner exceeded
            ``runner_timeout_seconds``; its process group has been
            terminated (SIGTERM, then SIGKILL) before the exception
            propagates.
    """
    output = gate_output_path()
    cmd = ['python3', str(RUNNER), '--output', str(output)]

    optional_flags = (
        ('--start', 'runner_start'),
        ('--limit', 'runner_limit'),
        ('--per-case-timeout', 'runner_per_case_timeout'),
        ('--progress-every', 'runner_progress_every'),
    )
    for flag, key in optional_flags:
        value = cfg.get(key)
        if value is not None:
            cmd.extend([flag, str(value)])

    # Parse the timeout BEFORE spawning: a malformed config value must not
    # leave an unsupervised child running in its own session.
    timeout = int(cfg.get('runner_timeout_seconds', 90) or 90)

    proc = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        start_new_session=True,
    )
    try:
        stdout, stderr = proc.communicate(timeout=timeout)
    except BaseException:
        # Covers TimeoutExpired as well as KeyboardInterrupt and friends:
        # the child is detached from our session, so nothing else will
        # stop it if we leave without cleaning up.
        _terminate_process_group(proc)
        raise
    return proc.returncode, stdout, stderr, output
|
||||||
|
|
||||||
|
|
||||||
def main() -> int:
|
def main() -> int:
|
||||||
cfg = load_config()
|
cfg = load_config()
|
||||||
try:
|
try:
|
||||||
rc, stdout, stderr = run_regression(cfg)
|
rc, stdout, stderr, output_path = run_regression(cfg)
|
||||||
except subprocess.TimeoutExpired:
|
except subprocess.TimeoutExpired:
|
||||||
print(json.dumps({'ok': False, 'decision': 'hold', 'reason': 'runner_timeout'}, ensure_ascii=False))
|
print(json.dumps({'ok': False, 'decision': 'hold', 'reason': 'runner_timeout'}, ensure_ascii=False))
|
||||||
return 1
|
return 1
|
||||||
if rc != 0:
|
if rc != 0:
|
||||||
print(json.dumps({'ok': False, 'decision': 'hold', 'reason': 'runner_failed', 'runner_rc': rc, 'stderr': (stderr or '')[:300]}, ensure_ascii=False))
|
print(json.dumps({'ok': False, 'decision': 'hold', 'reason': 'runner_failed', 'runner_rc': rc, 'stderr': (stderr or '')[:300]}, ensure_ascii=False))
|
||||||
return 1
|
return 1
|
||||||
data = json.loads(RESULT.read_text(encoding='utf-8'))
|
result_path = output_path if output_path.exists() else RESULT
|
||||||
|
data = json.loads(result_path.read_text(encoding='utf-8'))
|
||||||
route = float(data.get('route_accuracy_percent', 0.0) or 0.0)
|
route = float(data.get('route_accuracy_percent', 0.0) or 0.0)
|
||||||
memory = float(data.get('memory_signal_accuracy_percent', 0.0) or 0.0)
|
memory = float(data.get('memory_signal_accuracy_percent', 0.0) or 0.0)
|
||||||
timeouts = int(data.get('timeouts', 0) or 0)
|
timeouts = int(data.get('timeouts', 0) or 0)
|
||||||
|
|
@ -80,6 +107,7 @@ def main() -> int:
|
||||||
'memory_signal_accuracy_percent': memory,
|
'memory_signal_accuracy_percent': memory,
|
||||||
'timeouts': timeouts,
|
'timeouts': timeouts,
|
||||||
'pass': passed,
|
'pass': passed,
|
||||||
|
'result_path': str(result_path),
|
||||||
}, ensure_ascii=False))
|
}, ensure_ascii=False))
|
||||||
return 0 if ok else 1
|
return 0 if ok else 1
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue