diff --git a/agents/shared.py b/agents/shared.py index 180dead..2c16cf6 100644 --- a/agents/shared.py +++ b/agents/shared.py @@ -3,9 +3,10 @@ import json import os import sys +import time from datetime import datetime from zoneinfo import ZoneInfo -from urllib import request +from urllib import request, error as urlerror MT = ZoneInfo("America/Denver") @@ -21,26 +22,54 @@ MONTH_NAMES = [ "July", "August", "September", "October", "November", "December", ] +# Retry config +DEFAULT_RETRIES = 3 +DEFAULT_BACKOFF = 2 # seconds, doubles each retry +RETRIABLE_CODES = {408, 429, 500, 502, 503, 504} -def api_request(url, data=None, headers=None, method="GET"): - """Simple HTTP helper using urllib.""" + +def api_request(url, data=None, headers=None, method="GET", retries=DEFAULT_RETRIES, backoff=DEFAULT_BACKOFF): + """HTTP helper with automatic retry on transient failures.""" if data is not None: data = json.dumps(data).encode("utf-8") - req = request.Request(url, data=data, headers=headers or {}, method=method) - if data: - req.add_header("Content-Type", "application/json") - with request.urlopen(req, timeout=30) as resp: - return json.loads(resp.read().decode()) + + last_error = None + for attempt in range(retries + 1): + try: + req = request.Request(url, data=data, headers=headers or {}, method=method) + if data: + req.add_header("Content-Type", "application/json") + with request.urlopen(req, timeout=30) as resp: + return json.loads(resp.read().decode()) + except urlerror.HTTPError as e: + last_error = e + if e.code in RETRIABLE_CODES and attempt < retries: + wait = backoff * (2 ** attempt) + print(f" Retry {attempt + 1}/{retries} after {e.code} from {url} (waiting {wait}s)", file=sys.stderr) + time.sleep(wait) + continue + raise + except (urlerror.URLError, TimeoutError, ConnectionError, OSError) as e: + last_error = e + if attempt < retries: + wait = backoff * (2 ** attempt) + print(f" Retry {attempt + 1}/{retries} after {type(e).__name__} from {url} (waiting {wait}s)", file=sys.stderr) + time.sleep(wait) + continue + raise + + raise last_error def log_run(agent_id, status, output="", err="", metadata=None, instance_id=None): - """Log a run to the dashboard API. Uses instance_id if available (v2), falls back to agent_id.""" + """Log a run to the dashboard API.""" try: if instance_id: api_request( f"{DASHBOARD_API}/api/instances/{instance_id}/runs", data={"status": status, "output": output, "error": err, "metadata": metadata or {}}, method="POST", + retries=1, # Don't retry logging too aggressively ) else: print(f"Warning: no instance_id, run not logged for {agent_id}", file=sys.stderr) diff --git a/dashboard/app.py b/dashboard/app.py index c8bf038..78a7c8c 100644 --- a/dashboard/app.py +++ b/dashboard/app.py @@ -448,6 +448,39 @@ def delete_instance(instance_id: int, user: dict = Depends(require_auth), db: Se return {"status": "deleted"} +@app.post("/api/instances/{instance_id}/trigger") +def trigger_instance(instance_id: int, user: dict = Depends(require_auth), db: Session = Depends(get_db)): + """Trigger a manual run of an agent instance. Runs async via subprocess.""" + inst = db.query(AgentInstance).filter( + AgentInstance.id == instance_id, AgentInstance.user_id == user["user_id"] + ).first() + if not inst: + raise HTTPException(status_code=404) + + # Determine which script to run based on catalog type and user + catalog_id = inst.catalog_id + u = db.query(User).filter(User.id == user["user_id"]).first() + + if catalog_id == "daily-briefing": + # Find the user's briefing wrapper script or use the generic engine + import subprocess + env = {**dict(os.environ), f"{u.username.upper().replace('.','_')}_INSTANCE_ID": str(instance_id)} + # Check for user-specific script + script_map = { + "eric": "eric_briefing.py", + "angela": "angela_briefing.py", + } + script = script_map.get(u.username, None) + if script: + cmd = f"cd /opt/agent-dashboard/agents && {u.username.upper().replace('.','_')}_INSTANCE_ID={instance_id} python3 {script}" + else: + cmd = f"cd /opt/agent-dashboard/agents && python3 -c \"from daily_briefing import run; run({{'person': '{u.display_name}', 'agent_id': '{catalog_id}', 'instance_id': {instance_id}, 'wiki_parent_doc_id': '', 'location': {{}}}})\"" + subprocess.Popen(cmd, shell=True, env=env) + return {"status": "triggered", "message": f"Running {catalog_id} for {u.display_name}"} + + return {"status": "error", "message": f"Manual trigger not supported for {catalog_id} yet"} + + # --- Internal endpoints (no auth, for agent scripts) --- @app.get("/api/instances/{instance_id}/config") diff --git a/dashboard/static/index.html b/dashboard/static/index.html index 2fcdbb6..62a2a0e 100644 --- a/dashboard/static/index.html +++ b/dashboard/static/index.html @@ -206,6 +206,7 @@ function buildConfigForm(inst){ html+=`
+
`; @@ -286,6 +287,19 @@ async function saveInstanceConfig(id){ else{msg.textContent='Error';msg.style.color='var(--red)'} } +async function triggerRun(id){ + const msg=document.getElementById('save-msg'); + msg.textContent='Running...';msg.style.color='var(--blue)'; + const res=await fetch(API+'/api/instances/'+id+'/trigger',{method:'POST'}); + if(res.ok){ + const data=await res.json(); + msg.textContent=data.message||'Triggered';msg.style.color='var(--green)'; + setTimeout(()=>{msg.textContent='';refresh()},5000); + } else { + msg.textContent='Failed to trigger';msg.style.color='var(--red)'; + } +} + async function deleteInstance(id){ if(!confirm('Delete this agent instance and all its runs?'))return; await fetch(API+'/api/instances/'+id,{method:'DELETE'});