These Python examples are real runnable files. Edit the source file first; this page is rebuilt from the checked-in example and its metadata header.
Python Incident Log Forensics (RLM)
Infers service architecture and root-cause findings from a huge CloudWatch export that never enters the prompt – held in contextFields and worked through the runtime under a lean contextPolicy.
- Provider: google-gemini
- Env: GOOGLE_APIKEY
- Level: advanced
- Run: npm run example -- python src/examples/python/long-agents/incident-log-forensics.py
- Source: src/examples/python/long-agents/incident-log-forensics.py
import json
import os
from datetime import datetime, timedelta, timezone
from axllm import GoogleGeminiClient, agent
from axllm.runtime_quickjs import AxQuickJsCodeRuntime
# Read the Gemini credential from the environment and stop early, with a
# readable message, when it is missing or empty.
api_key = os.environ.get("GOOGLE_APIKEY")
if not api_key:
    raise SystemExit("Set GOOGLE_APIKEY to run this example.")

client = GoogleGeminiClient(api_key=api_key, model="gemini-3.5-flash")
# ---------------------------------------------------------------------------
# Synthetic CloudWatch-style export -- generated large on purpose. Dumping these
# raw events into a prompt would blow the context window. The agent keeps them
# in its runtime (contextFields) and only the *evidence it extracts* ever
# reaches the model. Deterministic so the example is reproducible.
# ---------------------------------------------------------------------------
def build_log_dump():
    """Build the deterministic synthetic log export used by this example.

    Returns:
        A list of event dicts in emission order. Each event is stamped with a
        ``timestamp`` (two seconds per tick, starting 2026-03-02T13:00:00Z)
        and a ``requestId`` derived from the tick, so events emitted on the
        same tick share both values.
    """
    base_time = datetime(2026, 3, 2, 13, 0, 0, tzinfo=timezone.utc)
    events = []

    def push(i, event):
        # Build a stamped copy so the caller's literal is never mutated.
        moment = base_time + timedelta(seconds=i * 2)
        events.append({
            **event,
            "timestamp": moment.isoformat().replace("+00:00", "Z"),
            "requestId": f"req-{100000 + i}",
        })

    for tick in range(1600):
        # Routine, healthy traffic across the fleet.
        push(tick, {"level": "INFO", "service": "gateway", "statusCode": 200, "latencyMs": 40 + (tick % 30), "message": "route ok GET /checkout"})
        push(tick, {"level": "INFO", "service": "search-api", "statusCode": 200, "latencyMs": 70 + (tick % 50), "message": "query ok q=shoes"})

        # Window A: payments-gw upstream timeouts spill into checkout-api 502s
        # for enterprise tenants, with retry storms + pool exhaustion.
        in_window_a = 300 <= tick < 520
        if in_window_a:
            push(tick, {"level": "ERROR", "service": "payments-gw", "statusCode": 504, "latencyMs": 10000, "tenantTier": "enterprise", "message": "upstream timeout calling acquirer (10s)"})
            push(tick, {"level": "ERROR", "service": "checkout-api", "statusCode": 502, "tenantTier": "enterprise", "message": "bad gateway from svc-payments-gw"})
            # NOTE(review): the published listing lost its indentation; both
            # WARN events are assumed to sit under the every-third-tick guard.
            # Confirm against the checked-in example file.
            if tick % 3 == 0:
                push(tick, {"level": "WARN", "service": "payments-gw", "message": "connection pool exhausted (max=64) waiting=200+"})
                push(tick, {"level": "WARN", "service": "checkout-api", "tenantTier": "enterprise", "message": 'user-visible: "Payment could not be processed"'})

        # Window B: the nightly catalog-cron pins CPU and search-api returns 429s.
        in_window_b = 1000 <= tick < 1120
        if in_window_b:
            push(tick, {"level": "WARN", "service": "catalog-cron", "latencyMs": 0, "message": "rebuild step pinning CPU at 95% on shared node"})
            push(tick, {"level": "ERROR", "service": "search-api", "statusCode": 429, "message": "rate limited: downstream catalog unavailable"})

    return events
logs = build_log_dump()
print(f"Generated {len(logs)} log events (kept out of the prompt).")

# Agent signature: inputs on the left of "->", structured report fields on the right.
log_rlm_signature = 'task:string, logs:json "Raw CloudWatch export; keep this out of the prompt" -> architecture:string[] "Services and how they call each other", findings:json[] "Each: issue, count, window, evidence, impact", overallHealth:string, nextActions:string[]'

log_rlm_options = {
    # The export stays in the runtime; only extracted evidence reaches the model.
    "contextFields": ["logs"],
    "contextPolicy": {"preset": "lean", "budget": "balanced"},
    "maxRuntimeChars": 12000,
    "runtime": {"language": "JavaScript"},
}

log_rlm = agent(log_rlm_signature, log_rlm_options)

forensics_inputs = {
    "logs": logs,
    "task": "Infer the service architecture from the logs alone. Then find repeated errors, throttles, retries, and bad user states -- with the affected time window, an occurrence count, and concrete log evidence for each.",
}
report = log_rlm.forward(
    client,
    forensics_inputs,
    {"runtime": AxQuickJsCodeRuntime(), "max_actor_steps": 40},
)

print("\n=== Report ===")
print(json.dumps(report, indent=2, sort_keys=True))
print("\n=== Usage ===")
print(json.dumps(log_rlm.get_usage(), indent=2, sort_keys=True))

Python Codebase Q&A with a Peek Context Map
Answers several dependency questions over one large module index by building and reusing an evolving context map (the “peek” orientation cache), so later questions skip re-scanning the corpus.
- Provider: google-gemini
- Env: GOOGLE_APIKEY
- Level: advanced
- Run: npm run example -- python src/examples/python/long-agents/codebase-peek-map.py
- Source: src/examples/python/long-agents/codebase-peek-map.py
import json
import os
from axllm import GoogleGeminiClient, agent
from axllm.runtime_quickjs import AxQuickJsCodeRuntime
# Read the Gemini credential from the environment and stop early, with a
# readable message, when it is missing or empty.
api_key = os.environ.get("GOOGLE_APIKEY")
if not api_key:
    raise SystemExit("Set GOOGLE_APIKEY to run this example.")

client = GoogleGeminiClient(api_key=api_key, model="gemini-3.5-flash")
# ---------------------------------------------------------------------------
# A large module-dependency index for a monorepo. Each block is a record the
# agent must *search* to answer -- the answers cannot be guessed, only computed
# by filtering the index. Generated large so it would not fit comfortably in a
# prompt; it lives in contextFields and is queried from the runtime.
# ---------------------------------------------------------------------------
def build_module_index():
    """Return the module-dependency records the analyst agent searches.

    Returns:
        A list of dicts with ``path``, ``imports`` (list of strings) and
        ``writes`` (table name, or "-" for none): 11 hand-written core records
        followed by 110 generated ``featureN`` handler records.
    """
    hand_written = [
        {"path": "packages/api/middleware/auth.ts", "imports": ["packages/shared"], "writes": "-"},
        {"path": "packages/api/middleware/rateLimit.ts", "imports": ["packages/db"], "writes": "-"},
        {"path": "packages/api/routes/checkout.ts", "imports": ["packages/api/middleware/auth.ts", "packages/services/orders/createOrder.ts", "packages/services/payments/charge.ts"], "writes": "-"},
        {"path": "packages/api/routes/search.ts", "imports": ["packages/api/middleware/auth.ts", "packages/services/catalog/searchCatalog.ts"], "writes": "-"},
        {"path": "packages/services/orders/createOrder.ts", "imports": ["packages/db", "packages/clients/bus"], "writes": "orders"},
        {"path": "packages/services/orders/orderRepo.ts", "imports": ["packages/db"], "writes": "orders"},
        {"path": "packages/services/payments/charge.ts", "imports": ["packages/clients/acquirer", "packages/db"], "writes": "payments"},
        {"path": "packages/services/payments/refund.ts", "imports": ["packages/clients/acquirer", "packages/db"], "writes": "refunds"},
        {"path": "packages/services/catalog/searchCatalog.ts", "imports": ["packages/db"], "writes": "-"},
        {"path": "packages/clients/acquirer/index.ts", "imports": ["packages/shared"], "writes": "-"},
        {"path": "packages/clients/bus/index.ts", "imports": ["packages/shared"], "writes": "-"},
    ]

    # Filler modules so the index is genuinely large. Every fourth one depends
    # on the acquirer client and every sixth one writes to the audit table.
    generated = [
        {
            "path": f"packages/services/feature{n}/handler.ts",
            "imports": [
                "packages/clients/acquirer" if n % 4 == 0 else "packages/db",
                "packages/shared",
            ],
            "writes": "audit" if n % 6 == 0 else "-",
        }
        for n in range(110)
    ]

    return hand_written + generated
modules = build_module_index()

# Render each record as a three-line PATH / IMPORTS / WRITES block, separated
# by blank lines, so the agent can filter the text from its runtime.
record_blocks = [
    f"PATH: {m['path']}\nIMPORTS: {', '.join(m['imports'])}\nWRITES: {m['writes']}"
    for m in modules
]
codebase_index = "\n\n".join(record_blocks)
print(f"Module index: {len(modules)} records (kept out of the prompt).")

analyst_options = {
    "contextFields": ["context"],
    "contextPolicy": {"preset": "adaptive", "budget": "balanced"},
    "contextOptions": {
        "description": "The context is a module index of \"PATH / IMPORTS / WRITES\" records. Answer by filtering those records in code -- never guess. Return exact PATH values verbatim.",
    },
    # The Peek context map: small, persistent orientation reused across queries.
    "contextMap": {"maxChars": 1800, "infiniteEvolve": False, "evolveSteps": 1},
    "runtime": {"language": "JavaScript"},
}
analyst = agent(
    'context:string, question:string -> answer:string, paths:string[] "Exact PATH values from the index that answer the question"',
    analyst_options,
)

questions = [
    "Which modules import 'packages/clients/acquirer'? Give the exact PATH values.",
    "Which modules write to the 'orders' table?",
    "What are the direct IMPORTS of packages/api/routes/checkout.ts?",
]

# Ask each question against the same index, one forward call per question.
for question in questions:
    inputs = {"context": codebase_index, "question": question}
    result = analyst.forward(
        client,
        inputs,
        {"runtime": AxQuickJsCodeRuntime(), "max_actor_steps": 24},
    )
    print("\nQ:", question)
    print("A:", result.get("answer"))
    # "paths" may be absent or None; print an empty list in that case.
    print("Paths:", ", ".join(result.get("paths") or []))
print("\nThe context map evolved on the first query and was reused for the rest.")

Python Data Analyst (Large Context + Tools)
Combines a large data dictionary held in contextFields with typed warehouse tools, so the agent answers business questions over a big dataset it never has to inline.
- Provider: google-gemini
- Env: GOOGLE_APIKEY
- Level: advanced
- Run: npm run example -- python src/examples/python/long-agents/data-analyst-with-tools.py
- Source: src/examples/python/long-agents/data-analyst-with-tools.py
import json
import os
from axllm import GoogleGeminiClient, agent
from axllm.runtime_quickjs import AxQuickJsCodeRuntime
# Read the Gemini credential from the environment and stop early, with a
# readable message, when it is missing or empty.
api_key = os.environ.get("GOOGLE_APIKEY")
if not api_key:
    raise SystemExit("Set GOOGLE_APIKEY to run this example.")

client = GoogleGeminiClient(api_key=api_key, model="gemini-3.5-flash")
# ---------------------------------------------------------------------------
# The "warehouse": a few hundred rows that live in the host process and are
# reachable only through tools. The model never sees the rows -- it queries
# them. Deterministic so the example is reproducible.
# ---------------------------------------------------------------------------
MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]


def build_warehouse():
    """Generate the deterministic sales rows the warehouse tools query.

    Returns:
        A list of dicts, one per region x product x month, with keys
        ``region``, ``product``, ``monthIndex``, ``month``, ``units``,
        ``revenue`` and ``returnRate``.
    """
    regions = ["North", "South", "East", "West", "Central", "NW", "NE", "SE"]
    products = ["Widget-A", "Widget-B", "Gadget-X", "Gadget-Y"]
    state = 7

    def rand():
        # Small linear congruential generator with a fixed seed, so every run
        # produces the same rows.
        nonlocal state
        state = (state * 1103515245 + 12345) & 0x7FFFFFFF
        return state / 0x7FFFFFFF

    rows = []
    for region in regions:
        for product in products:
            # A planted winner: Gadget-X in East grows much faster than the rest.
            if product == "Gadget-X" and region == "East":
                trend = 90
            else:
                trend = 25
            price = 60 if product.startswith("Gadget") else 38
            return_penalty = 0.03 if product == "Widget-B" else 0
            for month_index, month_name in enumerate(MONTHS):
                # rand() is drawn for units first, then for the return rate;
                # that order fixes the generated values.
                units = round(400 + rand() * 1200 + month_index * trend)
                return_rate = round(0.01 + rand() * 0.05 + return_penalty, 3)
                rows.append({
                    "region": region,
                    "product": product,
                    "monthIndex": month_index,
                    "month": month_name,
                    "units": units,
                    "revenue": units * price,
                    "returnRate": return_rate,
                })
    return rows
# Build the rows once at import time; the tool handlers defined further down
# read this module-level list directly.
warehouse = build_warehouse()
# The schema/data dictionary is large-ish and goes into contextFields so the
# agent orients on column meaning + business rules without the doc entering the prompt.
# It is passed to the agent as the "schema" input; .strip() drops the blank
# first and last lines of the triple-quoted literal.
schema = """
TABLE sales (one row per region x product x month)
COLUMNS
region text one of: North, South, East, West, Central, NW, NE, SE
product text one of: Widget-A, Widget-B, Gadget-X, Gadget-Y
month text Jan..Dec (calendar order; monthIndex 0..11)
units int units sold that month
revenue int integer dollars (units * unit price; Gadgets cost more)
returnRate float fraction of units returned, 0..1
BUSINESS RULES
- "Growth" = change in monthly revenue from Jan to Dec for a region+product.
- A return rate above 0.05 (5%) is flagged for quality review.
- Compare like-for-like: always group by region AND product, not either alone.
TOOLS AVAILABLE (call them, never invent figures)
query filter + aggregate a slice -> {matched, totalUnits, totalRevenue, avgReturnRate}
top rank a metric ("revenue"|"units") grouped by "product"|"region" -> [{key, value}]
trend monthly revenue series (Jan..Dec) for one region + product
""".strip()
# --- Host tool handlers over the warehouse (the model never sees the rows) ---
def query_tool(p):
    """Filter the warehouse rows and aggregate the matching slice.

    Args:
        p: Tool arguments; optional ``region``, ``product`` and ``month``.
            A missing or falsy value means "do not filter on this column".

    Returns:
        Dict with ``matched`` (row count), ``totalUnits``, ``totalRevenue``
        and ``avgReturnRate`` (rounded to 4 places, 0 when nothing matched).
    """
    wanted = {
        "region": p.get("region"),
        "product": p.get("product"),
        "month": p.get("month"),
    }

    def keeps(row):
        # A row survives when every filter that was supplied matches it.
        return all(not value or row[column] == value for column, value in wanted.items())

    selected = [row for row in warehouse if keeps(row)]
    matched = len(selected)
    if selected:
        avg_return = round(sum(row["returnRate"] for row in selected) / matched, 4)
    else:
        avg_return = 0
    return {
        "matched": matched,
        "totalUnits": sum(row["units"] for row in selected),
        "totalRevenue": sum(row["revenue"] for row in selected),
        "avgReturnRate": avg_return,
    }
def top_tool(p):
    """Rank total units or revenue per product or per region, highest first.

    Args:
        p: Tool arguments. ``metric`` is "units" or anything else for revenue
            (default "revenue"); ``groupBy`` is "region" or anything else for
            product (default "product"); ``limit`` is the maximum number of
            entries to return (default 5, an explicit ``None`` means no limit).

    Returns:
        A list of ``{"key", "value"}`` dicts sorted by descending value.

    Raises:
        ValueError, TypeError: if ``limit`` cannot be converted to an integer.
    """
    metric = p.get("metric", "revenue")
    group_by = p.get("groupBy", "product")

    # The tool spec advertises ``limit`` as a JSON "number", so it can arrive
    # as a float such as 3.0, which is not a valid slice index. Coerce it to
    # an int and clamp at zero so a negative value cannot silently drop the
    # tail of the ranking.
    raw_limit = p.get("limit", 5)
    limit = None if raw_limit is None else max(int(raw_limit), 0)

    group_field = "region" if group_by == "region" else "product"
    value_field = "units" if metric == "units" else "revenue"

    totals = {}
    for row in warehouse:
        key = row[group_field]
        totals[key] = totals.get(key, 0) + row[value_field]

    ranked = sorted(
        ({"key": k, "value": v} for k, v in totals.items()),
        key=lambda entry: -entry["value"],
    )
    return ranked[:limit]
def trend_tool(p):
    """Return the 12-slot monthly revenue series for one region and product.

    Args:
        p: Tool arguments with ``region`` and ``product``.

    Returns:
        A list of 12 values indexed by ``monthIndex``; slots with no matching
        row stay 0.
    """
    region = p.get("region")
    product = p.get("product")
    series = [0 for _ in range(12)]
    matching = (
        row for row in warehouse
        if row["region"] == region and row["product"] == product
    )
    for row in matching:
        series[row["monthIndex"]] = row["revenue"]
    return series
runtime = AxQuickJsCodeRuntime()
# Register each host handler under the tool name used in the function specs.
for tool_name, handler in (
    ("query", query_tool),
    ("top", top_tool),
    ("trend", trend_tool),
):
    runtime.register_callable(tool_name, handler)
# Tool specs advertised to the model; handlers are registered on the runtime.
query_spec = {
    "name": "query",
    "description": "Filter the sales table and return aggregates for the matching rows.",
    "parameters": {
        "type": "object",
        "properties": {
            "region": {"type": "string"},
            "product": {"type": "string"},
            "month": {"type": "string"},
        },
    },
}
top_spec = {
    "name": "top",
    "description": "Rank a metric (revenue|units) grouped by product|region, highest first.",
    "parameters": {
        "type": "object",
        "properties": {
            "metric": {"type": "string"},
            "groupBy": {"type": "string"},
            "limit": {"type": "number"},
        },
        "required": ["metric", "groupBy"],
    },
}
trend_spec = {
    "name": "trend",
    "description": "Monthly revenue series (Jan..Dec) for one region and product.",
    "parameters": {
        "type": "object",
        "properties": {"region": {"type": "string"}, "product": {"type": "string"}},
        "required": ["region", "product"],
    },
}

analyst = agent(
    'schema:string, question:string -> answer:string, evidence:string[] "Concrete figures the answer is based on"',
    {
        # Big data dictionary stays out of the prompt.
        "contextFields": ["schema"],
        "functions": [query_spec, top_spec, trend_spec],
        "contextPolicy": {"preset": "lean", "budget": "balanced"},
        "runtime": {"language": "JavaScript"},
    },
)
# One forward call; the registered runtime carries the warehouse tools.
analysis_request = {
    "schema": schema,
    "question": "Which region+product had the strongest Jan->Dec revenue growth, and which products have an average return rate above the 5% review threshold?",
}
result = analyst.forward(
    client,
    analysis_request,
    {"runtime": runtime, "max_actor_steps": 40},
)
print(json.dumps(result, indent=2, sort_keys=True))

Python Self-Improving Lab Agent
A many-tool agent that runs experiments, grades them against a rubric with an independent verifier, and distills verified rules into memory – iterating until the rubric passes.
- Provider: openai
- Env: OPENAI_API_KEY,OPENAI_APIKEY
- Level: advanced
- Run: npm run example -- python src/examples/python/long-agents/self-improving-lab.py
- Source: src/examples/python/long-agents/self-improving-lab.py
import json
import os
import re
from axllm import OpenAICompatibleClient, agent, ax
from axllm.runtime_quickjs import AxQuickJsCodeRuntime
# Either spelling of the OpenAI key variable is accepted; the first non-empty
# one wins.
api_key = os.environ.get("OPENAI_API_KEY") or os.environ.get("OPENAI_APIKEY")
if not api_key:
    raise SystemExit("Set OPENAI_API_KEY or OPENAI_APIKEY to run this example.")

# The model can be overridden through AX_OPENAI_MODEL.
model_name = os.environ.get("AX_OPENAI_MODEL", "gpt-5.4-mini")
client = OpenAICompatibleClient(
    api_key=api_key,
    model=model_name,
    model_config={"temperature": 0},
)
# ---------------------------------------------------------------------------
# The "lab": a deterministic black-box experiment. It scores an ETL config plan
# against a hidden ideal and returns, for any failing check, the exact fix --
# so the agent can converge by following the feedback, not by being told.
# ---------------------------------------------------------------------------
# Data-quality checks, in the order they are reported.
CHECKS = ["no-nulls", "no-duplicates", "numeric-types", "trimmed-strings", "outliers-handled"]

# The fix reported back to the agent for each failing check.
REMEDIES = {
    "no-nulls": "set nullPolicy=impute (or nullPolicy=drop)",
    "no-duplicates": "set dedup=on",
    "numeric-types": "set coerceTypes=on",
    "trimmed-strings": "set trim=on",
    "outliers-handled": "set outlier=clip (or outlier=winsorize)",
}


def run_in_sandbox(plan):
    """Score an ETL config plan against the five data-quality checks.

    Args:
        plan: Free-form text containing ``key=value`` settings. It is
            stringified and lowercased before parsing; when a key repeats,
            the last occurrence wins.

    Returns:
        Dict with ``score`` (fraction passed, 2 decimals), ``solved``,
        ``passed`` (check names), ``failed`` (``{"check", "fix"}`` dicts) and
        a one-line ``logs`` summary.
    """
    settings = {}
    for key, value in re.findall(r"([a-z]+)\s*=\s*([a-z0-9]+)", str(plan).lower()):
        settings[key] = value

    verdicts = {
        "no-nulls": settings.get("nullpolicy") in ("impute", "drop"),
        "no-duplicates": settings.get("dedup") == "on",
        "numeric-types": settings.get("coercetypes") == "on",
        "trimmed-strings": settings.get("trim") == "on",
        "outliers-handled": settings.get("outlier") in ("clip", "winsorize"),
    }

    passed = []
    failed = []
    for check in CHECKS:
        if verdicts[check]:
            passed.append(check)
        else:
            failed.append({"check": check, "fix": REMEDIES[check]})

    total = len(CHECKS)
    return {
        "score": round(len(passed) / total, 2),
        "solved": len(passed) == total,
        "passed": passed,
        "failed": failed,
        "logs": f"{len(passed)}/{total} checks passed",
    }
# The grader is its own ax() program, separate from the agent it grades.
verifier = ax("rubric:string, evidence:json -> passed:boolean, feedback:string, missing:string[]")
verifier.set_instruction(
    "You are an independent rubric grader, not a self-critique. Pass only when the evidence clearly satisfies every part of the rubric."
)

# In-memory rule store, keyed by a truncated lowercase form of each rule.
memory_store = {}


def _run_experiment_tool(p):
    """Run the sandbox on the supplied plan (empty string when omitted)."""
    return run_in_sandbox(p.get("plan", ""))


def _list_checks_tool(p):
    """Return the list of check names; the arguments are ignored."""
    return CHECKS


def _grade_tool(p):
    """Forward the rubric and evidence to the independent verifier."""
    return verifier.forward(
        client,
        {"rubric": p.get("rubric", ""), "evidence": p.get("evidence", [])},
    )


runtime = AxQuickJsCodeRuntime()
runtime.register_callable("runExperiment", _run_experiment_tool)
runtime.register_callable("listChecks", _list_checks_tool)
runtime.register_callable("grade", _grade_tool)
def recall_tool(p):
    """Return stored rules whose key mentions the topic or any of its words.

    Args:
        p: Tool arguments with ``topic``; it is stringified and lowercased.

    Returns:
        A list of stored values. Only the keys of ``memory_store`` are
        searched, never the stored text.
    """
    topic = str(p.get("topic", "")).lower()
    words = topic.split()
    hits = []
    for key, stored in memory_store.items():
        if topic in key or any(word in key for word in words):
            hits.append(stored)
    return hits
def remember_tool(p):
    """Store a rule with its evidence in ``memory_store``.

    Args:
        p: Tool arguments with ``rule`` and ``evidence``.

    Returns:
        ``{"stored": True, "total": <number of entries now held>}``.
    """
    rule = str(p.get("rule", ""))
    # The key is the first 48 characters of the lowercased rule, so two rules
    # sharing that prefix overwrite each other.
    key = rule.lower()[:48]
    memory_store[key] = f"{rule} :: {p.get('evidence', '')}"
    return {"stored": True, "total": len(memory_store)}
# Expose the memory helpers to the runtime under their tool names.
for tool_name, handler in (("recall", recall_tool), ("remember", remember_tool)):
    runtime.register_callable(tool_name, handler)
def _spec(name, description, props, required=None):
return {
"name": name,
"description": description,
"parameters": {"type": "object", "properties": props, **({"required": required} if required else {})},
}
# Tool specs, listed in the same order the playbook uses them.
lab_functions = [
    _spec("runExperiment", "Apply an ETL config plan; returns score, solved, passed[], failed[{check,fix}], logs. Pass an empty plan to discover the fixes.", {"plan": {"type": "string"}}, ["plan"]),
    _spec("listChecks", "List the data-quality checks the experiment evaluates.", {}),
    _spec("grade", "Independent rubric grader. Pass only when the evidence meets the rubric.", {"rubric": {"type": "string"}, "evidence": {"type": "array", "items": {"type": "string"}}}, ["rubric", "evidence"]),
    _spec("recall", "Recall verified rules relevant to a topic.", {"topic": {"type": "string"}}, ["topic"]),
    _spec("remember", "Store a verified, reusable rule (the rule, not raw notes).", {"rule": {"type": "string"}, "evidence": {"type": "string"}}, ["rule", "evidence"]),
]

# Step-by-step playbook, joined with newlines into the executor description.
lab_playbook = [
    "Use the tools -- do not answer from your own knowledge.",
    "1. recall('etl data quality') to reuse anything already learned.",
    "2. runExperiment('') once to see every failing check and its fix.",
    "3. Build a plan applying all the fixes, then runExperiment again. Repeat until solved is true.",
    "4. grade the passing evidence against the rubric.",
    "5. For each check you fixed, remember(rule, evidence).",
    "6. Then return the answer, the plans you tried, and the learned rules.",
]

self_improving = agent(
    'goal:string, rubric:string -> answer:string, experiments:string[] "Plans tried, in order", learnedRules:string[]',
    {
        "contextFields": [],
        "functions": lab_functions,
        "contextPolicy": {"preset": "adaptive", "budget": "balanced"},
        "executorOptions": {"description": "\n".join(lab_playbook)},
        "runtime": {"language": "JavaScript"},
    },
)
lab_inputs = {
    "goal": "Find an ETL config plan that cleans the dirty dataset so every data-quality check passes.",
    "rubric": "All five checks (no-nulls, no-duplicates, numeric-types, trimmed-strings, outliers-handled) must pass, i.e. score 1.0.",
}
result = self_improving.forward(
    client,
    lab_inputs,
    {"runtime": runtime, "max_actor_steps": 18},
)
print(json.dumps(result, indent=2, sort_keys=True))

# Copy the rules the agent reported into the in-memory store, using the same
# truncated-lowercase key scheme as remember_tool. A missing or None
# "learnedRules" is treated as empty.
learned_rules = result.get("learnedRules", []) or []
for rule in learned_rules:
    rule_text = str(rule)
    memory_store[rule_text.lower()[:48]] = rule_text
print(f"\nMemory now holds {len(memory_store)} rule(s) for next time.")

Python Skills + Memory Ops Assistant
An on-call assistant that recalls past decisions from a memory store and loads the right runbook skill on demand, using the agent skills and memories subsystems.
- Provider: openai
- Env: OPENAI_API_KEY,OPENAI_APIKEY
- Level: advanced
- Run: npm run example -- python src/examples/python/long-agents/skills-and-memory-assistant.py
- Source: src/examples/python/long-agents/skills-and-memory-assistant.py
import json
import os
from axllm import OpenAICompatibleClient, agent
from axllm.runtime_quickjs import AxQuickJsCodeRuntime
# Either spelling of the OpenAI key variable is accepted; the first non-empty
# one wins.
api_key = os.environ.get("OPENAI_API_KEY") or os.environ.get("OPENAI_APIKEY")
if not api_key:
    raise SystemExit("Set OPENAI_API_KEY or OPENAI_APIKEY to run this example.")

# gpt-5.4 (not -mini): the recall/discover loop needs reasoning to proactively
# pull memories + runbooks instead of stopping to ask for clarification.
# AX_OPENAI_MODEL overrides the default.
model_name = os.environ.get("AX_OPENAI_MODEL", "gpt-5.4")
client = OpenAICompatibleClient(
    api_key=api_key,
    model=model_name,
    model_config={"temperature": 0},
)
# ---------------------------------------------------------------------------
# Memory store -- remembered decisions and postmortems. In production this is a
# vector DB / BM25 index; here a tiny KV with substring matching. The actor
# pulls relevant entries into scope via `await recall([...])`.
# ---------------------------------------------------------------------------
memory_store = {
    "decision/db-failover": "Decision (2026-02): during a primary DB failover, freeze writes via the feature flag `writes.enabled=false` BEFORE promoting the replica. Promoting first caused split-brain in inc-118.",
    "postmortem/inc-118": "inc-118 root cause: replica promoted while primary still accepted writes. Mitigation: write-freeze flag + 90s replication-lag gate.",
    "decision/customer-comms": "Decision: for Sev-1s affecting enterprise tenants, post a status-page update within 15 minutes and notify named TAMs directly.",
}


def on_memories_search(searches, already_loaded):
    """Return memories whose id or content contains any of the search strings.

    Matching is a case-insensitive substring test. Memories listed in
    ``already_loaded`` are skipped, and a memory that matches several queries
    is returned only once (previously it was appended once per matching query).

    Args:
        searches: Iterable of query values (each is stringified), or ``None``.
        already_loaded: Iterable of dicts with an ``id`` key, or ``None``.

    Returns:
        A list of ``{"id", "content"}`` dicts, ordered by query and then by
        store order.
    """
    # Seeded with the ids already loaded, then extended as matches are found,
    # so one set covers both "skip loaded" and "do not repeat".
    seen = {m.get("id") for m in (already_loaded or [])}
    matches = []
    for query in searches or []:
        q = str(query).lower()
        for mid, content in memory_store.items():
            if mid in seen:
                continue
            if q in mid.lower() or q in content.lower():
                seen.add(mid)
                matches.append({"id": mid, "content": content})
    return matches
# ---------------------------------------------------------------------------
# Skill store -- runbooks loaded into the executor prompt on demand via
# `await discover({ skills: [...] })`. Loaded skills persist across calls.
# ---------------------------------------------------------------------------
skill_store = [
    {
        "id": "runbook-db-failover",
        "name": "DB failover runbook",
        "content": "## DB failover\n1. Set `writes.enabled=false`.\n2. Wait for replication lag < 5s.\n3. Promote replica.\n4. Re-point app via service discovery.\n5. Re-enable writes. 6. File postmortem within 48h.",
    },
    {
        "id": "runbook-status-comms",
        "name": "Status communications runbook",
        "content": "## Status comms\n- Sev-1: status-page update within 15m, every 30m thereafter.\n- Enterprise impact: notify named TAMs directly.\n- Keep updates factual; no ETAs you cannot keep.",
    },
]


def on_skills_search(searches):
    """Return skills whose id, name or content contains any search string.

    Matching is a case-insensitive substring test. A skill that matches
    several queries is returned only once (previously it was appended once
    per matching query).

    Args:
        searches: Iterable of query values (each is stringified), or ``None``.

    Returns:
        A list of the matching ``skill_store`` entries, ordered by query and
        then by store order.
    """
    found = []
    seen_ids = set()
    for query in searches or []:
        q = str(query).lower()
        for skill in skill_store:
            if skill["id"] in seen_ids:
                continue
            if q in skill["id"].lower() or q in skill["name"].lower() or q in skill["content"].lower():
                seen_ids.add(skill["id"])
                found.append(skill)
    return found
# A base skill always loaded, independent of search.
house_style_skill = {
    "name": "house-style",
    "content": "Be concise and operational. Prefer our remembered decisions over generic advice. Never invent flag names or steps -- cite the runbook.",
}

# Executor instructions, joined with newlines into one description string.
assistant_playbook = [
    "You do NOT know our internal flag names, incident history, or runbook steps from your own training.",
    "The only source of truth is our memory (past decisions/postmortems) and our runbook skills.",
    "1. recall the relevant past decisions and postmortems (e.g. the failover decision, inc-118).",
    "2. discover the matching runbook skill and read its exact steps and flag names.",
    "3. Answer with the precise ordered procedure, citing our exact flag names and runbook steps.",
    "Generic best-practice advice is WRONG here. Do NOT answer from general knowledge and do NOT ask for clarification -- recall and discover first.",
]

assistant = agent(
    'situation:string -> guidance:string "What to do, grounded in our decisions and runbooks", steps:string[]',
    {
        "contextFields": [],
        "skills": [house_style_skill],
        # Search hooks backed by the in-memory stores defined in this file.
        "onMemoriesSearch": on_memories_search,
        "onSkillsSearch": on_skills_search,
        # Print what was loaded; skills fall back to their name when they
        # carry no id.
        "onLoadedMemories": lambda results: print("[memories loaded]", ", ".join(r.get("id", "") for r in results)),
        "onLoadedSkills": lambda results: print("[skills loaded]", ", ".join(r.get("id") or r.get("name") for r in results)),
        "executorOptions": {"description": "\n".join(assistant_playbook)},
        "runtime": {"language": "JavaScript"},
    },
)
# The situation is built from adjacent string literals; the trailing space on
# each fragment keeps the words separated once they are concatenated.
situation_text = (
    "Our primary database is unhealthy and we're about to fail over -- the same class of "
    "incident as inc-118, and enterprise checkout is affected. Per our remembered decisions "
    "and runbooks: what is the exact ordered procedure, and which specific feature flag must "
    "we set before promoting the replica?"
)
result = assistant.forward(
    client,
    {"situation": situation_text},
    {"runtime": AxQuickJsCodeRuntime(), "max_actor_steps": 12},
)

print("\n=== Response ===")
print(json.dumps(result, indent=2, sort_keys=True))