Long-Horizon Agents

Each Python example below is a real, runnable file. To change an example, edit its source file rather than this page; the page is rebuilt from the checked-in example and its metadata header.

Python Incident Log Forensics (RLM)

Infers service architecture and root-cause findings from a huge CloudWatch export that never enters the prompt – held in contextFields and worked through the runtime under a lean contextPolicy.

Provider: google-gemini
Env: GOOGLE_APIKEY
Level: advanced
Run: npm run example -- python src/examples/python/long-agents/incident-log-forensics.py
Source: src/examples/python/long-agents/incident-log-forensics.py

Python

import json
import os
from datetime import datetime, timedelta, timezone

from axllm import GoogleGeminiClient, agent
from axllm.runtime_quickjs import AxQuickJsCodeRuntime

# Read the Gemini API key from the environment; exit with a hint when it is
# unset or empty.
api_key = os.getenv("GOOGLE_APIKEY")
if not api_key:
    raise SystemExit("Set GOOGLE_APIKEY to run this example.")

# Client object passed to the agent's forward() call further down.
client = GoogleGeminiClient(api_key=api_key, model="gemini-3.5-flash")


# ---------------------------------------------------------------------------
# Synthetic CloudWatch-style export -- generated large on purpose. Dumping these
# raw events into a prompt would blow the context window. The agent keeps them
# in its runtime (contextFields) and only the *evidence it extracts* ever
# reaches the model. Deterministic so the example is reproducible.
# ---------------------------------------------------------------------------
def build_log_dump():
    """Generate the deterministic synthetic log export used by this example.

    Each tick ``i`` in 0..1599 emits two healthy INFO events. Ticks 300..519
    add the payments-gw / checkout-api failure window, and ticks 1000..1119
    add the catalog-cron / search-api throttling window. All events emitted
    for one tick share a timestamp (two seconds per tick, counted from
    2026-03-02T13:00:00Z) and a requestId.

    Returns:
        list[dict]: the events in emission order.
    """
    epoch = datetime(2026, 3, 2, 13, 0, 0, tzinfo=timezone.utc)
    collected = []

    def emit(tick, *records):
        # Stamp every record of this tick with the same time and request id.
        stamp = (epoch + timedelta(seconds=tick * 2)).isoformat().replace("+00:00", "Z")
        request_id = f"req-{100000 + tick}"
        for record in records:
            collected.append({**record, "timestamp": stamp, "requestId": request_id})

    payment_window = range(300, 520)
    catalog_window = range(1000, 1120)

    for tick in range(1600):
        # Routine, healthy traffic across the fleet.
        emit(
            tick,
            {"level": "INFO", "service": "gateway", "statusCode": 200, "latencyMs": 40 + (tick % 30), "message": "route ok GET /checkout"},
            {"level": "INFO", "service": "search-api", "statusCode": 200, "latencyMs": 70 + (tick % 50), "message": "query ok q=shoes"},
        )

        # Window A: payments-gw upstream timeouts surface as checkout-api 502s
        # for enterprise tenants; every third tick adds pool-exhaustion and
        # user-visible warnings.
        if tick in payment_window:
            emit(
                tick,
                {"level": "ERROR", "service": "payments-gw", "statusCode": 504, "latencyMs": 10000, "tenantTier": "enterprise", "message": "upstream timeout calling acquirer (10s)"},
                {"level": "ERROR", "service": "checkout-api", "statusCode": 502, "tenantTier": "enterprise", "message": "bad gateway from svc-payments-gw"},
            )
            if tick % 3 == 0:
                emit(
                    tick,
                    {"level": "WARN", "service": "payments-gw", "message": "connection pool exhausted (max=64) waiting=200+"},
                    {"level": "WARN", "service": "checkout-api", "tenantTier": "enterprise", "message": 'user-visible: "Payment could not be processed"'},
                )

        # Window B: the nightly catalog-cron pins CPU and search-api returns 429s.
        if tick in catalog_window:
            emit(
                tick,
                {"level": "WARN", "service": "catalog-cron", "latencyMs": 0, "message": "rebuild step pinning CPU at 95% on shared node"},
                {"level": "ERROR", "service": "search-api", "statusCode": 429, "message": "rate limited: downstream catalog unavailable"},
            )

    return collected


# Build the export once; only its size is echoed to stdout.
logs = build_log_dump()
print(f"Generated {len(logs)} log events (kept out of the prompt).")

# Agent signature: inputs `task` and `logs`; outputs `architecture`,
# `findings`, `overallHealth`, and `nextActions`.
log_rlm = agent(
    'task:string, logs:json "Raw CloudWatch export; keep this out of the prompt" -> architecture:string[] "Services and how they call each other", findings:json[] "Each: issue, count, window, evidence, impact", overallHealth:string, nextActions:string[]',
    {
        # The export stays in the runtime; only extracted evidence reaches the model.
        "contextFields": ["logs"],
        "contextPolicy": {"preset": "lean", "budget": "balanced"},
        "maxRuntimeChars": 12000,
        "runtime": {"language": "JavaScript"},
    },
)

# Run the agent with a fresh QuickJS runtime and max_actor_steps set to 40.
report = log_rlm.forward(
    client,
    {
        "logs": logs,
        "task": "Infer the service architecture from the logs alone. Then find repeated errors, throttles, retries, and bad user states -- with the affected time window, an occurrence count, and concrete log evidence for each.",
    },
    {"runtime": AxQuickJsCodeRuntime(), "max_actor_steps": 40},
)

# Print the structured report, then whatever get_usage() returns for the run.
print("\n=== Report ===")
print(json.dumps(report, indent=2, sort_keys=True))
print("\n=== Usage ===")
print(json.dumps(log_rlm.get_usage(), indent=2, sort_keys=True))

Python Codebase Q&A with a Peek Context Map

Answers several dependency questions over one large module index by building and reusing an evolving context map (the “peek” orientation cache), so later questions skip re-scanning the corpus.

Provider: google-gemini
Env: GOOGLE_APIKEY
Level: advanced
Run: npm run example -- python src/examples/python/long-agents/codebase-peek-map.py
Source: src/examples/python/long-agents/codebase-peek-map.py

Python

import json
import os

from axllm import GoogleGeminiClient, agent
from axllm.runtime_quickjs import AxQuickJsCodeRuntime

# Read the Gemini API key from the environment; exit with a hint when it is
# unset or empty.
api_key = os.getenv("GOOGLE_APIKEY")
if not api_key:
    raise SystemExit("Set GOOGLE_APIKEY to run this example.")

# Client object passed to every forward() call in the question loop below.
client = GoogleGeminiClient(api_key=api_key, model="gemini-3.5-flash")


# ---------------------------------------------------------------------------
# A large module-dependency index for a monorepo. Each block is a record the
# agent must *search* to answer -- the answers cannot be guessed, only computed
# by filtering the index. Generated large so it would not fit comfortably in a
# prompt; it lives in contextFields and is queried from the runtime.
# ---------------------------------------------------------------------------
def build_module_index():
    """Return the synthetic module-dependency index as a list of records.

    Each record is a dict with ``path``, ``imports`` (list of strings) and
    ``writes`` ("-" when the module writes nothing). The list holds 11
    hand-written core modules followed by 110 generated filler modules.
    """

    def record(path, imports, writes="-"):
        return {"path": path, "imports": imports, "writes": writes}

    hand_written = [
        record("packages/api/middleware/auth.ts", ["packages/shared"]),
        record("packages/api/middleware/rateLimit.ts", ["packages/db"]),
        record(
            "packages/api/routes/checkout.ts",
            ["packages/api/middleware/auth.ts", "packages/services/orders/createOrder.ts", "packages/services/payments/charge.ts"],
        ),
        record(
            "packages/api/routes/search.ts",
            ["packages/api/middleware/auth.ts", "packages/services/catalog/searchCatalog.ts"],
        ),
        record("packages/services/orders/createOrder.ts", ["packages/db", "packages/clients/bus"], "orders"),
        record("packages/services/orders/orderRepo.ts", ["packages/db"], "orders"),
        record("packages/services/payments/charge.ts", ["packages/clients/acquirer", "packages/db"], "payments"),
        record("packages/services/payments/refund.ts", ["packages/clients/acquirer", "packages/db"], "refunds"),
        record("packages/services/catalog/searchCatalog.ts", ["packages/db"]),
        record("packages/clients/acquirer/index.ts", ["packages/shared"]),
        record("packages/clients/bus/index.ts", ["packages/shared"]),
    ]

    # Filler modules make the index large: every 4th one imports the acquirer
    # client instead of the db package, and every 6th one writes to "audit".
    generated = [
        record(
            f"packages/services/feature{n}/handler.ts",
            ["packages/db" if n % 4 else "packages/clients/acquirer", "packages/shared"],
            "-" if n % 6 else "audit",
        )
        for n in range(110)
    ]
    return hand_written + generated


# Render every record as a three-line "PATH / IMPORTS / WRITES" block, with
# blocks separated by a blank line.
modules = build_module_index()
codebase_index = "\n\n".join(
    f"PATH: {m['path']}\nIMPORTS: {', '.join(m['imports'])}\nWRITES: {m['writes']}" for m in modules
)
print(f"Module index: {len(modules)} records (kept out of the prompt).")

# Agent signature: inputs `context` and `question`; outputs `answer` and `paths`.
analyst = agent(
    'context:string, question:string -> answer:string, paths:string[] "Exact PATH values from the index that answer the question"',
    {
        "contextFields": ["context"],
        "contextPolicy": {"preset": "adaptive", "budget": "balanced"},
        "contextOptions": {
            "description": "The context is a module index of \"PATH / IMPORTS / WRITES\" records. Answer by filtering those records in code -- never guess. Return exact PATH values verbatim.",
        },
        # The Peek context map: small, persistent orientation reused across queries.
        "contextMap": {"maxChars": 1800, "infiniteEvolve": False, "evolveSteps": 1},
        "runtime": {"language": "JavaScript"},
    },
)

questions = [
    "Which modules import 'packages/clients/acquirer'? Give the exact PATH values.",
    "Which modules write to the 'orders' table?",
    "What are the direct IMPORTS of packages/api/routes/checkout.ts?",
]

# Ask each question against the same agent and the same index text; a new
# QuickJS runtime instance is created for every forward() call.
for question in questions:
    result = analyst.forward(
        client,
        {"context": codebase_index, "question": question},
        {"runtime": AxQuickJsCodeRuntime(), "max_actor_steps": 24},
    )
    print("\nQ:", question)
    print("A:", result.get("answer"))
    # `paths` may be missing or None; fall back to an empty list before joining.
    print("Paths:", ", ".join(result.get("paths") or []))

print("\nThe context map evolved on the first query and was reused for the rest.")

Python Data Analyst (Large Context + Tools)

Combines a large data dictionary held in contextFields with typed warehouse tools, so the agent answers business questions over a big dataset it never has to inline.

Provider: google-gemini
Env: GOOGLE_APIKEY
Level: advanced
Run: npm run example -- python src/examples/python/long-agents/data-analyst-with-tools.py
Source: src/examples/python/long-agents/data-analyst-with-tools.py

Python

import json
import os

from axllm import GoogleGeminiClient, agent
from axllm.runtime_quickjs import AxQuickJsCodeRuntime

# Read the Gemini API key from the environment; exit with a hint when it is
# unset or empty.
api_key = os.getenv("GOOGLE_APIKEY")
if not api_key:
    raise SystemExit("Set GOOGLE_APIKEY to run this example.")

# Client object passed to the agent's forward() call further down.
client = GoogleGeminiClient(api_key=api_key, model="gemini-3.5-flash")

# ---------------------------------------------------------------------------
# The "warehouse": a few hundred rows that live in the host process and are
# reachable only through tools. The model never sees the rows -- it queries
# them. Deterministic so the example is reproducible.
# ---------------------------------------------------------------------------
MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]


def build_warehouse():
    """Generate the deterministic sales rows: one per region x product x month.

    A fixed-seed linear congruential generator supplies the pseudo-random part
    of ``units`` and ``returnRate``, so every call returns the same rows. The
    East / Gadget-X pair gets a steeper monthly trend than all other pairs,
    and Widget-B gets a higher return rate.

    Returns:
        list[dict]: rows with keys region, product, monthIndex, month, units,
        revenue and returnRate.
    """
    state = 7

    def next_fraction():
        # One LCG step; the result is scaled into the 0..1 range.
        nonlocal state
        state = (state * 1103515245 + 12345) & 0x7FFFFFFF
        return state / 0x7FFFFFFF

    table = []
    for region in ("North", "South", "East", "West", "Central", "NW", "NE", "SE"):
        for product in ("Widget-A", "Widget-B", "Gadget-X", "Gadget-Y"):
            # Per-pair values that do not vary by month.
            slope = 90 if (region, product) == ("East", "Gadget-X") else 25  # a planted winner
            unit_price = 60 if product.startswith("Gadget") else 38
            return_bump = 0.03 if product == "Widget-B" else 0
            for month_index, month in enumerate(MONTHS):
                # Draw order matters for reproducibility: units first, then return rate.
                units = round(400 + next_fraction() * 1200 + month_index * slope)
                return_rate = round(0.01 + next_fraction() * 0.05 + return_bump, 3)
                table.append({
                    "region": region,
                    "product": product,
                    "monthIndex": month_index,
                    "month": month,
                    "units": units,
                    "revenue": units * unit_price,
                    "returnRate": return_rate,
                })
    return table


# Module-level table that the tool handlers below read from.
warehouse = build_warehouse()

# The schema/data dictionary is large-ish and goes into contextFields so the
# agent orients on column meaning + business rules without the doc entering the prompt.
# The text is stripped of its leading and trailing newline before use.
schema = """
TABLE sales (one row per region x product x month)

COLUMNS
  region       text   one of: North, South, East, West, Central, NW, NE, SE
  product      text   one of: Widget-A, Widget-B, Gadget-X, Gadget-Y
  month        text   Jan..Dec (calendar order; monthIndex 0..11)
  units        int    units sold that month
  revenue      int    integer dollars (units * unit price; Gadgets cost more)
  returnRate   float  fraction of units returned, 0..1

BUSINESS RULES
  - "Growth" = change in monthly revenue from Jan to Dec for a region+product.
  - A return rate above 0.05 (5%) is flagged for quality review.
  - Compare like-for-like: always group by region AND product, not either alone.

TOOLS AVAILABLE (call them, never invent figures)
  query  filter + aggregate a slice -> {matched, totalUnits, totalRevenue, avgReturnRate}
  top    rank a metric ("revenue"|"units") grouped by "product"|"region" -> [{key, value}]
  trend  monthly revenue series (Jan..Dec) for one region + product
""".strip()


# --- Host tool handlers over the warehouse (the model never sees the rows) ---
def query_tool(p):
    """Aggregate the warehouse rows that match the optional filters in ``p``.

    ``p`` may carry ``region``, ``product`` and ``month``; a missing or falsy
    value means "do not filter on this column".

    Returns:
        dict: ``matched`` (row count), ``totalUnits``, ``totalRevenue`` and
        ``avgReturnRate`` (rounded to 4 places; 0 when nothing matches).
    """
    requested = {column: p.get(column) for column in ("region", "product", "month")}
    # Keep only the filters that were actually supplied.
    active = {column: value for column, value in requested.items() if value}

    selected = []
    for row in warehouse:
        if all(row[column] == value for column, value in active.items()):
            selected.append(row)

    if selected:
        mean_return = round(sum(row["returnRate"] for row in selected) / len(selected), 4)
    else:
        mean_return = 0

    return {
        "matched": len(selected),
        "totalUnits": sum(row["units"] for row in selected),
        "totalRevenue": sum(row["revenue"] for row in selected),
        "avgReturnRate": mean_return,
    }


def top_tool(p):
    """Rank warehouse totals of one metric, grouped by product or region.

    ``p`` may carry ``metric`` ("units", anything else means revenue),
    ``groupBy`` ("region", anything else means product) and ``limit``
    (default 5).

    Returns:
        list[dict]: up to ``limit`` ``{"key", "value"}`` entries, highest
        value first.
    """
    metric = p.get("metric", "revenue")
    group_by = p.get("groupBy", "product")
    limit = p.get("limit", 5)

    value_column = "units" if metric == "units" else "revenue"
    key_column = "region" if group_by == "region" else "product"

    sums = {}
    for row in warehouse:
        bucket = row[key_column]
        sums[bucket] = sums.get(bucket, 0) + row[value_column]

    entries = [{"key": bucket, "value": total} for bucket, total in sums.items()]
    # Negated key gives descending order and keeps ties in first-seen order.
    entries.sort(key=lambda entry: -entry["value"])
    return entries[:limit]


def trend_tool(p):
    """Return the 12-slot monthly revenue series for one region + product.

    Slots are indexed by each row's ``monthIndex``. Slots with no matching
    row stay 0, so an unknown region/product pair yields twelve zeros.
    """
    wanted = (p.get("region"), p.get("product"))
    monthly = [0] * 12
    for row in warehouse:
        if (row["region"], row["product"]) == wanted:
            monthly[row["monthIndex"]] = row["revenue"]
    return monthly


# Register the three host handlers on the runtime under the same names that
# are advertised in the "functions" specs below.
runtime = AxQuickJsCodeRuntime()
runtime.register_callable("query", query_tool)
runtime.register_callable("top", top_tool)
runtime.register_callable("trend", trend_tool)

# Agent signature: inputs `schema` and `question`; outputs `answer` and `evidence`.
analyst = agent(
    'schema:string, question:string -> answer:string, evidence:string[] "Concrete figures the answer is based on"',
    {
        # Big data dictionary stays out of the prompt.
        "contextFields": ["schema"],
        # Tool specs advertised to the model; handlers are registered on the runtime above.
        "functions": [
            {
                "name": "query",
                "description": "Filter the sales table and return aggregates for the matching rows.",
                # All three filters are optional (no "required" list).
                "parameters": {
                    "type": "object",
                    "properties": {
                        "region": {"type": "string"},
                        "product": {"type": "string"},
                        "month": {"type": "string"},
                    },
                },
            },
            {
                "name": "top",
                "description": "Rank a metric (revenue|units) grouped by product|region, highest first.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "metric": {"type": "string"},
                        "groupBy": {"type": "string"},
                        "limit": {"type": "number"},
                    },
                    "required": ["metric", "groupBy"],
                },
            },
            {
                "name": "trend",
                "description": "Monthly revenue series (Jan..Dec) for one region and product.",
                "parameters": {
                    "type": "object",
                    "properties": {"region": {"type": "string"}, "product": {"type": "string"}},
                    "required": ["region", "product"],
                },
            },
        ],
        "contextPolicy": {"preset": "lean", "budget": "balanced"},
        "runtime": {"language": "JavaScript"},
    },
)

# Run the agent on the runtime that carries the registered handlers, with
# max_actor_steps set to 40.
result = analyst.forward(
    client,
    {
        "schema": schema,
        "question": "Which region+product had the strongest Jan->Dec revenue growth, and which products have an average return rate above the 5% review threshold?",
    },
    {"runtime": runtime, "max_actor_steps": 40},
)

print(json.dumps(result, indent=2, sort_keys=True))

Python Self-Improving Lab Agent

A many-tool agent that runs experiments, grades them against a rubric with an independent verifier, and distills verified rules into memory – iterating until the rubric passes.

Provider: openai
Env: OPENAI_API_KEY, OPENAI_APIKEY
Level: advanced
Run: npm run example -- python src/examples/python/long-agents/self-improving-lab.py
Source: src/examples/python/long-agents/self-improving-lab.py

Python

import json
import os
import re

from axllm import OpenAICompatibleClient, agent, ax
from axllm.runtime_quickjs import AxQuickJsCodeRuntime

# Accept either spelling of the key variable; OPENAI_API_KEY wins when both
# are set. Exit with a hint when neither holds a value.
api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENAI_APIKEY")
if not api_key:
    raise SystemExit("Set OPENAI_API_KEY or OPENAI_APIKEY to run this example.")

# The model name can be overridden through AX_OPENAI_MODEL.
client = OpenAICompatibleClient(
    api_key=api_key,
    model=os.getenv("AX_OPENAI_MODEL", "gpt-5.4-mini"),
    model_config={"temperature": 0},
)

# ---------------------------------------------------------------------------
# The "lab": a deterministic black-box experiment. It scores an ETL config plan
# against a hidden ideal and returns, for any failing check, the exact fix --
# so the agent can converge by following the feedback, not by being told.
# ---------------------------------------------------------------------------
CHECKS = ["no-nulls", "no-duplicates", "numeric-types", "trimmed-strings", "outliers-handled"]
REMEDIES = {
    "no-nulls": "set nullPolicy=impute (or nullPolicy=drop)",
    "no-duplicates": "set dedup=on",
    "numeric-types": "set coerceTypes=on",
    "trimmed-strings": "set trim=on",
    "outliers-handled": "set outlier=clip (or outlier=winsorize)",
}


def run_in_sandbox(plan):
    """Score an ETL config plan against the five data-quality checks.

    The plan is stringified and lower-cased, then scanned for ``key=value``
    pairs (letters for the key, letters/digits for the value, optional
    whitespace around ``=``). When a key appears more than once the last
    value wins.

    Returns:
        dict: ``score`` (fraction passed, 2 decimals), ``solved``, ``passed``
        (check names), ``failed`` (``{"check", "fix"}`` dicts) and ``logs``.
    """
    settings = {}
    for key, value in re.findall(r"([a-z]+)\s*=\s*([a-z0-9]+)", str(plan).lower()):
        settings[key] = value

    verdicts = {
        "no-nulls": settings.get("nullpolicy") in ("impute", "drop"),
        "no-duplicates": settings.get("dedup") == "on",
        "numeric-types": settings.get("coercetypes") == "on",
        "trimmed-strings": settings.get("trim") == "on",
        "outliers-handled": settings.get("outlier") in ("clip", "winsorize"),
    }

    # Split the checks in CHECKS order so both lists keep a stable ordering.
    passed = []
    failed = []
    for check in CHECKS:
        if verdicts[check]:
            passed.append(check)
        else:
            failed.append({"check": check, "fix": REMEDIES[check]})

    total = len(CHECKS)
    return {
        "score": round(len(passed) / total, 2),
        "solved": len(passed) == total,
        "passed": passed,
        "failed": failed,
        "logs": f"{len(passed)}/{total} checks passed",
    }


# An independent verifier -- a separate ax() program, not the agent grading itself.
verifier = ax("rubric:string, evidence:json -> passed:boolean, feedback:string, missing:string[]")
verifier.set_instruction(
    "You are an independent rubric grader, not a self-critique. Pass only when the evidence clearly satisfies every part of the rubric."
)

# In-memory rule store. Verified, reusable rules go here -- not raw failure notes.
memory_store = {}

# Host callables exposed to the runtime. Each receives one params dict `p`:
#   runExperiment -> scores p["plan"] (empty string when absent)
#   listChecks    -> returns the CHECKS list; `p` is ignored
#   grade         -> forwards rubric + evidence to the verifier program
runtime = AxQuickJsCodeRuntime()
runtime.register_callable("runExperiment", lambda p: run_in_sandbox(p.get("plan", "")))
runtime.register_callable("listChecks", lambda p: CHECKS)
runtime.register_callable("grade", lambda p: verifier.forward(client, {"rubric": p.get("rubric", ""), "evidence": p.get("evidence", [])}))


def recall_tool(p):
    """Return the stored rule texts whose key matches ``p["topic"]``.

    A key matches when it contains the whole lower-cased topic or any single
    whitespace-separated word of it. An empty topic matches every key.
    """
    topic = str(p.get("topic", "")).lower()
    words = topic.split()
    hits = []
    for key, stored in memory_store.items():
        if topic in key or any(word in key for word in words):
            hits.append(stored)
    return hits


def remember_tool(p):
    """Store ``"<rule> :: <evidence>"`` under the rule's lower-cased 48-char prefix.

    Rules that share the same first 48 characters overwrite each other.

    Returns:
        dict: ``{"stored": True, "total": <number of stored rules>}``.
    """
    rule_text = str(p.get("rule", ""))
    evidence = p.get('evidence', '')
    store_key = rule_text.lower()[:48]
    memory_store[store_key] = f"{rule_text} :: {evidence}"
    return {"stored": True, "total": len(memory_store)}


# Expose the two memory handlers to the runtime under the names used in the
# agent's function specs.
runtime.register_callable("recall", recall_tool)
runtime.register_callable("remember", remember_tool)


def _spec(name, description, props, required=None):
    return {
        "name": name,
        "description": description,
        "parameters": {"type": "object", "properties": props, **({"required": required} if required else {})},
    }


# Agent signature: inputs `goal` and `rubric`; outputs `answer`, `experiments`
# and `learnedRules`. The five tool specs name the callables registered on
# `runtime` above.
self_improving = agent(
    'goal:string, rubric:string -> answer:string, experiments:string[] "Plans tried, in order", learnedRules:string[]',
    {
        "contextFields": [],
        "functions": [
            _spec("runExperiment", "Apply an ETL config plan; returns score, solved, passed[], failed[{check,fix}], logs. Pass an empty plan to discover the fixes.", {"plan": {"type": "string"}}, ["plan"]),
            _spec("listChecks", "List the data-quality checks the experiment evaluates.", {}),
            _spec("grade", "Independent rubric grader. Pass only when the evidence meets the rubric.", {"rubric": {"type": "string"}, "evidence": {"type": "array", "items": {"type": "string"}}}, ["rubric", "evidence"]),
            _spec("recall", "Recall verified rules relevant to a topic.", {"topic": {"type": "string"}}, ["topic"]),
            _spec("remember", "Store a verified, reusable rule (the rule, not raw notes).", {"rule": {"type": "string"}, "evidence": {"type": "string"}}, ["rule", "evidence"]),
        ],
        "contextPolicy": {"preset": "adaptive", "budget": "balanced"},
        "executorOptions": {
            # Step-by-step playbook, joined into one newline-separated string.
            "description": "\n".join([
                "Use the tools -- do not answer from your own knowledge.",
                "1. recall('etl data quality') to reuse anything already learned.",
                "2. runExperiment('') once to see every failing check and its fix.",
                "3. Build a plan applying all the fixes, then runExperiment again. Repeat until solved is true.",
                "4. grade the passing evidence against the rubric.",
                "5. For each check you fixed, remember(rule, evidence).",
                "6. Then return the answer, the plans you tried, and the learned rules.",
            ]),
        },
        "runtime": {"language": "JavaScript"},
    },
)

# Run the agent on the runtime that carries the registered callables, with
# max_actor_steps set to 18.
result = self_improving.forward(
    client,
    {
        "goal": "Find an ETL config plan that cleans the dirty dataset so every data-quality check passes.",
        "rubric": "All five checks (no-nulls, no-duplicates, numeric-types, trimmed-strings, outliers-handled) must pass, i.e. score 1.0.",
    },
    {"runtime": runtime, "max_actor_steps": 18},
)

print(json.dumps(result, indent=2, sort_keys=True))
# Add the rules the agent reported to the in-memory store so a later recall
# can reuse them. The key is the same lower-cased 48-character prefix that
# remember() uses, so a rule saved through remember() during the run already
# has a "rule :: evidence" entry here. setdefault only fills missing keys,
# which keeps that evidence instead of replacing it with the bare rule text.
for rule in result.get("learnedRules", []) or []:
    memory_store.setdefault(str(rule).lower()[:48], str(rule))
print(f"\nMemory now holds {len(memory_store)} rule(s) for next time.")

Python Skills + Memory Ops Assistant

An on-call assistant that recalls past decisions from a memory store and loads the right runbook skill on demand, using the agent skills and memories subsystems.

Provider: openai
Env: OPENAI_API_KEY, OPENAI_APIKEY
Level: advanced
Run: npm run example -- python src/examples/python/long-agents/skills-and-memory-assistant.py
Source: src/examples/python/long-agents/skills-and-memory-assistant.py

Python

import json
import os

from axllm import OpenAICompatibleClient, agent
from axllm.runtime_quickjs import AxQuickJsCodeRuntime

# Accept either spelling of the key variable; OPENAI_API_KEY wins when both
# are set. Exit with a hint when neither holds a value.
api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENAI_APIKEY")
if not api_key:
    raise SystemExit("Set OPENAI_API_KEY or OPENAI_APIKEY to run this example.")

client = OpenAICompatibleClient(
    api_key=api_key,
    # gpt-5.4 (not -mini): the recall/discover loop needs reasoning to proactively
    # pull memories + runbooks instead of stopping to ask for clarification.
    # The model name can be overridden through AX_OPENAI_MODEL.
    model=os.getenv("AX_OPENAI_MODEL", "gpt-5.4"),
    model_config={"temperature": 0},
)

# ---------------------------------------------------------------------------
# Memory store -- remembered decisions and postmortems. In production this is a
# vector DB / BM25 index; here a tiny KV with substring matching. The actor
# pulls relevant entries into scope via `await recall([...])`.
# ---------------------------------------------------------------------------
memory_store = {
    "decision/db-failover": "Decision (2026-02): during a primary DB failover, freeze writes via the feature flag `writes.enabled=false` BEFORE promoting the replica. Promoting first caused split-brain in inc-118.",
    "postmortem/inc-118": "inc-118 root cause: replica promoted while primary still accepted writes. Mitigation: write-freeze flag + 90s replication-lag gate.",
    "decision/customer-comms": "Decision: for Sev-1s affecting enterprise tenants, post a status-page update within 15 minutes and notify named TAMs directly.",
}


def on_memories_search(searches, already_loaded):
    """Return the memories whose id or content contains any search string.

    Matching is a case-insensitive substring test. Each memory is returned at
    most once per call, even when several queries match it, and memories whose
    id appears in ``already_loaded`` are never returned.

    Args:
        searches: iterable of query strings; None is treated as empty.
        already_loaded: iterable of dicts with an ``id`` key; None is treated
            as empty.

    Returns:
        list[dict]: ``{"id", "content"}`` entries, ordered by first matching
        query and then by store order.
    """
    # Ids to skip: those already loaded plus those matched earlier in this call.
    seen = {m.get("id") for m in (already_loaded or [])}
    matches = []
    for query in searches or []:
        needle = str(query).lower()
        for mid, content in memory_store.items():
            if mid in seen:
                continue
            if needle in mid.lower() or needle in content.lower():
                # Record the id so a later query cannot add the same memory again.
                seen.add(mid)
                matches.append({"id": mid, "content": content})
    return matches


# ---------------------------------------------------------------------------
# Skill store -- runbooks loaded into the executor prompt on demand via
# `await discover({ skills: [...] })`. Loaded skills persist across calls.
# ---------------------------------------------------------------------------
skill_store = [
    {
        "id": "runbook-db-failover",
        "name": "DB failover runbook",
        "content": "## DB failover\n1. Set `writes.enabled=false`.\n2. Wait for replication lag < 5s.\n3. Promote replica.\n4. Re-point app via service discovery.\n5. Re-enable writes. 6. File postmortem within 48h.",
    },
    {
        "id": "runbook-status-comms",
        "name": "Status communications runbook",
        "content": "## Status comms\n- Sev-1: status-page update within 15m, every 30m thereafter.\n- Enterprise impact: notify named TAMs directly.\n- Keep updates factual; no ETAs you cannot keep.",
    },
]


def on_skills_search(searches):
    """Return the skills whose id, name or content contains any search string.

    Matching is a case-insensitive substring test. Each skill is returned at
    most once per call, even when several queries match it.

    Args:
        searches: iterable of query strings; None is treated as empty.

    Returns:
        list[dict]: the matching ``skill_store`` entries, ordered by first
        matching query and then by store order.
    """
    found = []
    seen_ids = set()
    for query in searches or []:
        needle = str(query).lower()
        for skill in skill_store:
            if skill["id"] in seen_ids:
                continue
            if (
                needle in skill["id"].lower()
                or needle in skill["name"].lower()
                or needle in skill["content"].lower()
            ):
                # Record the id so a later query cannot add the same skill again.
                seen_ids.add(skill["id"])
                found.append(skill)
    return found


# Agent signature: input `situation`; outputs `guidance` and `steps`.
assistant = agent(
    'situation:string -> guidance:string "What to do, grounded in our decisions and runbooks", steps:string[]',
    {
        "contextFields": [],
        # A base skill always loaded, independent of search.
        "skills": [
            {
                "id": "house-style",
                "name": "house-style",
                "content": "Be concise and operational. Prefer our remembered decisions over generic advice. Never invent flag names or steps -- cite the runbook.",
            }
        ],
        # Search callbacks backed by the in-file memory and skill stores.
        "onMemoriesSearch": on_memories_search,
        "onSkillsSearch": on_skills_search,
        # The four callbacks below only print the ids (or the name, for loaded
        # skills without an id) of the entries handed to them.
        "onLoadedMemories": lambda results: print("[memories loaded]", ", ".join(r.get("id", "") for r in results)),
        "onLoadedSkills": lambda results: print("[skills loaded]", ", ".join(r.get("id") or r.get("name") for r in results)),
        "onUsedMemories": lambda results: print("[memories used]", ", ".join(r.get("id", "") for r in results)),
        "onUsedSkills": lambda results: print("[skills used]", ", ".join(r.get("id", "") for r in results)),
        "executorOptions": {
            # Instructions joined into one newline-separated string.
            "description": "\n".join([
                "You do NOT know our internal flag names, incident history, or runbook steps from your own training.",
                "The only source of truth is our memory (past decisions/postmortems) and our runbook skills.",
                "1. recall the relevant past decisions and postmortems (e.g. the failover decision, inc-118).",
                "2. discover the matching runbook skill and read its exact steps and flag names.",
                "3. Answer with the precise ordered procedure, citing our exact flag names and runbook steps.",
                "Generic best-practice advice is WRONG here. Do NOT answer from general knowledge and do NOT ask for clarification -- recall and discover first.",
            ]),
        },
        "runtime": {"language": "JavaScript"},
    },
)

# Run the agent once with a fresh QuickJS runtime and max_actor_steps set to 12.
result = assistant.forward(
    client,
    {
        "situation": (
            "Our primary database is unhealthy and we're about to fail over -- the same class of "
            "incident as inc-118, and enterprise checkout is affected. Per our remembered decisions "
            "and runbooks: what is the exact ordered procedure, and which specific feature flag must "
            "we set before promoting the replica?"
        ),
        # Forward memories seed the first turn and reset before the next forward.
        "memories": [
            {
                "id": "incident/current",
                "content": "Current incident: enterprise checkout is affected; treat it as Sev-1 until proven otherwise.",
            }
        ],
    },
    {
        "runtime": AxQuickJsCodeRuntime(),
        "max_actor_steps": 12,
        # Same-ID forward skills override constructor presets and remain loaded.
        "skills": [
            {
                "id": "house-style",
                "name": "house-style",
                "content": "Be concise, operational, and explicit about ordering. Prefer remembered decisions over generic advice. Cite exact runbook steps.",
            }
        ],
    },
)

print("\n=== Response ===")
print(json.dumps(result, indent=2, sort_keys=True))

Python Smart Defaults Agent

Shows AxAgent smart defaults: oversized undeclared context stays out of the prompt while relevance hints and runtime tools guide the agent.

Provider: google-gemini
Env: GOOGLE_APIKEY
Level: advanced
Run: npm run example -- python src/examples/python/long-agents/smart-defaults-agent.py
Source: src/examples/python/long-agents/smart-defaults-agent.py

Python

import json
import os

from axllm import GoogleGeminiClient, agent
from axllm.runtime_quickjs import AxQuickJsCodeRuntime

# Read the Gemini API key from the environment; exit with a hint when it is
# unset or empty.
api_key = os.getenv("GOOGLE_APIKEY")
if not api_key:
    raise SystemExit("Set GOOGLE_APIKEY to run this example.")

# Client object passed to the agent's forward() call further down.
client = GoogleGeminiClient(api_key=api_key, model="gemini-3.5-flash")

# Six timestamped incident events, listed in chronological order.
TIMELINE = [
    "09:12 checkout-edge v812 deployed behind 25% of traffic",
    "09:18 payments gateway p95 rose from 420ms to 4.8s",
    "09:22 cart completion dropped 31% for enterprise accounts",
    "09:27 retries saturated the checkout-edge connection pool",
    "09:31 rollback to v811 started",
    "09:36 p95 returned below 700ms after pool reset",
]

# Repeat the timeline under 28 numbered "log shard" headings, separated by
# blank lines, to produce one large input string.
incident_log = "\n\n".join(
    f"# log shard {i + 1}\n" + "\n".join(TIMELINE) for i in range(28)
)

INCIDENT_SUMMARY = {
    "service": "checkout",
    "severity": "sev-1",
    "rootCause": "checkout-edge v812 retried payment gateway calls without bounded concurrency, saturating the shared connection pool.",
    "errorRate": "38%",
    "affectedSessions": 1284,
    "candidateRunbook": "payments-timeout-runbook",
    "relevantMemory": "decision-enterprise-comms",
}


def summarize_incident(p):
    """Return a copy of the canned incident summary for the requested service.

    Only the ``service`` field follows ``p["service"]`` (default "checkout");
    every other field comes from ``INCIDENT_SUMMARY``, which is not modified.
    """
    requested_service = p.get("service", "checkout")
    return {**INCIDENT_SUMMARY, "service": requested_service}


def get_timeline(p):
    """Return one ``{"service", "event"}`` dict per TIMELINE entry, in order.

    The ``service`` value is ``p["service"]`` (default "checkout"); the
    events themselves do not depend on it.
    """
    service = p.get("service", "checkout")
    entries = []
    for event in TIMELINE:
        entries.append({"service": service, "event": event})
    return entries


def get_runbook(p):
    """Return the canned three-step runbook, labelled with the requested id.

    The ``id`` is ``p["id"]`` (default "payments-timeout-runbook"); the steps
    are the same for every id, and a new list is built on each call.
    """
    steps = [
        "Freeze checkout deploys and page the payments owner.",
        "Rollback checkout-edge to v811 and reset saturated pools.",
        "Post enterprise status update after error rate stays below 2%.",
    ]
    runbook_id = p.get("id", "payments-timeout-runbook")
    return {"id": runbook_id, "steps": steps}


# Register the three host handlers on the runtime under the same names that
# are advertised in the "functions" specs below.
runtime = AxQuickJsCodeRuntime()
runtime.register_callable("summarizeIncident", summarize_incident)
runtime.register_callable("getTimeline", get_timeline)
runtime.register_callable("getRunbook", get_runbook)

# Agent signature: inputs `incidentLog` and `question`; outputs `rootCause`,
# `actions` and `evidence`.
analyst = agent(
    'incidentLog:string, question:string -> rootCause:string, actions:string[] "Recommended remediation actions from the runbook", evidence:string[]',
    {
        "name": "SmartDefaultsIncidentAgent",
        "description": "Investigate checkout incidents using runtime tools, relevance hints, and compact evidence.",
        # No contextFields and no autoUpgrade option: oversized incidentLog is promoted by default.
        "functions": [
            {
                "name": "summarizeIncident",
                "description": "Summarize the current checkout incident and name the strongest runbook and memory matches.",
                "parameters": {
                    "type": "object",
                    "properties": {"service": {"type": "string"}},
                    "required": ["service"],
                },
            },
            {
                "name": "getTimeline",
                "description": "Return concrete timestamped evidence for the checkout incident.",
                "parameters": {
                    "type": "object",
                    "properties": {"service": {"type": "string"}},
                    "required": ["service"],
                },
            },
            {
                "name": "getRunbook",
                "description": "Fetch the operational runbook steps for a relevant incident pattern.",
                "parameters": {
                    "type": "object",
                    "properties": {"id": {"type": "string"}},
                    "required": ["id"],
                },
            },
        ],
        # Catalog ids below match the candidateRunbook / relevantMemory values
        # returned by summarizeIncident.
        "skillsCatalog": [
            {
                "id": "payments-timeout-runbook",
                "name": "Payments timeout runbook",
                "content": "Use when checkout latency follows payment gateway retry amplification.",
            },
            {
                "id": "status-comms-runbook",
                "name": "Status communications",
                "content": "Use when customer-facing enterprise account updates are required.",
            },
        ],
        "memoriesCatalog": [
            {
                "id": "decision-enterprise-comms",
                "content": "For sev-1 checkout incidents, send an enterprise status update only after rollback is complete and error rate is below 2%.",
            },
            {
                "id": "checkout-v812-rollback",
                "content": "checkout-edge v812 rollback completed cleanly once saturated payment pools were reset.",
            },
        ],
        "executorOptions": {
            # Instructions joined into one newline-separated string.
            "description": "\n".join(
                [
                    "Call the bare async runtime functions summarizeIncident, getTimeline, and getRunbook before answering.",
                    "Use top-level await, for example: const s = await summarizeIncident({service:'checkout'});",
                    "The large incidentLog input is intentionally not declared as a context field; smart defaults keep it available at runtime without flooding the prompt.",
                    "Return the root cause, the first three remediation actions, and concrete evidence.",
                ]
            )
        },
        "runtime": {"language": "JavaScript"},
    },
)

# Run the agent on the runtime that carries the registered handlers, with
# max_actor_steps set to 30.
result = analyst.forward(
    client,
    {
        "incidentLog": incident_log,
        "question": "Find the root cause, first three remediation actions, and concrete evidence for the checkout payment incident.",
    },
    {"runtime": runtime, "max_actor_steps": 30},
)

print(json.dumps(result, indent=2, sort_keys=True))