Long-Horizon Agents — TypeScript examples backed by real provider calls. (Tags: typescript, examples; paths: examples/long-agents, src/examples/typescript/long-agents.)

These TypeScript examples are real runnable files. Edit the source file first; this page is rebuilt from the checked-in example and its metadata header.

TypeScript Incident Log Forensics (RLM)

Infers service architecture and root-cause findings from a huge CloudWatch export that never enters the prompt — held in contextFields and worked through the runtime under a lean contextPolicy.

TypeScript
import { AxAIGoogleGeminiModel, AxJSRuntime, agent, ai } from '@ax-llm/ax';

// Fail fast when the Gemini credential is missing or empty.
const apiKey = process.env.GOOGLE_APIKEY;
if (apiKey === undefined || apiKey === '') {
  throw new Error('Set GOOGLE_APIKEY to run this example.');
}

// Provider handle passed to forward() further down.
const llm = ai({
  name: 'google-gemini',
  apiKey,
  config: { model: AxAIGoogleGeminiModel.Gemini35Flash },
});

// ---------------------------------------------------------------------------
// Synthetic CloudWatch-style export — generated large on purpose. Dumping these
// raw events into a prompt would blow the context window. The agent keeps them
// in its runtime (contextFields) and only the *evidence it extracts* ever
// reaches the model. Deterministic so the example is reproducible.
// ---------------------------------------------------------------------------
/** One synthetic CloudWatch-style record produced by buildLogDump(). */
type LogEvent = {
  /** ISO-8601 instant derived from the tick index. */
  timestamp: string;
  level: 'INFO' | 'WARN' | 'ERROR';
  service: string;
  /** `req-<100000 + tick>`; every event emitted on the same tick shares it. */
  requestId: string;
  statusCode?: number;
  latencyMs?: number;
  tenantTier?: 'free' | 'growth' | 'enterprise';
  message: string;
};

/**
 * Generates the deterministic synthetic export: 1600 ticks, two seconds apart,
 * starting at 2026-03-02T13:00:00Z. Every tick carries healthy gateway and
 * search-api traffic; two incident windows layer extra events on top.
 *
 * @returns Every event in emission order (tick by tick).
 */
function buildLogDump(): LogEvent[] {
  const TICK_COUNT = 1600;
  const TICK_MS = 2000;
  const epochMs = Date.parse('2026-03-02T13:00:00Z');
  const dump: LogEvent[] = [];

  // Stamps the tick-derived timestamp and request id onto an event body.
  function record(
    tick: number,
    body: Omit<LogEvent, 'timestamp' | 'requestId'>
  ): void {
    const stamped: LogEvent = {
      timestamp: new Date(epochMs + tick * TICK_MS).toISOString(),
      requestId: `req-${100000 + tick}`,
      ...body,
    };
    dump.push(stamped);
  }

  // Routine, healthy traffic emitted on every tick.
  function emitBaseline(tick: number): void {
    record(tick, {
      level: 'INFO',
      service: 'gateway',
      statusCode: 200,
      latencyMs: 40 + (tick % 30),
      message: 'route ok GET /checkout',
    });
    record(tick, {
      level: 'INFO',
      service: 'search-api',
      statusCode: 200,
      latencyMs: 70 + (tick % 50),
      message: 'query ok q=shoes',
    });
  }

  // Window A, ticks 300-519: payments-gw timeouts surface as checkout-api 502s
  // for enterprise tenants; every third tick adds pool-exhaustion and
  // user-visible warnings.
  function emitPaymentsCascade(tick: number): void {
    record(tick, {
      level: 'ERROR',
      service: 'payments-gw',
      statusCode: 504,
      latencyMs: 10000,
      tenantTier: 'enterprise',
      message: 'upstream timeout calling acquirer (10s)',
    });
    record(tick, {
      level: 'ERROR',
      service: 'checkout-api',
      statusCode: 502,
      tenantTier: 'enterprise',
      message: 'bad gateway from svc-payments-gw',
    });
    if (tick % 3 !== 0) {
      return;
    }
    record(tick, {
      level: 'WARN',
      service: 'payments-gw',
      message: 'connection pool exhausted (max=64) waiting=200+',
    });
    record(tick, {
      level: 'WARN',
      service: 'checkout-api',
      tenantTier: 'enterprise',
      message: 'user-visible: "Payment could not be processed"',
    });
  }

  // Window B, ticks 1000-1119: catalog-cron pins CPU while search-api returns 429s.
  function emitSearchThrottle(tick: number): void {
    record(tick, {
      level: 'WARN',
      service: 'catalog-cron',
      latencyMs: 0,
      message: 'rebuild step pinning CPU at 95% on shared node',
    });
    record(tick, {
      level: 'ERROR',
      service: 'search-api',
      statusCode: 429,
      message: 'rate limited: downstream catalog unavailable',
    });
  }

  for (let tick = 0; tick < TICK_COUNT; tick += 1) {
    emitBaseline(tick);
    if (tick >= 300 && tick < 520) {
      emitPaymentsCascade(tick);
    }
    if (tick >= 1000 && tick < 1120) {
      emitSearchThrottle(tick);
    }
  }

  return dump;
}

// Build the export once; only its size is printed here, never its contents.
const logs = buildLogDump();
console.log(`Generated ${logs.length} log events (kept out of the prompt).`);

// Agent signature: `task` and `logs` in; architecture, findings, overallHealth
// and nextActions out. `logs` is also named in contextFields below.
const logRLM = agent(
  'task:string, logs:json "Raw CloudWatch export; keep this out of the prompt" -> architecture:string[] "Services and how they call each other", findings:json[] "Each: issue, count, window, evidence, impact", overallHealth:string, nextActions:string[]',
  {
    runtime: new AxJSRuntime(),
    // The export stays in the runtime; only extracted evidence reaches the model.
    contextFields: ['logs'],
    contextPolicy: {
      preset: 'lean',
      budget: 'balanced',
    },
    // Turn limit handed to the agent (exact semantics are defined by the library).
    maxTurns: 40,
    // Cap how much runtime output is echoed back into the action log per turn.
    maxRuntimeChars: 12_000,
    // Watch the actor work through the export.
    agentStatusCallback: (message, status) => {
      console.log(`[${status}] ${message}`);
    },
  }
);

// Single run: the full event array is passed as `logs` alongside the task text.
const report = await logRLM.forward(llm, {
  logs,
  task: 'Infer the service architecture from the logs alone. Then find repeated errors, throttles, retries, and bad user states — with the affected time window, an occurrence count, and concrete log evidence for each.',
});

// Pretty-print the structured result.
console.log('\n=== Report ===');
console.log(JSON.stringify(report, null, 2));

// Staged usage breaks token spend into the context (distiller) stage vs the
// task (executor + responder) stage — useful for costing long-context runs.
console.log('\n=== Staged usage ===');
console.log(JSON.stringify(logRLM.getStagedUsage(), null, 2));

TypeScript Codebase Q&A with a Peek Context Map

Answers several dependency questions over one large module index by building and reusing an evolving context map (the “peek” orientation cache), so later questions skip re-scanning the corpus.

TypeScript
import {
  AxAgentContextMap,
  AxAIGoogleGeminiModel,
  AxJSRuntime,
  agent,
  ai,
} from '@ax-llm/ax';

// Fail fast when the Gemini credential is missing or empty.
const apiKey = process.env.GOOGLE_APIKEY;
if (apiKey === undefined || apiKey === '') {
  throw new Error('Set GOOGLE_APIKEY to run this example.');
}

// Provider handle reused for every question asked below.
const llm = ai({
  name: 'google-gemini',
  apiKey,
  config: { model: AxAIGoogleGeminiModel.Gemini35Flash },
});

// ---------------------------------------------------------------------------
// A large module-dependency index for a monorepo. Each block is a record the
// agent must *search* to answer — the answers cannot be guessed, only computed
// by filtering the index. Generated large so it would not fit comfortably in a
// prompt; it lives in contextFields and is queried from the runtime.
// ---------------------------------------------------------------------------
/** One index entry: a module path, the packages/files it imports, and a table name or '-'. */
type ModuleRecord = { path: string; imports: string[]; writes: string };

/**
 * Assembles the module index: eleven hand-written records followed by 110
 * generated `featureN/handler.ts` records.
 *
 * @returns The records in that order (121 in total).
 */
function buildModuleIndex(): ModuleRecord[] {
  const NO_WRITES = '-';
  const GENERATED_COUNT = 110;

  // Small constructor so each hand-written record fits on one or two lines.
  const entry = (
    path: string,
    imports: string[],
    writes: string = NO_WRITES
  ): ModuleRecord => ({ path, imports, writes });

  const handWritten: ModuleRecord[] = [
    entry('packages/api/middleware/auth.ts', ['packages/shared']),
    entry('packages/api/middleware/rateLimit.ts', ['packages/db']),
    entry('packages/api/routes/checkout.ts', [
      'packages/api/middleware/auth.ts',
      'packages/services/orders/createOrder.ts',
      'packages/services/payments/charge.ts',
    ]),
    entry('packages/api/routes/search.ts', [
      'packages/api/middleware/auth.ts',
      'packages/services/catalog/searchCatalog.ts',
    ]),
    entry(
      'packages/services/orders/createOrder.ts',
      ['packages/db', 'packages/clients/bus'],
      'orders'
    ),
    entry('packages/services/orders/orderRepo.ts', ['packages/db'], 'orders'),
    entry(
      'packages/services/payments/charge.ts',
      ['packages/clients/acquirer', 'packages/db'],
      'payments'
    ),
    entry(
      'packages/services/payments/refund.ts',
      ['packages/clients/acquirer', 'packages/db'],
      'refunds'
    ),
    entry('packages/services/catalog/searchCatalog.ts', ['packages/db']),
    entry('packages/clients/acquirer/index.ts', ['packages/shared']),
    entry('packages/clients/bus/index.ts', ['packages/shared']),
  ];

  // Bulk records: every 4th imports the acquirer client, every 6th writes 'audit'.
  const generated = Array.from(
    { length: GENERATED_COUNT },
    (_unused, n): ModuleRecord => {
      const firstImport =
        n % 4 === 0 ? 'packages/clients/acquirer' : 'packages/db';
      const table = n % 6 === 0 ? 'audit' : NO_WRITES;
      return entry(
        `packages/services/feature${n}/handler.ts`,
        [firstImport, 'packages/shared'],
        table
      );
    }
  );

  return handWritten.concat(generated);
}

const modules = buildModuleIndex();

// Render each record as a three-line PATH / IMPORTS / WRITES stanza and
// separate stanzas with one blank line.
const stanzas: string[] = [];
for (const record of modules) {
  const lines = [
    `PATH: ${record.path}`,
    `IMPORTS: ${record.imports.join(', ')}`,
    `WRITES: ${record.writes}`,
  ];
  stanzas.push(lines.join('\n'));
}
const codebaseIndex = stanzas.join('\n\n');

console.log(`Module index: ${modules.length} records (kept out of the prompt).`);

// The map is small and persistable. evolveSteps: 1 lets the first query refine
// it; later queries reuse it as compact orientation instead of re-deriving it.
const map = new AxAgentContextMap(undefined, {
  maxChars: 1_800,
  infiniteEvolve: false,
  evolveSteps: 1,
});

// Agent signature: `context` and `question` in; `answer` and `paths` out.
// `context` is also named in contextFields below.
const analyst = agent(
  'context:string, question:string -> answer:string, paths:string[] "Exact PATH values from the index that answer the question"',
  {
    runtime: new AxJSRuntime(),
    contextFields: ['context'],
    contextPolicy: {
      preset: 'adaptive',
      budget: 'balanced',
    },
    contextOptions: {
      description:
        'The context is a module index of "PATH / IMPORTS / WRITES" records. Answer by filtering those records in code — never guess. Return exact PATH values verbatim.',
    },
    contextMap: {
      // The same `map` instance is shared by every forward() call in the loop below.
      map,
      // Print the map text each time the library reports an update.
      onUpdate: ({ map: updatedMap }) => {
        console.log(`\n[context map updated]\n${updatedMap.text}`);
      },
    },
    maxTurns: 24,
  }
);

// Three lookups answered against the same index string.
const questions = [
  "Which modules import 'packages/clients/acquirer'? Give the exact PATH values.",
  "Which modules write to the 'orders' table?",
  'What are the direct IMPORTS of packages/api/routes/checkout.ts?',
];

// Asked one after another (each call is awaited before the next starts).
for (const question of questions) {
  const result = await analyst.forward(llm, {
    context: codebaseIndex,
    question,
  });
  console.log('\nQ:', question);
  console.log('A:', result.answer);
  // `paths` may be absent, so fall back to an empty list before joining.
  console.log('Paths:', (result.paths ?? []).join(', '));
}

// Dump the map's snapshot as JSON so it can be stored between runs.
console.log('\nPersist this context-map snapshot between runs:');
console.log(JSON.stringify(map.snapshot(), null, 2));

TypeScript Data Analyst (Large Context + Tools)

Combines a large data dictionary held in contextFields with typed fn() warehouse tools, so the agent answers business questions over a big dataset it never has to inline.

TypeScript
import {
  AxAIGoogleGeminiModel,
  AxJSRuntime,
  agent,
  ai,
  f,
  fn,
} from '@ax-llm/ax';

// Fail fast when the Gemini credential is missing or empty.
const apiKey = process.env.GOOGLE_APIKEY;
if (apiKey === undefined || apiKey === '') {
  throw new Error('Set GOOGLE_APIKEY to run this example.');
}

// Provider handle passed to forward() further down.
const llm = ai({
  name: 'google-gemini',
  apiKey,
  config: { model: AxAIGoogleGeminiModel.Gemini35Flash },
});

// ---------------------------------------------------------------------------
// The "warehouse": a few hundred rows that live in the host process and are
// reachable only through tools. The model never sees the rows — it queries them.
// Deterministic so the example is reproducible.
// ---------------------------------------------------------------------------
/** One sales fact: a single region x product x month cell. */
type Row = {
  region: string;
  product: string;
  /** Zero-based position of `month` within MONTHS. */
  monthIndex: number;
  month: string;
  units: number;
  /** units multiplied by the product's unit price. */
  revenue: number;
  /** Rounded to three decimals. */
  returnRate: number;
};

// Calendar order; buildWarehouse() emits one row per entry for each region/product pair.
const MONTHS = [
  'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
  'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
];

/**
 * Generates the in-memory sales table: 8 regions x 4 products x 12 months.
 * All figures come from a fixed-seed recurrence, so every call returns the
 * same rows.
 *
 * @returns Rows ordered by region, then product, then month.
 */
function buildWarehouse(): Row[] {
  const REGIONS = ['North', 'South', 'East', 'West', 'Central', 'NW', 'NE', 'SE'];
  const PRODUCTS = ['Widget-A', 'Widget-B', 'Gadget-X', 'Gadget-Y'];

  // Fixed-seed recurrence; each call advances the state and yields a value in [0, 1].
  let state = 7;
  function nextUnit(): number {
    state = (state * 1103515245 + 12345) & 0x7fffffff;
    return state / 0x7fffffff;
  }

  const table: Row[] = [];
  REGIONS.forEach((region) => {
    PRODUCTS.forEach((product) => {
      // East / Gadget-X is the planted winner: a much steeper monthly trend.
      const isPlantedWinner = product === 'Gadget-X' && region === 'East';
      const monthlyUnitTrend = isPlantedWinner ? 90 : 25;
      const unitPrice = product.startsWith('Gadget') ? 60 : 38;
      // Widget-B carries an extra 0.03 on its return rate.
      const returnPenalty = product === 'Widget-B' ? 0.03 : 0;

      MONTHS.forEach((month, monthIndex) => {
        // Draw order matters: units first, then the return rate.
        const units = Math.round(
          400 + nextUnit() * 1200 + monthIndex * monthlyUnitTrend
        );
        const returnRate = Number(
          (0.01 + nextUnit() * 0.05 + returnPenalty).toFixed(3)
        );
        table.push({
          region,
          product,
          monthIndex,
          month,
          units,
          revenue: units * unitPrice,
          returnRate,
        });
      });
    });
  });
  return table;
}

// Row table generated once; the tool handlers defined below read it directly.
const warehouse = buildWarehouse();

// The schema/data dictionary is large-ish and goes into contextFields, so the
// agent can orient itself on column meaning and business rules without the doc
// ever entering the prompt.
// The template literal is trimmed, so the text starts at "TABLE sales".
const schema = `
TABLE sales (one row per region x product x month)

COLUMNS
  region       text   one of: North, South, East, West, Central, NW, NE, SE
  product      text   one of: Widget-A, Widget-B, Gadget-X, Gadget-Y
  month        text   Jan..Dec (calendar order; monthIndex 0..11)
  units        int    units sold that month
  revenue      int    integer dollars (units * unit price; Gadgets cost more)
  returnRate   float  fraction of units returned, 0..1

BUSINESS RULES
  - "Growth" = change in monthly revenue from Jan to Dec for a region+product.
  - A return rate above 0.05 (5%) is flagged for quality review.
  - Compare like-for-like: always group by region AND product, not either alone.

TOOLS AVAILABLE
  warehouse.query   filter + aggregate a slice
  warehouse.top     rank a metric grouped by product or region
  warehouse.trend   monthly revenue series (Jan..Dec) for one region+product
`.trim();

// warehouse.query: aggregate the rows that pass every supplied filter.
const queryTool = fn('query')
  .namespace('warehouse')
  .description(
    'Filter the sales table and return aggregates for the matching rows.'
  )
  .arg('region', f.string('Optional region filter').optional())
  .arg('product', f.string('Optional product filter').optional())
  .arg('month', f.string('Optional month filter, e.g. Jan').optional())
  .returns(
    f.object({
      matched: f.number('Number of rows matched'),
      totalUnits: f.number(),
      totalRevenue: f.number(),
      avgReturnRate: f.number(),
    })
  )
  .handler(({ region, product, month }) => {
    let matched = 0;
    let totalUnits = 0;
    let totalRevenue = 0;
    let returnRateSum = 0;

    // Single pass; an omitted or empty filter accepts every row.
    for (const row of warehouse) {
      if (region && row.region !== region) continue;
      if (product && row.product !== product) continue;
      if (month && row.month !== month) continue;
      matched += 1;
      totalUnits += row.units;
      totalRevenue += row.revenue;
      returnRateSum += row.returnRate;
    }

    // Mean return rate rounded to four decimals; 0 when nothing matched.
    const avgReturnRate =
      matched === 0 ? 0 : Number((returnRateSum / matched).toFixed(4));
    return { matched, totalUnits, totalRevenue, avgReturnRate };
  })
  .build();

// warehouse.top: total a metric per product or per region and return the
// highest totals first.
const topTool = fn('top')
  .namespace('warehouse')
  .description('Rank a metric grouped by product or region, highest first.')
  .arg('metric', f.string('revenue or units'))
  .arg('groupBy', f.string('product or region'))
  .arg('limit', f.number('How many groups to return').optional())
  .returns(f.json('Array of { key, value } sorted by value descending'))
  .handler(({ metric, groupBy, limit }) => {
    // Normalise case/whitespace, then reject anything unsupported. Previously
    // "Units" or "Region" silently fell through to revenue / product, handing
    // back figures for a different question than the one asked.
    const metricName = metric.trim().toLowerCase();
    const groupName = groupBy.trim().toLowerCase();
    if (metricName !== 'revenue' && metricName !== 'units') {
      throw new Error(
        `Unsupported metric "${metric}"; use "revenue" or "units".`
      );
    }
    if (groupName !== 'product' && groupName !== 'region') {
      throw new Error(
        `Unsupported groupBy "${groupBy}"; use "product" or "region".`
      );
    }

    // slice(0, n) with a negative n drops groups from the END of the ranking,
    // so clamp at zero; fractional limits are truncated.
    const count = Math.max(0, Math.trunc(limit ?? 5));

    const totals = new Map<string, number>();
    for (const r of warehouse) {
      const key = groupName === 'region' ? r.region : r.product;
      const value = metricName === 'units' ? r.units : r.revenue;
      totals.set(key, (totals.get(key) ?? 0) + value);
    }
    return [...totals.entries()]
      .map(([key, value]) => ({ key, value }))
      .sort((a, b) => b.value - a.value)
      .slice(0, count);
  })
  .build();

// warehouse.trend: twelve revenue values for one region/product pair, indexed
// by monthIndex; slots with no matching row stay 0.
const trendTool = fn('trend')
  .namespace('warehouse')
  .description('Monthly revenue series (Jan..Dec) for one region and product.')
  .arg('region', f.string())
  .arg('product', f.string())
  .returns(f.number('Revenue for each month, Jan..Dec').array())
  .handler(({ region, product }) => {
    const monthlyRevenue: number[] = Array.from({ length: 12 }, () => 0);
    warehouse
      .filter((row) => row.region === region && row.product === product)
      .forEach((row) => {
        monthlyRevenue[row.monthIndex] = row.revenue;
      });
    return monthlyRevenue;
  })
  .build();

// Agent signature: `schema` and `question` in; `answer` and `evidence` out.
const analyst = agent(
  'schema:string, question:string -> answer:string, evidence:string[] "Concrete figures the answer is based on"',
  {
    runtime: new AxJSRuntime(),
    // Big data dictionary stays out of the prompt.
    contextFields: ['schema'],
    // Tools reach the data the prompt never sees.
    functions: [queryTool, topTool, trendTool],
    contextPolicy: {
      preset: 'lean',
      budget: 'balanced',
    },
    maxTurns: 40,
    executorOptions: {
      // Three instruction lines joined with newlines into one description string.
      description: [
        'Consult the schema for column meaning and business rules.',
        'Answer using the warehouse tools — never invent figures.',
        'Group by region AND product when comparing. Cite concrete numbers as evidence.',
      ].join('\n'),
    },
  }
);

// One two-part question: Jan->Dec growth leader, and products over the 5% return threshold.
const result = await analyst.forward(llm, {
  schema,
  question:
    'Which region+product had the strongest Jan->Dec revenue growth, and which products have an average return rate above the 5% review threshold?',
});

// Pretty-print the structured result.
console.log(JSON.stringify(result, null, 2));

TypeScript Self-Improving Lab Agent

A many-tool agent that runs experiments, grades them against a rubric with an independent verifier, and distills verified rules into memory — iterating until the rubric passes.

TypeScript
import { AxAIOpenAIModel, AxJSRuntime, agent, ai, ax, f, fn } from '@ax-llm/ax';

// Accept either spelling of the variable. `||` (not `??`) is deliberate: an
// OPENAI_API_KEY that is set but empty must not hide a valid OPENAI_APIKEY.
const apiKey = process.env.OPENAI_API_KEY || process.env.OPENAI_APIKEY;
if (!apiKey) {
  throw new Error('Set OPENAI_API_KEY or OPENAI_APIKEY to run this example.');
}

// Provider handle used by the agent run and by the verifier.grade tool below.
const llm = ai({
  name: 'openai',
  apiKey,
  config: {
    model: AxAIOpenAIModel.GPT54Mini,
    temperature: 0,
  },
});

// ---------------------------------------------------------------------------
// The "lab": a deterministic black-box experiment. It scores an ETL config plan
// against a hidden ideal by checking how many data-quality checks it satisfies.
// The agent must discover the right flags by experimenting, not by being told.
// ---------------------------------------------------------------------------
// Data-quality checks in the order they are evaluated and reported.
const CHECKS = [
  'no-nulls',
  'no-duplicates',
  'numeric-types',
  'trimmed-strings',
  'outliers-handled',
];

// Fix hint returned with each failing check, keyed by check name, so the
// agent can converge on a passing plan.
const REMEDIES: Record<string, string> = {
  'no-nulls': 'set nullPolicy=impute (or nullPolicy=drop)',
  'no-duplicates': 'set dedup=on',
  'numeric-types': 'set coerceTypes=on',
  'trimmed-strings': 'set trim=on',
  'outliers-handled': 'set outlier=clip (or outlier=winsorize)',
};

/**
 * Scores a config plan against the five checks.
 *
 * The plan is lower-cased and scanned for `key=value` flags (letters for the
 * key, letters/digits for the value, optional whitespace around `=`); when a
 * key repeats, the last occurrence wins.
 *
 * @param plan Free-form text containing key=value flags; may be empty.
 * @returns score (passed / total, two decimals), solved, passed check names,
 *   failed checks with their fix hint, and a one-line summary in `logs`.
 */
function runInSandbox(plan: string) {
  const flagPattern = /([a-z]+)\s*=\s*([a-z0-9]+)/g;
  const settings = new Map<string, string>();
  for (const [, key, value] of plan.toLowerCase().matchAll(flagPattern)) {
    settings.set(key, value);
  }

  // True when `key` is set to one of the accepted values.
  const isSet = (key: string, ...accepted: string[]): boolean => {
    const current = settings.get(key);
    return current !== undefined && accepted.includes(current);
  };

  const verdicts: Record<string, boolean> = {
    'no-nulls': isSet('nullpolicy', 'impute', 'drop'),
    'no-duplicates': isSet('dedup', 'on'),
    'numeric-types': isSet('coercetypes', 'on'),
    'trimmed-strings': isSet('trim', 'on'),
    'outliers-handled': isSet('outlier', 'clip', 'winsorize'),
  };

  // Partition the checks in CHECKS order.
  const passed: string[] = [];
  const failed: { check: string; fix: string }[] = [];
  for (const check of CHECKS) {
    if (verdicts[check]) {
      passed.push(check);
    } else {
      failed.push({ check, fix: REMEDIES[check] });
    }
  }

  const total = CHECKS.length;
  return {
    score: Number((passed.length / total).toFixed(2)),
    solved: passed.length === total,
    passed,
    failed,
    logs: `${passed.length}/${total} checks passed`,
  };
}

// An independent verifier — a separate ax() program, not the agent grading itself.
// Signature: `rubric` and `evidence` in; `passed`, `feedback` and `missing` out.
const verifier = ax(
  'rubric:string, evidence:json -> passed:boolean, feedback:string, missing:string[]'
);
verifier.setInstruction(
  'You are an independent rubric grader, not a self-critique. Pass only when the evidence clearly satisfies every part of the rubric.'
);

// In-memory rule store. Verified, reusable rules go here — not raw failure notes.
// Keyed by string; the memory.* tools and the post-run persist step both write to it.
const memoryStore = new Map<string, string>();

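// ---------------------------------------------------------------------------
// Tool catalog across three namespaces (lab, verifier, memory). With
// functionDiscovery the agent can pull in the ones it needs instead of
// carrying all of them in every prompt.
// NOTE(review): the agent options further down never set functionDiscovery —
// confirm it is on by default, or add it, so this comment matches the code.
// ---------------------------------------------------------------------------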
// lab.runExperiment: exposes runInSandbox as a tool. The handler forwards the
// plan string unchanged and returns the sandbox result object as-is.
const runExperiment = fn('runExperiment')
  .namespace('lab')
  .description(
    'Run one experiment: apply an ETL config plan and return the score, whether it is solved, and — for any failing check — the exact fix to apply. Start with an empty plan to see every fix.'
  )
  .arg(
    'plan',
    f.string(
      'Config plan as key=value flags, e.g. "nullPolicy=impute, dedup=on, coerceTypes=on, trim=on, outlier=clip". Pass "" to discover the fixes.'
    )
  )
  .returns(
    f.json(
      'Experiment result: score, solved, passed[], failed[{check,fix}], logs'
    )
  )
  .handler(({ plan }) => runInSandbox(plan))
  .build();

// lab.listChecks: takes no arguments and returns the CHECKS array itself (not a copy).
const listChecks = fn('listChecks')
  .namespace('lab')
  .description('List the data-quality checks the experiment evaluates.')
  .returns(f.string('Check name').array())
  .handler(() => CHECKS)
  .build();

// verifier.grade: delegates to the separate `verifier` program, using the same
// `llm` handle as the agent. The handler returns the promise from forward().
const grade = fn('grade')
  .namespace('verifier')
  .description(
    'Independent rubric grader. Pass only when the evidence meets the rubric.'
  )
  .arg('rubric', f.string('The rubric to grade against'))
  .arg('evidence', f.string('Observed experiment results').array())
  .returns(f.json('Verifier result: passed, feedback, missing[]'))
  .handler(({ rubric, evidence }) =>
    verifier.forward(llm, { rubric, evidence })
  )
  .build();

// memory.recall: return the stored rules whose key relates to the topic.
// A key matches when it contains the whole topic, the topic contains the key,
// or the key contains any single word of the topic.
const recall = fn('recall')
  .namespace('memory')
  .description('Recall verified rules relevant to a topic.')
  .arg('topic', f.string('Topic to search remembered rules for'))
  .returns(f.string('A verified rule').array())
  .handler(({ topic }) => {
    const t = topic.trim().toLowerCase();
    // A blank topic carries no search term. Without this guard `key.includes('')`
    // is true for every key and the whole store comes back (memory.list exists
    // for that purpose).
    if (!t) {
      return [];
    }
    // Split on runs of whitespace so repeated spaces cannot yield an empty
    // word, which would likewise match every key.
    const words = t.split(/\s+/);
    return [...memoryStore.entries()]
      .filter(
        ([key]) =>
          key.includes(t) ||
          t.includes(key) ||
          words.some((w) => key.includes(w))
      )
      .map(([, value]) => value);
  })
  .build();

// memory.remember: stores "<rule> :: <evidence>" in memoryStore and reports
// the store size afterwards.
const remember = fn('remember')
  .namespace('memory')
  .description(
    'Store a verified, reusable rule. Store the distilled rule, not raw failure notes.'
  )
  .arg('rule', f.string('Verified general rule'))
  .arg('evidence', f.string('Why it is true'))
  .returns(f.json('Stored confirmation'))
  .handler(({ rule, evidence }) => {
    // Key = first 48 characters of the lower-cased rule, so two rules sharing
    // that prefix overwrite each other.
    memoryStore.set(rule.toLowerCase().slice(0, 48), `${rule} :: ${evidence}`);
    return { stored: true, total: memoryStore.size };
  })
  .build();

// memory.list: returns every stored value (keys are not included).
const listMemory = fn('list')
  .namespace('memory')
  .description('List every rule currently in memory.')
  .returns(f.string('A stored rule').array())
  .handler(() => [...memoryStore.values()])
  .build();

// Agent signature: `goal` and `rubric` in; `answer`, `experiments` and
// `learnedRules` out. All six tools are handed over through `functions`.
const selfImprovingAgent = agent(
  'goal:string, rubric:string -> answer:string, experiments:string[] "Plans tried, in order", learnedRules:string[]',
  {
    runtime: new AxJSRuntime(),
    // No input field is routed through contextFields in this example.
    contextFields: [],
    functions: [runExperiment, listChecks, grade, recall, remember, listMemory],
    contextPolicy: {
      preset: 'adaptive',
      budget: 'balanced',
    },
    maxTurns: 18,
    executorOptions: {
      // Numbered procedure joined with newlines into one description string.
      description: [
        'Use the tools — do not answer from your own knowledge.',
        '1. Call memory.recall("etl data quality") to reuse anything already learned.',
        '2. Call lab.runExperiment("") once to see every failing check and its fix.',
        '3. Build a plan that applies all the fixes, then call lab.runExperiment again. Repeat until the result has solved=true.',
        '4. Call verifier.grade with the passing evidence against the rubric.',
        '5. For EACH check you fixed, call memory.remember(rule, evidence) — store the reusable rule, not raw notes.',
        '6. Only after the rules are stored, return the answer, the plans you tried, and the learned rules.',
      ].join('\n'),
    },
  }
);

// Single run: the goal plus the rubric the verifier grades against.
const result = await selfImprovingAgent.forward(llm, {
  goal: 'Find an ETL config plan that cleans the dirty dataset so every data-quality check passes.',
  rubric:
    'All five checks (no-nulls, no-duplicates, numeric-types, trimmed-strings, outliers-handled) must pass, i.e. score 1.0.',
});

console.log(JSON.stringify(result, null, 2));

// Persist the agent's verified rules so a future run's memory.recall reuses them.
// (The agent may also call memory.remember mid-run; this guarantees the durable
// store is populated either way.)
// Only fill in keys that are still missing: memory.remember stores
// "<rule> :: <evidence>" under the same key, and overwriting it with the bare
// rule would throw the evidence away. `?? []` tolerates a missing list.
for (const rule of result.learnedRules ?? []) {
  const key = rule.toLowerCase().slice(0, 48);
  if (!memoryStore.has(key)) {
    memoryStore.set(key, rule);
  }
}
console.log(`\nMemory now holds ${memoryStore.size} rule(s) for next time:`);
console.log([...memoryStore.values()].map((r) => ` • ${r}`).join('\n'));

TypeScript Skills + Memory Ops Assistant

An on-call assistant that recalls past decisions from a memory store and loads the right runbook skill on demand, using the agent skills and memories subsystems.

TypeScript
import {
  type AxAgentMemoriesSearchFn,
  type AxAgentMemoryResult,
  type AxAgentSkillsSearchFn,
  AxAIOpenAIModel,
  AxJSRuntime,
  agent,
  ai,
} from '@ax-llm/ax';

// Accept either spelling of the variable. `||` (not `??`) is deliberate: an
// OPENAI_API_KEY that is set but empty must not hide a valid OPENAI_APIKEY.
const apiKey = process.env.OPENAI_API_KEY || process.env.OPENAI_APIKEY;
if (!apiKey) {
  throw new Error('Set OPENAI_API_KEY or OPENAI_APIKEY to run this example.');
}

// Provider handle passed to forward() further down.
const llm = ai({
  name: 'openai',
  apiKey,
  config: {
    model: AxAIOpenAIModel.GPT54,
    temperature: 0,
  },
});

// ---------------------------------------------------------------------------
// Memory store — remembered decisions and postmortems. In production this is a
// vector DB / BM25 index; here a tiny KV with substring matching. The actor
// pulls relevant entries into scope via `await recall([...])`.
// ---------------------------------------------------------------------------
// Remembered entries keyed by id ("decision/..." or "postmortem/...").
const memoryStore: Record<string, string> = {
  'decision/db-failover':
    'Decision (2026-02): during a primary DB failover, freeze writes via the feature flag `writes.enabled=false` BEFORE promoting the replica. Promoting first caused split-brain in inc-118.',
  'postmortem/inc-118':
    'inc-118 root cause: replica promoted while primary still accepted writes. Mitigation: write-freeze flag + 90s replication-lag gate.',
  'decision/customer-comms':
    'Decision: for Sev-1s affecting enterprise tenants, post a status-page update within 15 minutes and notify named TAMs directly.',
};

/**
 * Case-insensitive substring search over memoryStore ids and contents.
 *
 * @param searches Query strings; blank queries are ignored.
 * @param alreadyLoaded Entries the agent already holds; their ids are skipped.
 * @returns Each matching entry at most once, in first-match order.
 */
const onMemoriesSearch: AxAgentMemoriesSearchFn = async (
  searches,
  alreadyLoaded
) => {
  // Starts with the ids already loaded and grows as matches are emitted, so an
  // entry that satisfies several queries is returned only once.
  const seen = new Set(alreadyLoaded.map((m) => m.id));
  const matches: AxAgentMemoryResult[] = [];
  for (const query of searches) {
    const q = query.trim().toLowerCase();
    // Every string includes '', so a blank query would select the whole store.
    if (!q) continue;
    for (const [id, content] of Object.entries(memoryStore)) {
      if (seen.has(id)) continue;
      if (id.toLowerCase().includes(q) || content.toLowerCase().includes(q)) {
        matches.push({ id, content });
        seen.add(id);
      }
    }
  }
  return matches;
};

// ---------------------------------------------------------------------------
// Skill store — runbooks loaded into the executor prompt on demand via
// `await discover({ skills: [...] })`. Loaded skills persist across calls.
// ---------------------------------------------------------------------------
// Runbooks available for on-demand loading; `id` is the unique key.
const skillStore = [
  {
    id: 'runbook-db-failover',
    name: 'DB failover runbook',
    content:
      '## DB failover\n1. Set `writes.enabled=false`.\n2. Wait for replication lag < 5s.\n3. Promote replica.\n4. Re-point app via service discovery.\n5. Re-enable writes. 6. File postmortem within 48h.',
  },
  {
    id: 'runbook-status-comms',
    name: 'Status communications runbook',
    content:
      '## Status comms\n- Sev-1: status-page update within 15m, every 30m thereafter.\n- Enterprise impact: notify named TAMs directly.\n- Keep updates factual; no ETAs you cannot keep.',
  },
] as const;

/**
 * Case-insensitive substring search over each skill's id, name and content.
 *
 * @param searches Query strings; blank queries are ignored.
 * @returns Each matching skill at most once, in first-match order.
 */
const onSkillsSearch: AxAgentSkillsSearchFn = async (searches) => {
  // Keyed by skill id so a runbook that matches several queries is returned
  // once rather than once per query.
  const hits = new Map<string, (typeof skillStore)[number]>();
  for (const query of searches) {
    const q = query.trim().toLowerCase();
    // Every string includes '', so a blank query would select every skill.
    if (!q) continue;
    for (const skill of skillStore) {
      if (hits.has(skill.id)) continue;
      if (
        skill.id.toLowerCase().includes(q) ||
        skill.name.toLowerCase().includes(q) ||
        skill.content.toLowerCase().includes(q)
      ) {
        hits.set(skill.id, skill);
      }
    }
  }
  return [...hits.values()];
};

// Agent signature: `situation` in; `guidance` and `steps` out.
const assistant = agent(
  'situation:string -> guidance:string "What to do, grounded in our decisions and runbooks", steps:string[]',
  {
    runtime: new AxJSRuntime(),
    // No input field is routed through contextFields in this example.
    contextFields: [],
    // A base skill that is always loaded, independent of search.
    skills: [
      {
        name: 'house-style',
        content:
          'Be concise and operational. Prefer our remembered decisions over generic advice. Never invent flag names or steps — cite the runbook.',
      },
    ],
    // Search callbacks backed by the in-file memory and skill stores.
    onMemoriesSearch,
    onSkillsSearch,
    // Observability: what got loaded and what the actor actually used.
    onLoadedMemories: (results) => {
      console.log('[memories loaded]', results.map((r) => r.id).join(', '));
    },
    onLoadedSkills: (results) => {
      // Falls back to the skill name when a result carries no id.
      console.log(
        '[skills loaded]',
        results.map((r) => r.id ?? r.name).join(', ')
      );
    },
    onUsedMemories: (used) => {
      console.log('[memories used]', used);
    },
    onUsedSkills: (used) => {
      console.log('[skills used]', used);
    },
    maxTurns: 10,
  }
);

// Single run with one on-call scenario.
const result = await assistant.forward(llm, {
  situation:
    'Our primary database is unhealthy and we may need to fail over. Enterprise checkout is affected. What exactly should I do, in order?',
});

// Pretty-print the structured result.
console.log('\n=== Response ===');
console.log(JSON.stringify(result, null, 2));
Docs