Audio | Ax

These C++ examples are real runnable files. Edit the source file first; this page is rebuilt from the checked-in example and its metadata header.

C++ Text To Speech

Generates speech audio through OpenAI.

Provider: openai
Env: OPENAI_API_KEY, OPENAI_APIKEY
Level: beginner
Run: npm run example -- cpp src/examples/cpp/audio/speech_audio.cpp
Source: src/examples/cpp/audio/speech_audio.cpp

C++

#include "axllm/axllm.hpp"
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <variant>


int main() {
  const char* key = std::getenv("OPENAI_API_KEY");
  if (key == nullptr || std::string(key).empty()) key = std::getenv("OPENAI_APIKEY");
  if (key == nullptr || std::string(key).empty()) {
    std::cerr << "Set OPENAI_API_KEY or OPENAI_APIKEY to run this example.\n";
    return 2;
  }
  const char* model = std::getenv("AX_OPENAI_MODEL");
  axllm::OpenAIResponsesClient client(axllm::object({
      {"api_key", key},
      {"model", model == nullptr || std::string(model).empty() ? "gpt-5.4-mini" : model},
      {"model_config", axllm::object({{"temperature", 0}})},
  }));
  axllm::Value speech = client.speak(axllm::object({{"text", "Ax turns LLM prompts into typed programs."}, {"voice", "alloy"}, {"format", "mp3"}}));
  axllm::Value audio_value = axllm::Core::get(speech, "audio");
  double audio_len = audio_value.is_string() ? static_cast<double>(std::get<std::string>(audio_value.data).size()) : 0.0;
  std::cout << axllm::stringify(axllm::object({{"format", axllm::Core::get(speech, "format")}, {"audioBytesBase64", audio_len}})) << "\n";
}

C++ Speech To Text

Transcribes a checked-in WAV file through OpenAI.

Provider: openai
Env: OPENAI_API_KEY, OPENAI_APIKEY
Level: intermediate
Run: npm run example -- cpp src/examples/cpp/audio/transcribe_audio.cpp
Source: src/examples/cpp/audio/transcribe_audio.cpp

C++

#include "axllm/axllm.hpp"
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>


// transcribe() expects the audio as a base64 string (same contract as the
// TypeScript/Python/Go/Java examples). C++ has no standard base64, so encode here.
//
// Encodes the bytes of `in` using the standard base64 alphabet (A-Z, a-z, 0-9,
// '+', '/') and pads the result with '=' to a multiple of four characters.
// An empty input yields an empty string.
static std::string b64encode(const std::string& in) {
  static const char t[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  std::string out;
  out.reserve(((in.size() + 2) / 3) * 4);  // 4 output chars per 3 input bytes
  // The accumulator must be unsigned: it is shifted left on every byte and never
  // cleared, so a signed int would overflow (undefined behaviour) after a few
  // bytes. Unsigned wraparound is well defined, and only the low 12 bits are
  // ever read back, so the discarded high bits do not matter.
  unsigned int val = 0;
  int bits = -6;  // (bits + 6) is the number of buffered bits not yet emitted
  for (unsigned char c : in) {
    val = (val << 8) | c;
    bits += 8;
    while (bits >= 0) {
      out.push_back(t[(val >> bits) & 0x3F]);
      bits -= 6;
    }
  }
  // Flush the 2 or 4 leftover bits, left-aligned inside a final 6-bit group.
  if (bits > -6) out.push_back(t[((val << 8) >> (bits + 8)) & 0x3F]);
  while (out.size() % 4) out.push_back('=');
  return out;
}


// Reads the checked-in presentation.wav, sends it base64-encoded to
// client.transcribe(), and prints the stringified result.
// Exit status: 2 when no API key variable is set, 1 when the WAV file cannot
// be opened or yields no data.
int main() {
  const char* key = std::getenv("OPENAI_API_KEY");
  if (key == nullptr || std::string(key).empty()) key = std::getenv("OPENAI_APIKEY");
  if (key == nullptr || std::string(key).empty()) {
    std::cerr << "Set OPENAI_API_KEY or OPENAI_APIKEY to run this example.\n";
    return 2;
  }
  const char* model = std::getenv("AX_OPENAI_MODEL");
  axllm::OpenAIResponsesClient client(axllm::object({
      {"api_key", key},
      {"model", model == nullptr || std::string(model).empty() ? "gpt-5.4-mini" : model},
      {"model_config", axllm::object({{"temperature", 0}})},
  }));

  // The path is relative to the current working directory. Fail loudly instead
  // of sending an empty audio payload when the file is missing.
  const char* wav_path = "src/examples/assets/presentation.wav";
  std::ifstream file(wav_path, std::ios::binary);
  if (!file) {
    std::cerr << "Could not open " << wav_path << " (path is relative to the working directory).\n";
    return 1;
  }
  std::ostringstream buffer;
  buffer << file.rdbuf();
  const std::string audio_bytes = buffer.str();
  if (audio_bytes.empty()) {
    std::cerr << "No audio data could be read from " << wav_path << ".\n";
    return 1;
  }

  axllm::Value transcript = client.transcribe(axllm::object({{"audio", b64encode(audio_bytes)}, {"language", "en"}, {"model", "gpt-4o-mini-transcribe"}, {"format", "json"}}));
  std::cout << axllm::stringify(transcript) << "\n";
  return 0;
}

C++ Audio Summary Pipeline

Transcribes audio and summarizes the transcript with an OpenAI-backed generator.

Provider: openai
Env: OPENAI_API_KEY, OPENAI_APIKEY
Level: advanced
Run: npm run example -- cpp src/examples/cpp/audio/pipeline_audio.cpp
Source: src/examples/cpp/audio/pipeline_audio.cpp

C++

#include "axllm/axllm.hpp"
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>


// transcribe() expects the audio as a base64 string (same contract as the
// TypeScript/Python/Go/Java examples). C++ has no standard base64, so encode here.
//
// Encodes the bytes of `in` using the standard base64 alphabet (A-Z, a-z, 0-9,
// '+', '/') and pads the result with '=' to a multiple of four characters.
// An empty input yields an empty string.
static std::string b64encode(const std::string& in) {
  static const char t[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  std::string out;
  out.reserve(((in.size() + 2) / 3) * 4);  // 4 output chars per 3 input bytes
  // The accumulator must be unsigned: it is shifted left on every byte and never
  // cleared, so a signed int would overflow (undefined behaviour) after a few
  // bytes. Unsigned wraparound is well defined, and only the low 12 bits are
  // ever read back, so the discarded high bits do not matter.
  unsigned int val = 0;
  int bits = -6;  // (bits + 6) is the number of buffered bits not yet emitted
  for (unsigned char c : in) {
    val = (val << 8) | c;
    bits += 8;
    while (bits >= 0) {
      out.push_back(t[(val >> bits) & 0x3F]);
      bits -= 6;
    }
  }
  // Flush the 2 or 4 leftover bits, left-aligned inside a final 6-bit group.
  if (bits > -6) out.push_back(t[((val << 8) >> (bits + 8)) & 0x3F]);
  while (out.size() % 4) out.push_back('=');
  return out;
}


// Transcribes the checked-in presentation.wav with client.transcribe(), feeds
// the transcript's "text" field into an
// "transcript:string -> summary:string, followUps:string[]" generator, and
// prints the transcript text together with the generator result.
// Exit status: 2 when no API key variable is set, 1 when the WAV file cannot
// be opened or yields no data.
int main() {
  const char* key = std::getenv("OPENAI_API_KEY");
  if (key == nullptr || std::string(key).empty()) key = std::getenv("OPENAI_APIKEY");
  if (key == nullptr || std::string(key).empty()) {
    std::cerr << "Set OPENAI_API_KEY or OPENAI_APIKEY to run this example.\n";
    return 2;
  }
  // Resolve the model once so both clients below are configured identically.
  const char* model = std::getenv("AX_OPENAI_MODEL");
  const char* model_name = model == nullptr || std::string(model).empty() ? "gpt-5.4-mini" : model;
  axllm::OpenAIResponsesClient client(axllm::object({
      {"api_key", key},
      {"model", model_name},
      {"model_config", axllm::object({{"temperature", 0}})},
  }));

  // The path is relative to the current working directory. Fail loudly instead
  // of sending an empty audio payload when the file is missing.
  const char* wav_path = "src/examples/assets/presentation.wav";
  std::ifstream file(wav_path, std::ios::binary);
  if (!file) {
    std::cerr << "Could not open " << wav_path << " (path is relative to the working directory).\n";
    return 1;
  }
  std::ostringstream buffer;
  buffer << file.rdbuf();
  const std::string audio_bytes = buffer.str();
  if (audio_bytes.empty()) {
    std::cerr << "No audio data could be read from " << wav_path << ".\n";
    return 1;
  }

  axllm::Value transcript = client.transcribe(axllm::object({{"audio", b64encode(audio_bytes)}, {"language", "en"}, {"model", "gpt-4o-mini-transcribe"}, {"format", "json"}}));
  axllm::OpenAICompatibleClient text_client(axllm::object({{"api_key", key}, {"model", model_name}, {"model_config", axllm::object({{"temperature", 0}})}}));
  axllm::AxGen summarize = axllm::ax("transcript:string -> summary:string, followUps:string[]");
  axllm::Value result = summarize.forward(text_client, axllm::object({{"transcript", axllm::Core::get(transcript, "text")}}));
  std::cout << axllm::stringify(axllm::object({{"transcript", axllm::Core::get(transcript, "text")}, {"result", result}})) << "\n";
  return 0;
}