Convert text to natural-sounding speech audio
The speech endpoint converts text into audio using AI-powered voices.
POST /v1/talk/speech
Requires a do_live_* API key with the talk:speech scope. Pass the key in one of two ways:
- Authorization: Bearer <key> header (recommended)
- X-API-Key header
See Authentication for details.
| Parameter | Type | Required | Default | Description |
|---|---|---|---|---|
text | string | Yes | - | The text to convert to speech (max 10,000 characters) |
voice | string | No | asteria | Voice ID to use (see Voices) |
format | string | No | mp3 | Output format: mp3, wav, flac, aac, opus |
speed | number | No | 1.0 | Speaking speed (0.5 to 2.0) |
sampleRate | number | No | varies | Sample rate in Hz (8000, 16000, 24000, 48000) |
customVoiceId | string | No | - | ElevenLabs voice ID for custom/cloned voices |
curl -X POST "https://api.do.dev/v1/talk/speech" \
-H "Authorization: Bearer do_live_your_key_here" \
-H "Content-Type: application/json" \
-d '{
"text": "Welcome to the Talk API! This is a demonstration of text-to-speech.",
"voice": "aria",
"format": "mp3",
"speed": 1.0
}' \
--output output.mp3
Returns the audio file directly as binary data.
Response Headers:
| Header | Description |
|---|---|
Content-Type | Audio MIME type (e.g., audio/mpeg for MP3) |
X-Characters-Used | Number of characters processed |
X-Audio-Duration | Audio duration in seconds (Deepgram voices only) |
Missing or invalid text:
{
"error": "'text' is required and must be a string"
}
Text too long:
{
"error": "'text' exceeds maximum length of 10,000 characters"
}
Invalid voice:
{
"error": "Invalid voice: unknown. Valid voices: thalia, helena, aria, ..."
}
Invalid speed:
{
"error": "'speed' must be a number between 0.5 and 2.0"
}
Missing API key:
{
"error": "API key required. Use Authorization: Bearer <key> or X-API-Key header."
}
Rate limit exceeded:
{
"error": "Rate limit exceeded"
}
| Voice ID | Display Name | Gender | Accent | Description |
|---|---|---|---|---|
thalia | Zara | Female | American | Clear, Confident, Energetic |
helena | Grace | Female | American | Caring, Natural, Friendly |
aria | Claire | Female | American | Warm, Professional, Expressive |
cora | Sage | Female | American | Smooth, Calm, Soothing |
emma | Victoria | Female | British | Elegant, Refined, Clear |
evelyn | Maya | Female | American | Warm, Empathetic, Approachable |
apollo | Max | Male | American | Confident, Casual, Comfortable |
orion | Drake | Male | American | Deep, Authoritative, Clear |
theo | Finn | Male | American | Friendly, Natural, Warm |
marcus | Blake | Male | American | Professional, Confident, Clear |
james | Oliver | Male | British | Refined, Articulate, Warm |
| Voice ID | Display Name | Gender | Accent | Description |
|---|---|---|---|---|
asteria | Echo | Female | American | Classic, Clear |
luna | Serena | Female | American | Soft, Gentle |
stella | Nova | Female | American | Bright, Energetic |
athena | Iris | Female | British | Sophisticated, Wise |
zeus | Atlas | Male | American | Commanding, Strong |
orpheus | Phoenix | Male | American | Smooth, Melodic |
For custom/cloned voices, pass the ElevenLabs voice ID:
curl -X POST "https://api.do.dev/v1/talk/speech" \
-H "Authorization: Bearer do_live_your_key_here" \
-H "Content-Type: application/json" \
-d '{
"text": "Hello from my custom voice!",
"customVoiceId": "your_elevenlabs_voice_id"
}' \
--output custom.mp3
| Format | Content-Type | Description |
|---|---|---|
mp3 | audio/mpeg | Most compatible, good compression |
wav | audio/wav | Uncompressed, highest quality |
flac | audio/flac | Lossless compression |
aac | audio/aac | Good for Apple devices |
opus | audio/opus | Web-optimized, efficient |
/**
 * Request speech audio for `text` from the Talk API and start playing it.
 *
 * Browser-only: relies on `Audio` and `URL.createObjectURL`. `API_KEY` is read
 * from the surrounding scope and sent as a Bearer token.
 *
 * @param {string} text - Text to convert to speech.
 * @param {string} [voice="aria"] - Voice ID to request.
 * @returns {Promise<HTMLAudioElement>} The audio element, resolved once
 *   `play()` has settled successfully.
 * @throws {Error} When the API answers with a non-OK status. The message is
 *   the API's `error` string when the body is JSON carrying one; otherwise it
 *   is a summary containing the HTTP status code.
 * @throws Whatever `audio.play()` rejects with (for example when the browser
 *   blocks playback), instead of leaving that rejection unhandled.
 */
async function generateSpeech(text, voice = "aria") {
  const response = await fetch("https://api.do.dev/v1/talk/speech", {
    method: "POST",
    headers: {
      "Authorization": `Bearer ${API_KEY}`,
      "Content-Type": "application/json"
    },
    body: JSON.stringify({ text, voice })
  });

  if (!response.ok) {
    let message = `Speech request failed with HTTP ${response.status}`;
    try {
      const payload = await response.json();
      if (typeof payload?.error === "string" && payload.error !== "") {
        message = payload.error;
      }
    } catch {
      // The error body was not JSON (e.g. an HTML page from a proxy);
      // keep the status-based message rather than surfacing a SyntaxError.
    }
    throw new Error(message);
  }

  // Play audio in browser
  const blob = await response.blob();
  const url = URL.createObjectURL(blob);
  const audio = new Audio(url);

  // Blob URLs stay alive until revoked; release this one when the element is
  // finished with it so repeated calls do not accumulate them.
  const releaseUrl = () => URL.revokeObjectURL(url);
  audio.addEventListener("ended", releaseUrl, { once: true });
  audio.addEventListener("error", releaseUrl, { once: true });

  try {
    // play() returns a promise; awaiting it lets the caller see a refusal.
    await audio.play();
  } catch (err) {
    releaseUrl();
    throw err;
  }
  return audio;
}
await generateSpeech("Hello, this is a test!", "aria");
import requests
import os
def generate_speech(text, voice="aria", format="mp3", timeout=60):
    """Convert text to speech through the Talk API and return the audio bytes.

    The API key is read from the ``DO_API_KEY`` environment variable and sent
    as a Bearer token.

    Args:
        text: Text to convert to speech.
        voice: Voice ID sent as the request's ``voice`` field.
        format: Output format sent as the request's ``format`` field.
        timeout: Timeout in seconds handed to ``requests.post``. Without one,
            requests waits indefinitely for a stalled server. Pass ``None``
            to restore the old wait-forever behavior.

    Returns:
        The raw response body (the audio data) as ``bytes``.

    Raises:
        KeyError: If ``DO_API_KEY`` is not set in the environment.
        requests.HTTPError: If the API responds with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.post(
        "https://api.do.dev/v1/talk/speech",
        headers={
            "Authorization": f"Bearer {os.environ['DO_API_KEY']}",
            "Content-Type": "application/json"
        },
        json={
            "text": text,
            "voice": voice,
            "format": format
        },
        timeout=timeout,
    )
    # Turn 4xx/5xx answers into exceptions so callers never write an error
    # payload to disk as if it were audio.
    response.raise_for_status()
    return response.content
audio = generate_speech("Hello, this is a test!")
with open("output.mp3", "wb") as f:
    f.write(audio)
import fs from "fs";
/**
 * Convert text to speech through the Talk API and return the audio bytes.
 *
 * The API key is read from `process.env.DO_API_KEY` and sent as a Bearer token.
 *
 * @param {string} text - Text to convert to speech.
 * @param {string} [voice="aria"] - Voice ID to request.
 * @param {string} [format="mp3"] - Output format sent as the `format` field.
 * @returns {Promise<Buffer>} The response body (the audio data).
 * @throws {Error} When the API answers with a non-OK status. The message is
 *   the API's `error` string when the body is JSON carrying one; otherwise it
 *   is a summary containing the HTTP status code.
 */
async function generateSpeech(text, voice = "aria", format = "mp3") {
  const response = await fetch("https://api.do.dev/v1/talk/speech", {
    method: "POST",
    headers: {
      "Authorization": `Bearer ${process.env.DO_API_KEY}`,
      "Content-Type": "application/json"
    },
    body: JSON.stringify({ text, voice, format })
  });

  if (!response.ok) {
    let message = `Speech request failed with HTTP ${response.status}`;
    try {
      const payload = await response.json();
      if (typeof payload?.error === "string" && payload.error !== "") {
        message = payload.error;
      }
    } catch {
      // The error body was not JSON (e.g. an HTML page from a gateway);
      // keep the status-based message rather than surfacing a SyntaxError.
    }
    throw new Error(message);
  }

  return Buffer.from(await response.arrayBuffer());
}
// Example usage: synthesize with the default voice ("aria") and format ("mp3"),
// then write the returned bytes to output.mp3.
const audio = await generateSpeech("Hello, this is a test!");
fs.writeFileSync("output.mp3", audio);