Audio Transcription API
This guide shows Node.js developers how to integrate the API to submit audio files for transcription and retrieve the results.
Before you start integrating the API, ensure you have obtained a valid API token. This token is necessary for authenticating your requests.
Setup
Install the required packages: axios for making HTTP requests, and form-data for building the multipart/form-data bodies needed for file uploads.
npm install axios form-data
Authentication
To authenticate requests to the API, include your bearer token in the headers of each request:
const headers = {
Authorization: `Bearer ${token}`, // Replace `${token}` with your actual API token
};
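Rather than repeating this header on every call, you can create a preconfigured axios instance; a minimal sketch (the base URL matches the examples below):

const axios = require("axios");

// Preconfigured client that attaches the bearer token to every request
const apiClient = axios.create({
  baseURL: "https://api.44ai.ch",
  headers: { Authorization: `Bearer ${token}` }, // your actual API token
});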
Transcribing Audio Files
The API provides two methods for submitting pre-recorded audio for transcription (for streaming use cases, see Live Transcription below):
Method 1: File Upload (Recommended)
Use this endpoint to transcribe audio files by uploading them directly:
POST /v2/audio/transcribe_file
Request Parameters
- audio_files (required): One or more audio files to transcribe
- language (required): Language code for transcription (e.g., de, fr, it, en, es, de-CH)
- glossary (optional): Array of glossary backend names to bias the transcription
- patient_context_id (optional): Patient context identifier to provide additional information for transcription
Supported Audio Formats
- MP3
- WAV
- WebM
- MP4
- M4A
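If you want to reject unsupported files before uploading, a simple client-side extension check against this list could look like the following sketch (the helper is illustrative; the API still validates files server-side):

const path = require("path");

// Extensions corresponding to the supported formats listed above
const SUPPORTED_EXTENSIONS = [".mp3", ".wav", ".webm", ".mp4", ".m4a"];

function isSupportedAudioFile(filePath) {
  return SUPPORTED_EXTENSIONS.includes(path.extname(filePath).toLowerCase());
}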
Example Code
Here's a Node.js example using axios and form-data to submit an audio file:
const axios = require("axios");
const FormData = require("form-data");
const fs = require("fs");
async function transcribeAudioFile(filePath, token, options = {}) {
const url = `https://api.44ai.ch/v2/audio/transcribe_file`;
const formData = new FormData();
// Add the audio file(s)
formData.append("audio_files", fs.createReadStream(filePath));
// Add required language parameter
formData.append("language", options.language || "de-CH");
// Add optional parameters
if (options.glossary) {
formData.append("glossary", JSON.stringify(options.glossary));
}
if (options.patientContextId) {
formData.append("patient_context_id", options.patientContextId);
}
try {
const response = await axios.post(url, formData, {
headers: {
...formData.getHeaders(),
Authorization: `Bearer ${token}`,
},
});
console.log("Transcription queued:", response.data);
return response.data;
} catch (error) {
console.error("Error:", error.response?.data || error.message);
throw error;
}
}
// Usage
transcribeAudioFile("./recording.m4a", "your-api-token", {
language: "de-CH",
glossary: ["medical-terms", "cardiology"],
patientContextId: "patient-123",
});
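Because audio_files accepts one or more files, you can append the field once per file. A multi-file variant might look like this sketch (it assumes the server accepts repeated audio_files fields in one multipart request):

async function transcribeMultipleFiles(filePaths, token, options = {}) {
  const formData = new FormData();
  // Append the audio_files field once per file
  for (const filePath of filePaths) {
    formData.append("audio_files", fs.createReadStream(filePath));
  }
  formData.append("language", options.language || "de-CH");
  const response = await axios.post(
    "https://api.44ai.ch/v2/audio/transcribe_file",
    formData,
    { headers: { ...formData.getHeaders(), Authorization: `Bearer ${token}` } }
  );
  return response.data;
}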
Method 2: Base64 Encoding
Use this endpoint to transcribe base64-encoded audio:
POST /v2/audio/transcribe_base64
Request Body
{
"file_base64": "base64-encoded-audio-data",
"file_name": "recording.m4a",
"language": "de-CH",
"glossary": ["medical-terms"],
"additional_audio_base64": ["base64-encoded-audio-2"],
"patient_context_id": "patient-123"
}
Parameters
- file_base64 (required): Base64 encoded audio payload
- file_name (required): Original file name including extension (e.g., "recording.m4a")
- language (required): Language hint for transcription (e.g., de, fr, it, en, es, de-CH)
- glossary (optional): List of glossary names to bias the transcription
- additional_audio_base64 (optional): Additional audio payloads that will be concatenated before transcription
- patient_context_id (optional): Patient context to provide additional information
Example Code
async function transcribeAudioBase64(
audioBase64,
fileName,
token,
options = {}
) {
const url = `https://api.44ai.ch/v2/audio/transcribe_base64`;
const requestBody = {
file_base64: audioBase64,
file_name: fileName,
language: options.language || "de-CH",
glossary: options.glossary,
additional_audio_base64: options.additionalAudio,
patient_context_id: options.patientContextId,
};
try {
const response = await axios.post(url, requestBody, {
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${token}`,
},
});
console.log("Transcription queued:", response.data);
return response.data;
} catch (error) {
console.error("Error:", error.response?.data || error.message);
throw error;
}
}
// Usage
const fs = require("fs");
const audioBuffer = fs.readFileSync("./recording.m4a");
const audioBase64 = audioBuffer.toString("base64");
transcribeAudioBase64(audioBase64, "recording.m4a", "your-api-token", {
language: "de-CH",
glossary: ["medical-terms"],
});
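If a consultation spans several recordings, additional_audio_base64 lets you submit them in one request; the payloads are concatenated before transcription. For example, using the function above (the file names are placeholders):

const part1 = fs.readFileSync("./part1.m4a").toString("base64");
const part2 = fs.readFileSync("./part2.m4a").toString("base64");

transcribeAudioBase64(part1, "part1.m4a", "your-api-token", {
  language: "de-CH",
  additionalAudio: [part2], // sent as additional_audio_base64
});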
Live Transcription (Audio Chunks)
For real-time or streaming transcription scenarios, use the audio chunk endpoint:
POST /v2/audio/audio_chunk
Starting a Session
To start a new live transcription session, use "NEW" as the task_id:
async function startLiveTranscription(audioChunk, token) {
const url = `https://api.44ai.ch/v2/audio/audio_chunk`;
const requestBody = {
audio_base64: audioChunk,
sample_rate: 16000,
file_format: "webm",
task_id: "NEW",
final: false,
parameters: {
template: "de-internal-medicine",
language: "de-CH",
glossary: ["medical-terms"],
},
};
try {
const response = await axios.post(url, requestBody, {
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${token}`,
},
});
console.log("Session started:", response.data);
return response.data.task_id;
} catch (error) {
console.error("Error:", error.response?.data || error.message);
throw error;
}
}
Sending Subsequent Chunks
Once you have a task_id, send subsequent chunks using that ID:
async function sendAudioChunk(audioChunk, taskId, token, isFinal = false) {
const url = `https://api.44ai.ch/v2/audio/audio_chunk`;
const requestBody = {
audio_base64: audioChunk,
sample_rate: 16000,
file_format: "webm",
task_id: taskId,
final: isFinal,
parameters: {
template: "de-internal-medicine",
language: "de-CH",
},
};
try {
const response = await axios.post(url, requestBody, {
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${token}`,
},
});
return response.data;
} catch (error) {
console.error("Error:", error.response?.data || error.message);
throw error;
}
}
Audio Chunk Parameters
- audio_base64 (required): Base64 encoded chunk of audio data
- sample_rate (required): Sample rate of the audio (e.g., 16000, 44100)
- file_format (required): Format of the audio (mp3, wav, webm, mp4)
- task_id (required): Use "NEW" to start a session, then use the returned task_id for subsequent chunks
- final (required): Set to true for the last chunk of the session
- parameters (required): Consultation parameters including template and language
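Putting both functions together, the sketch below streams a pre-recorded file in fixed-size chunks. In a real application the chunks would come from a live recorder (for example MediaRecorder in a browser); the 32 KB chunk size is arbitrary, and the sketch assumes the recording is longer than one chunk and that the service reassembles raw byte chunks server-side:

const fs = require("fs");

async function streamFileAsChunks(filePath, token) {
  const buffer = fs.readFileSync(filePath);
  const chunkSize = 32 * 1024; // illustrative chunk size
  const chunks = [];
  for (let offset = 0; offset < buffer.length; offset += chunkSize) {
    chunks.push(buffer.subarray(offset, offset + chunkSize).toString("base64"));
  }

  // The first chunk starts the session and returns the task_id
  const taskId = await startLiveTranscription(chunks[0], token);

  // Send the remaining chunks, marking the last one as final
  for (let i = 1; i < chunks.length; i++) {
    await sendAudioChunk(chunks[i], taskId, token, i === chunks.length - 1);
  }
  return taskId;
}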
Handling Responses
Successful Submission
A successful submission will return HTTP status 200 with a response containing the task_id:
{
"task_id": "3jfck3qipv6l5g8",
"status": "QUEUED",
"percentage": 0,
"started_at": "2024-11-11T10:30:00Z",
"completed_at": null,
"status_changes_at": {
"CREATED": "2024-11-11T10:30:00Z"
}
}
Error Responses
400 Bad Request
Returned when the request is malformed, with error messages such as:
"No audio file provided"
"File name is required"
"Invalid base64 audio file"
"Could not decode base64 audio file"
401 Unauthorized
Returned when the API token is invalid or missing.
404 Not Found (Audio Chunks)
For the audio chunk endpoint, returned when the specified task_id is not found:
"Task not found"
Polling for Task Completion
To check the status of a transcription task, use the tasks endpoint:
POST /v2/tasks
Request Body
{
"task_id": "3jfck3qipv6l5g8"
}
Example Code
async function checkTaskStatus(taskId, token) {
const url = `https://api.44ai.ch/v2/tasks`;
try {
const response = await axios.post(
url,
{ task_id: taskId },
{
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${token}`,
},
}
);
console.log("Task Status:", response.data);
return response.data;
} catch (error) {
console.error("Error:", error.response?.data || error.message);
throw error;
}
}
Task Statuses
The task progresses through several statuses:
- CREATED: Task has been created
- QUEUED: Task is waiting in the queue
- STARTED: Task processing has begun
- AUDIO_PREPROCESSING: Audio is being preprocessed
- AUDIO_PROCESSING: Audio is being transcribed
- TRANSCRIPTION_DONE: Transcription is complete
- DONE: Task is fully complete and results are available
- FAILED: Task failed due to an error
Polling Example
Keep polling the endpoint until the status is DONE:
async function waitForTranscription(taskId, token, maxAttempts = 60) {
for (let i = 0; i < maxAttempts; i++) {
const result = await checkTaskStatus(taskId, token);
if (result.status === "DONE") {
console.log("Transcription complete!");
return result.result;
}
if (result.status === "FAILED") {
throw new Error("Transcription failed");
}
console.log(`Progress: ${result.percentage}% - Status: ${result.status}`);
// Wait 2 seconds before next poll
await new Promise((resolve) => setTimeout(resolve, 2000));
}
throw new Error("Transcription timed out");
}
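Combining submission and polling gives a complete flow; a minimal sketch using the functions defined earlier:

async function transcribeAndWait(filePath, token) {
  // Submit the file (Method 1) and receive a task_id
  const submission = await transcribeAudioFile(filePath, token, { language: "de-CH" });
  // Poll until the task is DONE, then return the transcript
  return waitForTranscription(submission.task_id, token);
}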
Task Result
When the task status is DONE, the result will include the transcript:
{
"task_id": "3jfck3qipv6l5g8",
"status": "DONE",
"percentage": 100,
"started_at": "2024-11-11T10:30:00Z",
"completed_at": "2024-11-11T10:32:15Z",
"result": {
"bubbles": [
{
"text": "Patient presents with headache and fever.",
"start": 0.0,
"end": 3.5
},
{
"text": "Symptoms began two days ago.",
"start": 3.5,
"end": 6.2
}
]
}
}
Each bubble in the transcript contains:
- text: The transcribed text
- start: Start time in seconds
- end: End time in seconds
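To turn the bubbles into a plain transcript, you can join the text fields; for example:

// Flatten the bubbles into a single transcript string
function bubblesToText(result) {
  return result.bubbles.map((bubble) => bubble.text).join(" ");
}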
Once a task is marked as DONE or FAILED and retrieved via the /v2/tasks endpoint, it will no longer be available for subsequent fetches. Make sure to save the results when you retrieve them.
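Because the result cannot be re-fetched, persist it as soon as it arrives; a minimal sketch (the output file name is illustrative):

const fs = require("fs");

async function saveTranscript(taskId, token) {
  // Fetch once and write to disk immediately; the task cannot be fetched again
  const result = await waitForTranscription(taskId, token);
  fs.writeFileSync(`transcript-${taskId}.json`, JSON.stringify(result, null, 2));
  return result;
}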