VoiceHub
Stream bidirectional PCM audio over SignalR for real-time voice agent sessions. Full method and event reference.
VoiceHub
VoiceHub provides bidirectional PCM audio streaming between a client and a Voisnap voice agent. It is the core real-time interface for custom telephony integrations and direct audio streaming.
Connection
wss://api.voisnap.ai/hubs/voice?agentId={agentId}&access_token={jwt}
Required query parameters:
| Parameter | Description |
|---|---|
| agentId | The agent that will handle this session |
| access_token | JWT access token |
Optional query parameters:
| Parameter | Description |
|---|---|
| sessionId | Resume a specific session (for reconnection) |
| metadata | URL-encoded JSON of arbitrary key-value pairs |
Audio format
| Direction | Format | Sample Rate | Bit Depth | Channels |
|---|---|---|---|---|
| Client → Server (input) | Raw PCM (little-endian) | 16,000 Hz | 16-bit | Mono |
| Server → Client (output) | Raw PCM (little-endian) | 24,000 Hz | 16-bit | Mono |
Send audio in chunks of 20ms (640 bytes at 16kHz/16-bit/mono) for lowest latency.
Client → Server methods
SendAudioAsync
Send a chunk of raw PCM audio from the user's microphone.
await connection.invoke("SendAudioAsync", audioChunkBase64);
The argument is a base64-encoded byte array of raw PCM audio.
RequestReconnectionAsync
Request a graceful reconnection (e.g., on network instability).
await connection.invoke("RequestReconnectionAsync");
EndSessionAsync
End the voice session cleanly.
await connection.invoke("EndSessionAsync", "user_ended");
// Reason options: "user_ended", "timeout", "transfer_complete"
SendDtmfAsync
Send a DTMF tone (e.g., when the user presses a keypad button).
await connection.invoke("SendDtmfAsync", "5"); // single digit 0-9, *, #
Server → Client events
AudioReceived
Agent's TTS response audio chunk. Buffer and play immediately.
connection.on("AudioReceived", (audioBase64) => {
const bytes = base64ToArrayBuffer(audioBase64);
audioPlayer.enqueue(bytes); // your PCM playback buffer
});
Transcription
Partial (streaming) transcription of the user's speech.
connection.on("Transcription", (data) => {
// data: { text: string, isFinal: boolean, confidence: number }
if (data.isFinal) {
displayFinalTranscription(data.text);
} else {
displayPartialTranscription(data.text);
}
});
TranscriptionOf
Agent's speech transcription (what the agent said).
connection.on("TranscriptionOf", (data) => {
// data: { text: string, isFinal: boolean }
displayAgentSubtitle(data.text);
});
SessionStarted
connection.on("SessionStarted", (data) => {
// data: { conversationId: string, agentName: string, timestamp: string }
console.log(`Session started: ${data.conversationId}`);
});
SessionEnded
connection.on("SessionEnded", (data) => {
// data: { conversationId: string, reason: string, durationSeconds: number }
console.log(`Session ended after ${data.durationSeconds}s: ${data.reason}`);
connection.stop();
});
TransferInitiated
connection.on("TransferInitiated", (data) => {
// data: { transferTo: string, transferType: string, summary: string }
console.log(`Transfer to ${data.transferTo}: ${data.summary}`);
});
Error
connection.on("Error", (data) => {
// data: { code: string, message: string, recoverable: boolean }
console.error(`Hub error [${data.code}]: ${data.message}`);
if (!data.recoverable) connection.stop();
});
Reconnected
connection.on("Reconnected", (data) => {
// data: { sessionId: string, message: string }
console.log("Reconnected to session:", data.sessionId);
});
Full browser example
<!DOCTYPE html>
<html>
<head>
<title>Voisnap Voice Demo</title>
<script src="https://cdn.jsdelivr.net/npm/@microsoft/signalr@latest/dist/browser/signalr.min.js"></script>
</head>
<body>
<button id="startBtn">Start Call</button>
<button id="endBtn" disabled>End Call</button>
<div id="transcript"></div>
<script>
// ---- Configuration ----
const AGENT_ID = 'agt_01HXK8Z3MNPQRS';
const ACCESS_TOKEN = 'eyJhbGci...'; // obtain from your auth flow

// Session state: assigned in startCall(), released in stopCall().
// (The original also declared playbackBuffer/isPlaying, but nothing in
// the example ever used them — removed to avoid implying buffering
// logic that does not exist.)
let connection;
let audioContext;
let mediaStream;
let processor;
// ---- Audio utilities ----
// Encode an ArrayBuffer as a base64 string.
// Fix: the original did String.fromCharCode(...entireArray), which
// throws a RangeError (argument/stack limit) for large buffers.
// Encoding in fixed-size slices is identical for small chunks and
// safe for arbitrarily large ones.
function arrayBufferToBase64(buffer) {
  const bytes = new Uint8Array(buffer);
  const SLICE = 0x8000; // 32k code units per fromCharCode call
  let binary = '';
  for (let i = 0; i < bytes.length; i += SLICE) {
    binary += String.fromCharCode(...bytes.subarray(i, i + SLICE));
  }
  return btoa(binary);
}
// Decode a base64 string of 16-bit little-endian PCM into normalized
// Float32 samples in [-1, 1).
function base64ToFloat32(base64) {
  const binary = atob(base64);
  // atob yields only code points 0-255, so charCodeAt is byte-exact.
  const bytes = Uint8Array.from(binary, (ch) => ch.charCodeAt(0));
  const samples = new Int16Array(bytes.buffer);
  return Float32Array.from(samples, (s) => s / 32768.0);
}
// Schedule one decoded PCM chunk for playback.
// Fix: the original called source.start() with no timestamp, so every
// chunk started immediately and overlapping chunks played on top of
// each other. We keep a time cursor (stored on the function itself so
// this change is self-contained) and queue chunks back-to-back for
// gapless sequential playback.
function playAudioChunk(base64) {
  const float32 = base64ToFloat32(base64);
  // 24 kHz mono, matching the documented server → client output format;
  // the AudioContext resamples to its own rate on playback.
  const buffer = audioContext.createBuffer(1, float32.length, 24000);
  buffer.copyToChannel(float32, 0);
  const source = audioContext.createBufferSource();
  source.buffer = buffer;
  source.connect(audioContext.destination);
  // Start at the tail of the previously queued chunk, or now if the
  // queue has drained.
  const startAt = Math.max(audioContext.currentTime, playAudioChunk.nextTime ?? 0);
  source.start(startAt);
  playAudioChunk.nextTime = startAt + buffer.duration;
}
// ---- Connection ----
// Open the SignalR connection, wire up hub events, and stream
// 16 kHz / 16-bit / mono PCM from the microphone in 20 ms frames.
async function startCall() {
  audioContext = new AudioContext({ sampleRate: 16000 });
  connection = new signalR.HubConnectionBuilder()
    .withUrl(`wss://api.voisnap.ai/hubs/voice?agentId=${AGENT_ID}&access_token=${ACCESS_TOKEN}`, {
      // The hub URL is already a WebSocket endpoint: skip the HTTP
      // negotiate round-trip and pin the WebSocket transport so the
      // client can never fall back to long polling.
      skipNegotiation: true,
      transport: signalR.HttpTransportType.WebSockets,
    })
    .withAutomaticReconnect()
    .build();

  // Events
  connection.on('SessionStarted', d => console.log('Session:', d.conversationId));
  connection.on('SessionEnded', d => {
    console.log('Ended:', d.reason);
    stopCall();
  });
  // Fix: the original allocated a new — and unused — AudioContext on
  // every chunk here, leaking contexts until the browser's hard limit.
  connection.on('AudioReceived', base64 => playAudioChunk(base64));
  connection.on('Transcription', d => {
    document.getElementById('transcript').textContent =
      (d.isFinal ? '✓ ' : '… ') + d.text;
  });
  connection.on('Error', d => console.error(d.code, d.message));

  await connection.start();

  // Capture microphone
  mediaStream = await navigator.mediaDevices.getUserMedia({ audio: {
    sampleRate: 16000, channelCount: 1, echoCancellation: true, noiseSuppression: true
  }});
  const source = audioContext.createMediaStreamSource(mediaStream);

  // Fix: ScriptProcessorNode buffer sizes must be a power of two
  // (256, 512, …, 16384); createScriptProcessor(320) throws an
  // IndexSizeError. Capture with a 512-sample buffer and re-chunk into
  // exact 20 ms frames (320 samples @ 16 kHz) before sending.
  const FRAME_SAMPLES = 320; // 20 ms @ 16 kHz
  let pending = new Float32Array(0); // samples carried between callbacks
  processor = audioContext.createScriptProcessor(512, 1, 1);
  processor.onaudioprocess = (e) => {
    const input = e.inputBuffer.getChannelData(0);
    const merged = new Float32Array(pending.length + input.length);
    merged.set(pending);
    merged.set(input, pending.length);

    let offset = 0;
    while (merged.length - offset >= FRAME_SAMPLES) {
      const frame = merged.subarray(offset, offset + FRAME_SAMPLES);
      const int16 = new Int16Array(FRAME_SAMPLES);
      for (let i = 0; i < FRAME_SAMPLES; i++) {
        // Clamp before truncation to avoid Int16 wrap-around on peaks.
        int16[i] = Math.max(-32768, Math.min(32767, frame[i] * 32768));
      }
      connection.invoke('SendAudioAsync', arrayBufferToBase64(int16.buffer))
        .catch(console.error);
      offset += FRAME_SAMPLES;
    }
    pending = merged.slice(offset);
  };
  source.connect(processor);
  processor.connect(audioContext.destination);

  document.getElementById('startBtn').disabled = true;
  document.getElementById('endBtn').disabled = false;
}
// Tear down the session: tell the hub we're done, stop the transport,
// release microphone and audio resources, and reset the UI.
// Idempotent: safe to call from both the End button and the
// SessionEnded handler (which can race each other).
async function stopCall() {
  if (connection) {
    // Best-effort — the hub may have ended the session already.
    await connection.invoke('EndSessionAsync', 'user_ended').catch(() => {});
    await connection.stop();
    connection = null;
  }
  if (processor) {
    processor.disconnect();
    processor = null;
  }
  if (mediaStream) {
    mediaStream.getTracks().forEach(t => t.stop());
    mediaStream = null;
  }
  // Fix: close() rejects if the context is already closed — guard on
  // state and await the returned promise instead of dropping it.
  if (audioContext && audioContext.state !== 'closed') {
    await audioContext.close();
  }
  audioContext = null;
  document.getElementById('startBtn').disabled = false;
  document.getElementById('endBtn').disabled = true;
}
// Wire the UI buttons to the session lifecycle.
document.getElementById('startBtn').addEventListener('click', startCall);
document.getElementById('endBtn').addEventListener('click', stopCall);
</script>
</body>
</html>
Latency tips
- Send audio in 320-sample (20 ms) chunks — smaller chunks increase overhead, larger ones add latency.
- Use `echoCancellation: true` and `noiseSuppression: true` in your `getUserMedia` constraints.
- Keep your SignalR connection on a persistent WebSocket (avoid HTTP polling fallback).
- For lowest latency, use GPT-4o Mini as LLM and Deepgram Nova-2 for STT.