VoiceHub

VoiceHub provides bidirectional PCM audio streaming over SignalR between a client and a Voisnap voice agent. It is the core real-time interface for custom telephony integrations and direct audio streaming.


Connection

wss://api.voisnap.ai/hubs/voice?agentId={agentId}&access_token={jwt}

Required query parameters:

Parameter      Description
agentId        The agent that will handle this session
access_token   JWT access token

Optional query parameters:

Parameter   Description
sessionId   Resume a specific session (for reconnection)
metadata    URL-encoded JSON of arbitrary key-value pairs
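
For example, the metadata parameter might be built like this (the callerId key is purely illustrative):

const metadata = encodeURIComponent(JSON.stringify({ callerId: '+15551234567' }));
const url = `wss://api.voisnap.ai/hubs/voice?agentId=${agentId}&access_token=${jwt}&metadata=${metadata}`;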

Audio format

Direction                  Format                   Sample Rate  Bit Depth  Channels
Client → Server (input)    Raw PCM (little-endian)  16,000 Hz    16-bit     Mono
Server → Client (output)   Raw PCM (little-endian)  24,000 Hz    16-bit     Mono

Send audio in 20 ms chunks (640 bytes at 16 kHz/16-bit/mono) for the lowest latency.
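
The 640-byte figure follows directly from the input format:

// 20 ms at 16,000 Hz, 16-bit mono: 16000 × 0.020 = 320 samples, × 2 bytes = 640 bytes
const SAMPLE_RATE = 16000;
const CHUNK_MS = 20;
const CHUNK_SAMPLES = (SAMPLE_RATE * CHUNK_MS) / 1000; // 320
const CHUNK_BYTES = CHUNK_SAMPLES * 2;                 // 640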


Client → Server methods

SendAudioAsync

Send a chunk of raw PCM audio from the user's microphone.

await connection.invoke("SendAudioAsync", audioChunkBase64);

The argument is a base64-encoded byte array of raw PCM audio.
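
One way to produce that argument from a 16-bit PCM chunk (a minimal sketch; int16ToBase64 is a local helper, not part of the API):

function int16ToBase64(int16) {
  const bytes = new Uint8Array(int16.buffer); // little-endian on all common platforms
  let binary = '';
  for (let i = 0; i < bytes.length; i++) binary += String.fromCharCode(bytes[i]);
  return btoa(binary);
}

await connection.invoke("SendAudioAsync", int16ToBase64(chunk));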

RequestReconnectionAsync

Request a graceful reconnection (e.g., on network instability).

await connection.invoke("RequestReconnectionAsync");

EndSessionAsync

End the voice session cleanly.

await connection.invoke("EndSessionAsync", "user_ended");
// Reason options: "user_ended", "timeout", "transfer_complete"

SendDtmfAsync

Send a DTMF tone (e.g., when the user presses a keypad button).

await connection.invoke("SendDtmfAsync", "5"); // single digit 0-9, *, #

Server → Client events

AudioReceived

A chunk of the agent's TTS response audio. Decode it and schedule it for playback as it arrives.

connection.on("AudioReceived", (audioBase64) => {
  const bytes = base64ToArrayBuffer(audioBase64);
  audioPlayer.enqueue(bytes); // your PCM playback buffer
});

Transcription

Streaming transcription of the user's speech; isFinal distinguishes interim hypotheses from the final text of an utterance.

connection.on("Transcription", (data) => {
  // data: { text: string, isFinal: boolean, confidence: number }
  if (data.isFinal) {
    displayFinalTranscription(data.text);
  } else {
    displayPartialTranscription(data.text);
  }
});

TranscriptionOf

Agent's speech transcription (what the agent said).

connection.on("TranscriptionOf", (data) => {
  // data: { text: string, isFinal: boolean }
  displayAgentSubtitle(data.text);
});

SessionStarted

connection.on("SessionStarted", (data) => {
  // data: { conversationId: string, agentName: string, timestamp: string }
  console.log(`Session started: ${data.conversationId}`);
});

SessionEnded

connection.on("SessionEnded", (data) => {
  // data: { conversationId: string, reason: string, durationSeconds: number }
  console.log(`Session ended after ${data.durationSeconds}s: ${data.reason}`);
  connection.stop();
});

TransferInitiated

connection.on("TransferInitiated", (data) => {
  // data: { transferTo: string, transferType: string, summary: string }
  console.log(`Transfer to ${data.transferTo}: ${data.summary}`);
});

Error

connection.on("Error", (data) => {
  // data: { code: string, message: string, recoverable: boolean }
  console.error(`Hub error [${data.code}]: ${data.message}`);
  if (!data.recoverable) connection.stop();
});

Reconnected

connection.on("Reconnected", (data) => {
  // data: { sessionId: string, message: string }
  console.log("Reconnected to session:", data.sessionId);
});
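
Together with the optional sessionId query parameter, this event can support resuming a dropped session (a sketch; exact resume semantics depend on your deployment):

let currentSessionId;

connection.on('Reconnected', (data) => {
  currentSessionId = data.sessionId; // persist for a possible resume
});

// On a later, fresh connection attempt, append the saved id:
// wss://api.voisnap.ai/hubs/voice?agentId=...&access_token=...&sessionId=${currentSessionId}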

Full browser example

<!DOCTYPE html>
<html>
<head>
  <title>Voisnap Voice Demo</title>
  <script src="https://cdn.jsdelivr.net/npm/@microsoft/signalr@latest/dist/browser/signalr.min.js"></script>
</head>
<body>
  <button id="startBtn">Start Call</button>
  <button id="endBtn" disabled>End Call</button>
  <div id="transcript"></div>
 
  <script>
    const AGENT_ID = 'agt_01HXK8Z3MNPQRS';
    const ACCESS_TOKEN = 'eyJhbGci...'; // obtain from your auth flow
 
    let connection;
    let audioContext;
    let mediaStream;
    let processor;
    let nextPlayTime = 0; // playback cursor so agent audio chunks play back to back
 
    // ---- Audio utilities ----
    function arrayBufferToBase64(buffer) {
      return btoa(String.fromCharCode(...new Uint8Array(buffer)));
    }
 
    function base64ToFloat32(base64) {
      const binary = atob(base64);
      const bytes = new Uint8Array(binary.length);
      for (let i = 0; i < binary.length; i++) bytes[i] = binary.charCodeAt(i);
      const int16 = new Int16Array(bytes.buffer);
      const float32 = new Float32Array(int16.length);
      for (let i = 0; i < int16.length; i++) float32[i] = int16[i] / 32768.0;
      return float32;
    }
 
    function playAudioChunk(base64) {
      const float32 = base64ToFloat32(base64);
      // A 24 kHz buffer in a 16 kHz context is fine: the browser resamples on playback
      const buffer = audioContext.createBuffer(1, float32.length, 24000);
      buffer.copyToChannel(float32, 0);
      const source = audioContext.createBufferSource();
      source.buffer = buffer;
      source.connect(audioContext.destination);
      // Schedule chunks back to back so they neither overlap nor gap
      nextPlayTime = Math.max(nextPlayTime, audioContext.currentTime);
      source.start(nextPlayTime);
      nextPlayTime += buffer.duration;
    }
 
    // ---- Connection ----
    async function startCall() {
      audioContext = new AudioContext({ sampleRate: 16000 });
      nextPlayTime = 0; // reset the playback cursor for the new session
 
      connection = new signalR.HubConnectionBuilder()
        .withUrl(`wss://api.voisnap.ai/hubs/voice?agentId=${AGENT_ID}&access_token=${ACCESS_TOKEN}`, {
          // A wss:// URL requires skipping HTTP negotiation and forcing the WebSocket transport
          skipNegotiation: true,
          transport: signalR.HttpTransportType.WebSockets
        })
        .withAutomaticReconnect()
        .build();
 
      // Events
      connection.on('SessionStarted', d => console.log('Session:', d.conversationId));
      connection.on('SessionEnded', d => {
        console.log('Ended:', d.reason);
        stopCall();
      });
      connection.on('AudioReceived', base64 => playAudioChunk(base64));
      connection.on('Transcription', d => {
        document.getElementById('transcript').textContent =
          (d.isFinal ? '✓ ' : '… ') + d.text;
      });
      connection.on('Error', d => console.error(d.code, d.message));
 
      await connection.start();
 
      // Capture microphone
      mediaStream = await navigator.mediaDevices.getUserMedia({ audio: {
        sampleRate: 16000, channelCount: 1, echoCancellation: true, noiseSuppression: true
      }});
 
      const source = audioContext.createMediaStreamSource(mediaStream);
      // ScriptProcessorNode only accepts power-of-two buffer sizes;
      // 512 samples ≈ 32 ms @ 16 kHz is the closest usable chunk
      processor = audioContext.createScriptProcessor(512, 1, 1);
 
      processor.onaudioprocess = (e) => {
        const float32 = e.inputBuffer.getChannelData(0);
        const int16 = new Int16Array(float32.length);
        for (let i = 0; i < float32.length; i++) {
          int16[i] = Math.max(-32768, Math.min(32767, float32[i] * 32768));
        }
        const base64 = arrayBufferToBase64(int16.buffer);
        connection.invoke('SendAudioAsync', base64).catch(console.error);
      };
 
      source.connect(processor);
      processor.connect(audioContext.destination); // ScriptProcessor only fires onaudioprocess while connected
 
      document.getElementById('startBtn').disabled = true;
      document.getElementById('endBtn').disabled = false;
    }
 
    async function stopCall() {
      if (connection) {
        await connection.invoke('EndSessionAsync', 'user_ended').catch(() => {});
        await connection.stop();
      }
      if (processor) processor.disconnect();
      if (mediaStream) mediaStream.getTracks().forEach(t => t.stop());
      if (audioContext) audioContext.close();
 
      document.getElementById('startBtn').disabled = false;
      document.getElementById('endBtn').disabled = true;
    }
 
    document.getElementById('startBtn').addEventListener('click', startCall);
    document.getElementById('endBtn').addEventListener('click', stopCall);
  </script>
</body>
</html>

Latency tips

  • Send audio in 20 ms (320-sample) chunks where possible: smaller chunks increase overhead, larger ones add latency. ScriptProcessorNode only accepts power-of-two buffer sizes, so either rechunk or use the nearest size (512 samples ≈ 32 ms).
  • Use echoCancellation: true and noiseSuppression: true in your getUserMedia constraints.
  • Keep the SignalR connection on a persistent WebSocket (avoid the HTTP polling fallback); see the skipNegotiation and transport options in the example above.
  • For the lowest latency, use GPT-4o Mini as the LLM and Deepgram Nova-2 for STT.
