January 29, 2026

·

RunAnywhere Kotlin SDK Part 3: Text-to-Speech with Piper

RunAnywhere Kotlin SDK Part 3: Text-to-Speech with Piper
DEVELOPERS

Natural Voice Synthesis Entirely On-Device


This is Part 3 of our RunAnywhere Kotlin SDK tutorial series:

  1. Chat with LLMs — Project setup and streaming text generation
  2. Speech-to-Text — Real-time transcription with Whisper
  3. Text-to-Speech (this post) — Natural voice synthesis with Piper
  4. Voice Pipeline — Full voice assistant with VAD

Text-to-speech brings your app to life. With RunAnywhere, you can synthesize natural-sounding speech using Piper—completely on-device, with no network latency.

The SDK returns audio data that can be played using Android's AudioTrack for direct playback.

Prerequisites

  • Complete Part 1 first to set up your project with the RunAnywhere SDK
  • ~65MB additional storage for the Piper voice model

Register the TTS Voice

Add Piper to your model registration in RunAnywhereApp.kt:

kotlin
1import com.runanywhere.sdk.core.types.InferenceFramework
2import com.runanywhere.sdk.public.extensions.Models.ModelCategory
3import com.runanywhere.sdk.public.extensions.registerModel
4
5// Register the Piper TTS voice. The id below is the same one later passed to downloadModel() and loadTTSVoice().
6RunAnywhere.registerModel(
7 id = "vits-piper-en_US-lessac-medium",
8 name = "Piper US English",
9 url = "https://github.com/RunanywhereAI/sherpa-onnx/releases/download/runanywhere-models-v1/vits-piper-en_US-lessac-medium.tar.gz",
10 framework = InferenceFramework.ONNX,
11 modality = ModelCategory.SPEECH_SYNTHESIS,
12 memoryRequirement = 65_000_000 // presumably bytes (~65MB, matching the stated model size) — confirm against the SDK docs
13)

Important: Piper Output Format

Piper outputs audio in a specific format:

| Parameter   | Value           |
|-------------|-----------------|
| Sample Rate | 22,050 Hz       |
| Channels    | 1 (mono)        |
| Format      | WAV audio bytes |

The SDK handles the conversion internally and returns playable WAV data.

Audio Playback Service

Create TTSAudioPlayer.kt:

kotlin
1package com.example.localaiplayground.domain.services
2
3import android.media.AudioAttributes
4import android.media.AudioFormat
5import android.media.AudioManager
6import android.media.AudioTrack
7import android.util.Log
8import kotlinx.coroutines.Dispatchers
9import kotlinx.coroutines.withContext
10
/**
 * Plays synthesized speech through an [AudioTrack].
 *
 * Input is treated as 16-bit mono PCM at [SAMPLE_RATE]. When the data starts with a RIFF
 * header, the first [WAV_HEADER_SIZE] bytes are skipped (a canonical 44-byte WAV header is
 * assumed). Only one clip plays at a time: starting a new clip stops the previous one.
 */
class TTSAudioPlayer {
    companion object {
        private const val TAG = "TTSAudioPlayer"
        private const val SAMPLE_RATE = 22050 // Piper outputs at 22.05kHz
        private const val WAV_HEADER_SIZE = 44
        private const val BYTES_PER_SAMPLE = 2 // ENCODING_PCM_16BIT
    }

    // Written on the IO dispatcher by playAudio() and read by stop(), which callers may
    // invoke from another thread, so make writes visible across threads.
    @Volatile
    private var audioTrack: AudioTrack? = null

    /**
     * Plays [audioData] and suspends until the clip's computed duration has elapsed.
     *
     * Any clip that is still playing is stopped first. Cancelling the calling coroutine
     * stops playback and releases the track.
     *
     * @param audioData WAV bytes (RIFF header followed by PCM) or raw 16-bit mono PCM.
     * @throws Exception whatever [AudioTrack] throws while building, writing or playing;
     *   the error is logged and rethrown.
     */
    suspend fun playAudio(audioData: ByteArray) = withContext(Dispatchers.IO) {
        try {
            stop() // Stop any current playback

            // Skip WAV header if present (first 44 bytes)
            val pcmData = if (isWavFile(audioData)) {
                audioData.copyOfRange(WAV_HEADER_SIZE, audioData.size)
            } else {
                audioData
            }

            val bufferSize = AudioTrack.getMinBufferSize(
                SAMPLE_RATE,
                AudioFormat.CHANNEL_OUT_MONO,
                AudioFormat.ENCODING_PCM_16BIT
            )

            val track = AudioTrack.Builder()
                .setAudioAttributes(
                    AudioAttributes.Builder()
                        .setUsage(AudioAttributes.USAGE_MEDIA)
                        .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
                        .build()
                )
                .setAudioFormat(
                    AudioFormat.Builder()
                        .setEncoding(AudioFormat.ENCODING_PCM_16BIT)
                        .setSampleRate(SAMPLE_RATE)
                        .setChannelMask(AudioFormat.CHANNEL_OUT_MONO)
                        .build()
                )
                // MODE_STATIC needs the whole clip to fit in the track's buffer.
                .setBufferSizeInBytes(maxOf(bufferSize, pcmData.size))
                .setTransferMode(AudioTrack.MODE_STATIC)
                .build()
            audioTrack = track

            try {
                track.write(pcmData, 0, pcmData.size)
                track.play()

                // Wait for playback to complete. delay() (unlike Thread.sleep) does not
                // hold an IO thread and reacts to cancellation immediately.
                val durationMs = (pcmData.size / BYTES_PER_SAMPLE * 1000L) / SAMPLE_RATE
                kotlinx.coroutines.delay(durationMs)
            } finally {
                // Tear down only if stop() has not already released this track; this also
                // clears the field so later calls never touch a released AudioTrack.
                if (audioTrack === track) stop()
            }

            Log.d(TAG, "Playback complete: ${pcmData.size} bytes")
        } catch (e: kotlinx.coroutines.CancellationException) {
            // Cancellation is normal control flow, not a playback error.
            throw e
        } catch (e: Exception) {
            Log.e(TAG, "Playback error", e)
            throw e
        }
    }

    /**
     * Stops playback if a clip is playing and releases the underlying track.
     *
     * Safe to call when nothing is playing. Failures are logged, never thrown.
     */
    fun stop() {
        val track = audioTrack ?: return
        // Clear the reference first so the released track can never be reused.
        audioTrack = null
        try {
            try {
                if (track.playState == AudioTrack.PLAYSTATE_PLAYING) {
                    track.stop()
                }
            } finally {
                track.release()
            }
        } catch (e: Exception) {
            Log.w(TAG, "Error stopping playback", e)
        }
    }

    /** Returns true when [data] is longer than a WAV header and starts with the ASCII tag "RIFF". */
    private fun isWavFile(data: ByteArray): Boolean {
        return data.size > WAV_HEADER_SIZE &&
            data[0] == 'R'.code.toByte() &&
            data[1] == 'I'.code.toByte() &&
            data[2] == 'F'.code.toByte() &&
            data[3] == 'F'.code.toByte()
    }
}

TTS ViewModel

Create TextToSpeechViewModel.kt:

kotlin
1package com.example.localaiplayground.presentation.tts
2
3import android.app.Application
4import androidx.lifecycle.AndroidViewModel
5import androidx.lifecycle.viewModelScope
6import com.example.localaiplayground.domain.services.TTSAudioPlayer
7import com.runanywhere.sdk.public.RunAnywhere
8import com.runanywhere.sdk.public.extensions.TTS.TTSOptions
9import com.runanywhere.sdk.public.extensions.availableModels
10import com.runanywhere.sdk.public.extensions.downloadModel
11import com.runanywhere.sdk.public.extensions.isTTSVoiceLoaded
12import com.runanywhere.sdk.public.extensions.loadTTSVoice
13import com.runanywhere.sdk.public.extensions.synthesize
14import com.runanywhere.sdk.public.extensions.unloadTTSVoice
15import kotlinx.coroutines.flow.*
16import kotlinx.coroutines.launch
17
/**
 * Immutable snapshot of the state rendered by the text-to-speech screen.
 *
 * @property isLoading true while the voice model is still being prepared; starts as true.
 * @property isModelLoaded true once the TTS voice has been loaded.
 * @property downloadProgress model download progress; shown by the UI as a percentage (value * 100).
 * @property isSynthesizing true while a synthesize-and-play request is in flight.
 * @property speechRate speech rate multiplier passed to the synthesizer (1.0 by default).
 * @property pitch pitch value passed to the synthesizer (1.0 by default).
 * @property error message of the most recent failure, or null when there is none.
 */
data class TTSUiState(
    val isLoading: Boolean = true,
    val isModelLoaded: Boolean = false,
    val downloadProgress: Float = 0f,
    val isSynthesizing: Boolean = false,
    val speechRate: Float = 1.0f,
    val pitch: Float = 1.0f,
    val error: String? = null,
)
27
/**
 * Drives the text-to-speech screen: downloads and loads the Piper voice on creation, then
 * synthesizes text on request and plays the result through [TTSAudioPlayer].
 */
class TextToSpeechViewModel(application: Application) : AndroidViewModel(application) {
    private val _uiState = MutableStateFlow(TTSUiState())

    /** Read-only stream of UI state for the screen to collect. */
    val uiState: StateFlow<TTSUiState> = _uiState.asStateFlow()

    private val audioPlayer = TTSAudioPlayer()
    private val modelId = "vits-piper-en_US-lessac-medium"

    init {
        loadModel()
    }

    /**
     * Downloads the voice model when it has no local copy yet, then loads it.
     * Progress and the final outcome are published through [uiState].
     */
    private fun loadModel() {
        viewModelScope.launch {
            try {
                val models = RunAnywhere.availableModels()
                val isDownloaded = models.any { it.id == modelId && it.localPath != null }

                if (!isDownloaded) {
                    RunAnywhere.downloadModel(modelId).collect { progress ->
                        _uiState.update {
                            it.copy(downloadProgress = progress.progress)
                        }
                    }
                }

                // Load TTS voice
                RunAnywhere.loadTTSVoice(modelId)

                _uiState.update {
                    it.copy(isLoading = false, isModelLoaded = true)
                }
            } catch (e: kotlinx.coroutines.CancellationException) {
                // The scope was cancelled; this is not a load failure.
                throw e
            } catch (e: Exception) {
                _uiState.update {
                    // Exceptions may carry no message; always give the UI something to show.
                    it.copy(isLoading = false, error = e.message ?: "Failed to load voice model")
                }
            }
        }
    }

    /** Sets the speech rate used for subsequent synthesis requests. */
    fun setSpeechRate(rate: Float) {
        _uiState.update { it.copy(speechRate = rate) }
    }

    /** Sets the pitch used for subsequent synthesis requests. */
    fun setPitch(pitch: Float) {
        _uiState.update { it.copy(pitch = pitch) }
    }

    /**
     * Synthesizes [text] with the current rate and pitch and plays the audio.
     *
     * Does nothing when [text] is blank, the voice is not loaded yet, or another request is
     * still in progress. Failures are reported through [TTSUiState.error].
     */
    fun synthesizeAndPlay(text: String) {
        val current = _uiState.value
        if (text.isBlank() || !current.isModelLoaded || current.isSynthesizing) return

        viewModelScope.launch {
            _uiState.update { it.copy(isSynthesizing = true, error = null) }

            try {
                val settings = _uiState.value
                val options = TTSOptions(
                    rate = settings.speechRate,
                    pitch = settings.pitch,
                )

                val result = RunAnywhere.synthesize(text, options)

                // result.audioData contains WAV bytes
                audioPlayer.playAudio(result.audioData)
            } catch (e: kotlinx.coroutines.CancellationException) {
                // Propagate cancellation instead of surfacing it as an error message.
                throw e
            } catch (e: Exception) {
                _uiState.update { it.copy(error = e.message ?: "Speech synthesis failed") }
            } finally {
                _uiState.update { it.copy(isSynthesizing = false) }
            }
        }
    }

    override fun onCleared() {
        super.onCleared()
        audioPlayer.stop()
    }
}

TTS Screen

Create TextToSpeechScreen.kt:

kotlin
1package com.example.localaiplayground.presentation.tts
2
3import androidx.compose.foundation.background
4import androidx.compose.foundation.layout.*
5import androidx.compose.foundation.shape.RoundedCornerShape
6import androidx.compose.material3.*
7import androidx.compose.runtime.*
8import androidx.compose.ui.Alignment
9import androidx.compose.ui.Modifier
10import androidx.compose.ui.graphics.Color
11import androidx.compose.ui.unit.dp
12import androidx.lifecycle.viewmodel.compose.viewModel
13
/**
 * Text-to-speech screen: a text field, speed and pitch sliders, and a Speak button.
 *
 * While the voice model is loading, only a progress indicator is shown.
 *
 * @param viewModel source of [TTSUiState] and receiver of user actions.
 */
@Composable
fun TextToSpeechScreen(
    viewModel: TextToSpeechViewModel = viewModel()
) {
    val uiState by viewModel.uiState.collectAsState()
    var inputText by remember {
        mutableStateOf("Hello! This is text-to-speech running entirely on your device.")
    }

    Column(
        modifier = Modifier
            .fillMaxSize()
            .background(Color.Black)
            .padding(24.dp)
    ) {
        if (uiState.isLoading) {
            // Loading state
            TtsLoadingContent(progress = uiState.downloadProgress)
        } else {
            // Text input
            OutlinedTextField(
                value = inputText,
                onValueChange = { inputText = it },
                modifier = Modifier
                    .fillMaxWidth()
                    .height(150.dp),
                label = { Text("Text to speak") },
                colors = OutlinedTextFieldDefaults.colors(
                    focusedTextColor = Color.White,
                    unfocusedTextColor = Color.White,
                    focusedLabelColor = Color.White,
                    unfocusedLabelColor = Color.Gray,
                    focusedBorderColor = Color(0xFF007AFF),
                    unfocusedBorderColor = Color.Gray
                )
            )

            Spacer(modifier = Modifier.height(24.dp))

            // Speed slider
            Text(
                "Speed: ${String.format("%.1f", uiState.speechRate)}x",
                color = Color.White
            )
            Slider(
                value = uiState.speechRate,
                onValueChange = { viewModel.setSpeechRate(it) },
                valueRange = 0.5f..2.0f,
                // `steps` counts the stops strictly between the endpoints:
                // 0.5..2.0 in 0.1 increments is 16 values, so 14 interior stops.
                steps = 14,
                modifier = Modifier.fillMaxWidth()
            )

            Spacer(modifier = Modifier.height(16.dp))

            // Pitch slider
            Text(
                "Pitch: ${String.format("%.1f", uiState.pitch)}",
                color = Color.White
            )
            Slider(
                value = uiState.pitch,
                onValueChange = { viewModel.setPitch(it) },
                valueRange = 0.5f..1.5f,
                // 0.5..1.5 in 0.1 increments is 11 values, so 9 interior stops.
                steps = 9,
                modifier = Modifier.fillMaxWidth()
            )

            Spacer(modifier = Modifier.height(32.dp))

            // Speak button
            Button(
                onClick = { viewModel.synthesizeAndPlay(inputText) },
                modifier = Modifier
                    .fillMaxWidth()
                    .height(56.dp),
                enabled = uiState.isModelLoaded && !uiState.isSynthesizing && inputText.isNotBlank(),
                shape = RoundedCornerShape(12.dp)
            ) {
                if (uiState.isSynthesizing) {
                    CircularProgressIndicator(
                        modifier = Modifier.size(24.dp),
                        color = Color.White,
                        strokeWidth = 2.dp
                    )
                    Spacer(modifier = Modifier.width(8.dp))
                    Text("Synthesizing...")
                } else {
                    Text("🔊 Speak")
                }
            }

            // Error display
            uiState.error?.let { error ->
                Spacer(modifier = Modifier.height(16.dp))
                Text(
                    text = error,
                    color = Color.Red,
                    modifier = Modifier.padding(8.dp)
                )
            }
        }
    }
}

/** Centered spinner, percentage label and progress bar shown while the voice model loads. */
@Composable
private fun TtsLoadingContent(progress: Float) {
    Box(
        modifier = Modifier.fillMaxSize(),
        contentAlignment = Alignment.Center
    ) {
        Column(horizontalAlignment = Alignment.CenterHorizontally) {
            CircularProgressIndicator()
            Spacer(modifier = Modifier.height(16.dp))
            Text(
                "Downloading voice model... ${(progress * 100).toInt()}%",
                color = Color.White
            )
            LinearProgressIndicator(
                progress = { progress },
                modifier = Modifier
                    .fillMaxWidth()
                    .padding(top = 8.dp)
            )
        }
    }
}
Text-to-speech on Android

Memory Management

When you're done with TTS, unload the voice:

kotlin
1// Unload the TTS voice once speech synthesis is no longer needed
2RunAnywhere.unloadTTSVoice()

TTS voices can be loaded independently alongside the LLM and STT models—they don't conflict.

Models Reference

| Model ID                       | Size  | Notes              |
|--------------------------------|-------|--------------------|
| vits-piper-en_US-lessac-medium | ~65MB | Natural US English |

What's Next

In Part 4, we'll combine everything into a complete voice assistant with automatic Voice Activity Detection.


Resources


Questions? Open an issue on GitHub or reach out on Twitter/X.

RunAnywhere Logo

RunAnywhere

Connect with developers, share ideas, get support, and stay updated on the latest features. Our Discord community is the heart of everything we build.

Company

Copyright © 2025 RunAnywhere, Inc.