Enable to stop TTS generation (#1041)

2024-06-22 18:18:36 +08:00
parent 96ab843173
commit 9dd0e03568
32 changed files with 249 additions and 70 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -8,7 +8,7 @@ project(sherpa-onnx)
 # ./nodejs-addon-examples
 # ./dart-api-examples/
 # ./sherpa-onnx/flutter/CHANGELOG.md
-set(SHERPA_ONNX_VERSION "1.10.0")
+set(SHERPA_ONNX_VERSION "1.10.1")

 # Disable warning about
 #
--- a/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
+++ b/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt
@@ -26,6 +26,9 @@ class MainActivity : AppCompatActivity() {
    private lateinit var speed: EditText
    private lateinit var generate: Button
    private lateinit var play: Button
+    private lateinit var stop: Button
+    @Volatile private var stopped: Boolean = false // written by click handlers, read by callback() off the UI thread
+    private var mediaPlayer: MediaPlayer? = null

    // see
    // https://developer.android.com/reference/kotlin/android/media/AudioTrack
@@ -49,9 +52,11 @@ class MainActivity : AppCompatActivity() {

        generate = findViewById(R.id.generate)
        play = findViewById(R.id.play)
+        stop = findViewById(R.id.stop)

        generate.setOnClickListener { onClickGenerate() }
        play.setOnClickListener { onClickPlay() }
+        stop.setOnClickListener { onClickStop() }

        sid.setText("0")
        speed.setText("1.0")
@@ -70,7 +75,7 @@ class MainActivity : AppCompatActivity() {
            AudioFormat.CHANNEL_OUT_MONO,
            AudioFormat.ENCODING_PCM_FLOAT
        )
-        Log.i(TAG, "sampleRate: ${sampleRate}, buffLength: ${bufLength}")
+        Log.i(TAG, "sampleRate: $sampleRate, buffLength: $bufLength")

        val attr = AudioAttributes.Builder().setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
            .setUsage(AudioAttributes.USAGE_MEDIA)
@@ -90,8 +95,14 @@ class MainActivity : AppCompatActivity() {
    }

    // this function is called from C++
-    private fun callback(samples: FloatArray) {
-        track.write(samples, 0, samples.size, AudioTrack.WRITE_BLOCKING)
+    private fun callback(samples: FloatArray): Int {
+        if (stopped) {
+            track.stop()
+            return 0 // 0 asks the native generator to stop
+        }
+        // Not stopped: queue this chunk for playback and ask for more (1 = continue).
+        track.write(samples, 0, samples.size, AudioTrack.WRITE_BLOCKING)
+        return 1
    }

    private fun onClickGenerate() {
@@ -127,6 +138,8 @@ class MainActivity : AppCompatActivity() {
        track.play()

        play.isEnabled = false
+        generate.isEnabled = false
+        stopped = false
        Thread {
            val audio = tts.generateWithCallback(
                text = textStr,
@@ -140,6 +153,7 @@ class MainActivity : AppCompatActivity() {
            if (ok) {
                runOnUiThread {
                    play.isEnabled = true
+                    generate.isEnabled = true
                    track.stop()
                }
            }
@@ -148,11 +162,22 @@ class MainActivity : AppCompatActivity() {

    private fun onClickPlay() {
        val filename = application.filesDir.absolutePath + "/generated.wav"
-        val mediaPlayer = MediaPlayer.create(
+        mediaPlayer?.release() // end any earlier playback and free its native player
+        mediaPlayer = MediaPlayer.create(
            applicationContext,
            Uri.fromFile(File(filename))
        )
-        mediaPlayer.start()
+        mediaPlayer?.start() // create() returns null when the file cannot be played
+    }
+
+    private fun onClickStop() {
+        stopped = true // callback() now returns 0, which ends generation
+        play.isEnabled = true
+        generate.isEnabled = true
+        track.pause()
+        track.flush() // discard audio that was queued but not yet played
+        mediaPlayer?.release() // ends playback and frees the player; stop() alone leaked it
+        mediaPlayer = null
    }

    private fun initTts() {
--- a/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt
+++ b/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/Tts.kt
@@ -76,7 +76,7 @@ class OfflineTts(
        text: String,
        sid: Int = 0,
        speed: Float = 1.0f,
-        callback: (samples: FloatArray) -> Unit
+        callback: (samples: FloatArray) -> Int
    ): GeneratedAudio {
        val objArray = generateWithCallbackImpl(
            ptr,
@@ -146,7 +146,7 @@ class OfflineTts(
        text: String,
        sid: Int = 0,
        speed: Float = 1.0f,
-        callback: (samples: FloatArray) -> Unit
+        callback: (samples: FloatArray) -> Int
    ): Array<Any>

    companion object {
--- a/android/SherpaOnnxTts/app/src/main/res/layout/activity_main.xml
+++ b/android/SherpaOnnxTts/app/src/main/res/layout/activity_main.xml
@@ -84,4 +84,16 @@
        app:layout_constraintLeft_toLeftOf="parent"
        app:layout_constraintRight_toRightOf="parent"
        app:layout_constraintTop_toBottomOf="@id/generate" />
+
+    <Button
+        android:id="@+id/stop"
+        android:textAllCaps="false"
+        android:layout_width="match_parent"
+        android:layout_height="50dp"
+        android:layout_marginTop="4dp"
+        android:text="@string/stop"
+        app:layout_constraintLeft_toLeftOf="parent"
+        app:layout_constraintRight_toRightOf="parent"
+        app:layout_constraintTop_toBottomOf="@id/play" />
+
 </androidx.constraintlayout.widget.ConstraintLayout>
--- a/android/SherpaOnnxTts/app/src/main/res/values/strings.xml
+++ b/android/SherpaOnnxTts/app/src/main/res/values/strings.xml
@@ -7,4 +7,5 @@
    <string name="text_hint">Please input your text here</string>
    <string name="generate">Generate</string>
    <string name="play">Play</string>
+    <string name="stop">Stop</string>
 </resources>
--- a/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsService.kt
+++ b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsService.kt
@@ -126,7 +126,7 @@ class TtsService : TextToSpeechService() {
            return
        }

-        val ttsCallback = { floatSamples: FloatArray ->
+        val ttsCallback: (FloatArray) -> Int = fun(floatSamples): Int {
            // convert FloatArray to ByteArray
            val samples = floatArrayToByteArray(floatSamples)
            val maxBufferSize: Int = callback.maxBufferSize
@@ -137,6 +137,9 @@ class TtsService : TextToSpeechService() {
                offset += bytesToWrite
            }

+            // 1 means to continue
+            // 0 means to stop
+            return 1
        }

        Log.i(TAG, "text: $text")
@@ -160,4 +163,4 @@ class TtsService : TextToSpeechService() {
        }
        return byteArray
    }
-}
+}
--- a/dart-api-examples/non-streaming-asr/pubspec.yaml
+++ b/dart-api-examples/non-streaming-asr/pubspec.yaml
@@ -10,7 +10,7 @@ environment:

 # Add regular dependencies here.
 dependencies:
-  sherpa_onnx: ^1.10.0
+  sherpa_onnx: ^1.10.1
  path: ^1.9.0
  args: ^2.5.0

--- a/dart-api-examples/streaming-asr/pubspec.yaml
+++ b/dart-api-examples/streaming-asr/pubspec.yaml
@@ -11,7 +11,7 @@ environment:

 # Add regular dependencies here.
 dependencies:
-  sherpa_onnx: ^1.10.0
+  sherpa_onnx: ^1.10.1
  path: ^1.9.0
  args: ^2.5.0

--- a/dart-api-examples/tts/bin/piper.dart
+++ b/dart-api-examples/tts/bin/piper.dart
@@ -68,6 +68,10 @@ void main(List<String> arguments) async {
      callback: (Float32List samples) {
        print('${samples.length} samples received');
        // You can play samples in a separate thread/isolate
+
+        // 1 means to continue
+        // 0 means to stop
+        return 1;
      });
  tts.free();

--- a/dart-api-examples/tts/pubspec.yaml
+++ b/dart-api-examples/tts/pubspec.yaml
@@ -8,7 +8,7 @@ environment:

 # Add regular dependencies here.
 dependencies:
-  sherpa_onnx: ^1.10.0
+  sherpa_onnx: ^1.10.1
  path: ^1.9.0
  args: ^2.5.0

--- a/dart-api-examples/vad/pubspec.yaml
+++ b/dart-api-examples/vad/pubspec.yaml
@@ -9,7 +9,7 @@ environment:
  sdk: ^3.4.0

 dependencies:
-  sherpa_onnx: ^1.10.0
+  sherpa_onnx: ^1.10.1
  path: ^1.9.0
  args: ^2.5.0

--- a/dotnet-examples/offline-tts-play/Program.cs
+++ b/dotnet-examples/offline-tts-play/Program.cs
@@ -187,6 +187,10 @@ to download more models.
      Marshal.Copy(samples, data, 0, n);

      dataItems.Add(data);
+
+      // 1 means to keep generating
+      // 0 means to stop generating
+      return 1;
    };

    bool playFinished = false;
--- a/kotlin-api-examples/test_tts.kt
+++ b/kotlin-api-examples/test_tts.kt
@@ -25,6 +25,46 @@ fun testTts() {
  println("Saved to test-en.wav")
 }

-fun callback(samples: FloatArray): Unit {
-  println("callback got called with ${samples.size} samples");
+/*
+1. Unzip test_tts.jar
+2.
+javap ./com/k2fsa/sherpa/onnx/Test_ttsKt\$testTts\$audio\$1.class
+
+3. It prints:
+Compiled from "test_tts.kt"
+final class com.k2fsa.sherpa.onnx.Test_ttsKt$testTts$audio$1 extends kotlin.jvm.internal.FunctionReferenceImpl implements kotlin.jvm.functions.Function1<float[], java.lang.Integer> {
+  public static final com.k2fsa.sherpa.onnx.Test_ttsKt$testTts$audio$1 INSTANCE;
+  com.k2fsa.sherpa.onnx.Test_ttsKt$testTts$audio$1();
+  public final java.lang.Integer invoke(float[]);
+  public java.lang.Object invoke(java.lang.Object);
+  static {};
+}
+
+4.
+javap -s ./com/k2fsa/sherpa/onnx/Test_ttsKt\$testTts\$audio\$1.class
+
+5. It prints
+Compiled from "test_tts.kt"
+final class com.k2fsa.sherpa.onnx.Test_ttsKt$testTts$audio$1 extends kotlin.jvm.internal.FunctionReferenceImpl implements kotlin.jvm.functions.Function1<float[], java.lang.Integer> {
+  public static final com.k2fsa.sherpa.onnx.Test_ttsKt$testTts$audio$1 INSTANCE;
+    descriptor: Lcom/k2fsa/sherpa/onnx/Test_ttsKt$testTts$audio$1;
+  com.k2fsa.sherpa.onnx.Test_ttsKt$testTts$audio$1();
+    descriptor: ()V
+
+  public final java.lang.Integer invoke(float[]);
+    descriptor: ([F)Ljava/lang/Integer;
+
+  public java.lang.Object invoke(java.lang.Object);
+    descriptor: (Ljava/lang/Object;)Ljava/lang/Object;
+
+  static {};
+    descriptor: ()V
+}
+*/
+fun callback(samples: FloatArray): Int {
+  // Report how many samples arrived in this chunk.
+  println("callback got called with ${samples.size} samples")
+
+  // Returning 1 asks the generator to continue; returning 0 would stop it.
+  return 1
 }
--- a/mfc-examples/NonStreamingTextToSpeech/NonStreamingTextToSpeech.rc
+++ b/mfc-examples/NonStreamingTextToSpeech/NonStreamingTextToSpeech.rc
--- a/mfc-examples/NonStreamingTextToSpeech/NonStreamingTextToSpeechDlg.cpp
+++ b/mfc-examples/NonStreamingTextToSpeech/NonStreamingTextToSpeechDlg.cpp
@@ -57,7 +57,7 @@ static bool g_started = false;
 static bool g_stopped = false;
 static bool g_killed = false;

-static void AudioGeneratedCallback(const float *s, int32_t n) {
+static int32_t AudioGeneratedCallback(const float *s, int32_t n) {
  if (n > 0) {
    Samples samples;
    samples.data = std::vector<float>{s, s + n};
@@ -66,6 +66,10 @@ static void AudioGeneratedCallback(const float *s, int32_t n) {
    g_buffer.samples.push(std::move(samples));
    g_started = true;
  }
+  if (g_killed) {
+    return 0;
+  }
+  return 1;
 }

 static int PlayCallback(const void * /*in*/, void *out,
@@ -324,6 +328,7 @@ BEGIN_MESSAGE_MAP(CNonStreamingTextToSpeechDlg, CDialogEx)
 	ON_WM_PAINT()
 	ON_WM_QUERYDRAGICON()
        ON_BN_CLICKED(IDOK, &CNonStreamingTextToSpeechDlg::OnBnClickedOk)
+        ON_BN_CLICKED(IDC_STOP, &CNonStreamingTextToSpeechDlg::OnBnClickedStop)
        END_MESSAGE_MAP()


@@ -492,11 +497,18 @@ void CNonStreamingTextToSpeechDlg::Init() {
  if (tts_) {
    SherpaOnnxDestroyOfflineTts(tts_);
  }
+  if (generate_thread_ && generate_thread_->joinable()) {
+    generate_thread_->join();
+  }
+
+  if (play_thread_ && play_thread_->joinable()) {
+    play_thread_->join();
+  }
 }


 static std::string ToString(const CString &s) {
-    CT2CA pszConvertedAnsiString( s);
+    CT2CA pszConvertedAnsiString(s);
    return std::string(pszConvertedAnsiString);
 }

@@ -510,7 +522,7 @@ void CNonStreamingTextToSpeechDlg::OnBnClickedOk() {
  }

  speed_.GetWindowText(s);
-  float speed = static_cast<float>(_ttof(s)); 
+  float speed = static_cast<float>(_ttof(s));
  if (speed < 0) {
    AfxMessageBox(Utf8ToUtf16("Please input a valid speed").c_str(), MB_OK);
    return;
@@ -541,28 +553,40 @@ void CNonStreamingTextToSpeechDlg::OnBnClickedOk() {
  // for simplicity
  play_thread_ = std::make_unique<std::thread>(StartPlayback, SherpaOnnxOfflineTtsSampleRate(tts_));

-  generate_btn_.EnableWindow(FALSE);
-
-  const SherpaOnnxGeneratedAudio *audio =
-      SherpaOnnxOfflineTtsGenerateWithCallback(tts_, ss.c_str(), speaker_id, speed, &AudioGeneratedCallback);
-
-  generate_btn_.EnableWindow(TRUE);
+  if (generate_thread_ && generate_thread_->joinable()) {
+    generate_thread_->join();
+  }

  output_filename_.GetWindowText(s);
  std::string filename = ToString(s);

-  int ok = SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate,
-                    filename.c_str());
+  generate_thread_ = std::make_unique<std::thread>([ss, this,filename, speaker_id, speed]() {
+      std::string text = ss;

-  SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
+      // generate_btn_.EnableWindow(FALSE);

-  if (ok) {
-    // AfxMessageBox(Utf8ToUtf16(std::string("Saved to ") + filename + " successfully").c_str(), MB_OK);
-    AppendLineToMultilineEditCtrl(my_hint_, std::string("Saved to ") + filename + " successfully");
-  } else {
-    // AfxMessageBox(Utf8ToUtf16(std::string("Failed to save to ") + filename).c_str(), MB_OK);
-    AppendLineToMultilineEditCtrl(my_hint_, std::string("Failed to saved to ") + filename);
-  }
+	  const SherpaOnnxGeneratedAudio *audio =
+		  SherpaOnnxOfflineTtsGenerateWithCallback(tts_, text.c_str(), speaker_id, speed, &AudioGeneratedCallback);
+      // generate_btn_.EnableWindow(TRUE);
+       g_stopped = true;
+
+	  int ok = SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate,
+						filename.c_str());
+
+	  SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
+
+	  if (ok) {
+		// AfxMessageBox(Utf8ToUtf16(std::string("Saved to ") + filename + " successfully").c_str(), MB_OK);
+
+		// AppendLineToMultilineEditCtrl(my_hint_, std::string("Saved to ") + filename + " successfully");
+	  } else {
+		// AfxMessageBox(Utf8ToUtf16(std::string("Failed to save to ") + filename).c_str(), MB_OK);
+
+		// AppendLineToMultilineEditCtrl(my_hint_, std::string("Failed to saved to ") + filename);
+	  }
+  });

  //CDialogEx::OnOK();
 }
+
+void CNonStreamingTextToSpeechDlg::OnBnClickedStop() { g_killed = true; }
--- a/mfc-examples/NonStreamingTextToSpeech/NonStreamingTextToSpeechDlg.h
+++ b/mfc-examples/NonStreamingTextToSpeech/NonStreamingTextToSpeechDlg.h
@@ -60,5 +60,8 @@ public:
 private:
    Microphone mic_;
 	std::unique_ptr<std::thread> play_thread_;
+	std::unique_ptr<std::thread> generate_thread_;

+   public:
+    afx_msg void OnBnClickedStop();
 };
--- a/mfc-examples/NonStreamingTextToSpeech/Resource.h
+++ b/mfc-examples/NonStreamingTextToSpeech/Resource.h
@@ -13,6 +13,7 @@
 #define IDC_HINT                        1005
 #define IDC_EDIT1                       1006
 #define IDC_OUTPUT_FILENAME             1006
+#define IDC_STOP                        1009

 // Next default values for new objects
 // 
@@ -20,7 +21,7 @@
 #ifndef APSTUDIO_READONLY_SYMBOLS
 #define _APS_NEXT_RESOURCE_VALUE        130
 #define _APS_NEXT_COMMAND_VALUE         32771
-#define _APS_NEXT_CONTROL_VALUE         1007
+#define _APS_NEXT_CONTROL_VALUE         1010
 #define _APS_NEXT_SYMED_VALUE           101
 #endif
 #endif
--- a/nodejs-addon-examples/package.json
+++ b/nodejs-addon-examples/package.json
@@ -1,5 +1,5 @@
 {
  "dependencies": {
-    "sherpa-onnx-node": "^1.10.0"
+    "sherpa-onnx-node": "^1.10.1"
  }
 }
--- a/python-api-examples/offline-tts-play.py
+++ b/python-api-examples/offline-tts-play.py
@@ -228,6 +228,13 @@ def generated_audio_callback(samples: np.ndarray, progress: float):
        logging.info("Start playing ...")
    started = True

+    # 1 means to keep generating
+    # 0 means to stop generating
+    if killed:
+        return 0
+
+    return 1
+

 # see https://python-sounddevice.readthedocs.io/en/0.4.6/api/streams.html#sounddevice.OutputStream
 def play_audio_callback(
--- a/scripts/dotnet/OfflineTts.cs
+++ b/scripts/dotnet/OfflineTts.cs
@@ -8,8 +8,8 @@ using System;

 namespace SherpaOnnx
 {
-    // IntPtr is actuallly a `const float*` from C++
-    public delegate void OfflineTtsCallback(IntPtr samples, int n);
+    // IntPtr is actually a `const float*` from C++
+    public delegate int OfflineTtsCallback(IntPtr samples, int n);

    public class OfflineTts : IDisposable
    {
@@ -88,4 +88,4 @@ namespace SherpaOnnx
        [DllImport(Dll.Filename, CallingConvention = CallingConvention.Cdecl)]
        private static extern IntPtr SherpaOnnxOfflineTtsGenerateWithCallback(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string text, int sid, float speed, OfflineTtsCallback callback);
    }
-}
+}
--- a/sherpa-onnx/c-api/c-api.cc
+++ b/sherpa-onnx/c-api/c-api.cc
@@ -935,7 +935,7 @@ int32_t SherpaOnnxOfflineTtsNumSpeakers(const SherpaOnnxOfflineTts *tts) {

 static const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateInternal(
    const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
-    std::function<void(const float *, int32_t, float)> callback) {
+    std::function<int32_t(const float *, int32_t, float)> callback) {
  sherpa_onnx::GeneratedAudio audio =
      tts->impl->Generate(text, sid, speed, callback);

@@ -965,7 +965,9 @@ const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithCallback(
    const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
    SherpaOnnxGeneratedAudioCallback callback) {
  auto wrapper = [callback](const float *samples, int32_t n,
-                            float /*progress*/) { callback(samples, n); };
+                            float /*progress*/) {
+    return callback(samples, n);
+  };

  return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, wrapper);
 }
@@ -975,7 +977,7 @@ SherpaOnnxOfflineTtsGenerateWithProgressCallback(
    const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
    SherpaOnnxGeneratedAudioProgressCallback callback) {
  auto wrapper = [callback](const float *samples, int32_t n, float progress) {
-    callback(samples, n, progress);
+    return callback(samples, n, progress);
  };
  return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, wrapper);
 }
@@ -985,7 +987,7 @@ const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithCallbackWithArg(
    SherpaOnnxGeneratedAudioCallbackWithArg callback, void *arg) {
  auto wrapper = [callback, arg](const float *samples, int32_t n,
                                 float /*progress*/) {
-    callback(samples, n, arg);
+    return callback(samples, n, arg);
  };

  return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, wrapper);
--- a/sherpa-onnx/c-api/c-api.h
+++ b/sherpa-onnx/c-api/c-api.h
@@ -850,14 +850,17 @@ SHERPA_ONNX_API typedef struct SherpaOnnxGeneratedAudio {
  int32_t sample_rate;
 } SherpaOnnxGeneratedAudio;

-typedef void (*SherpaOnnxGeneratedAudioCallback)(const float *samples,
-                                                 int32_t n);
+// If the callback returns 0, then it stops generating
+// If the callback returns 1, then it keeps generating
+typedef int32_t (*SherpaOnnxGeneratedAudioCallback)(const float *samples,
+                                                    int32_t n);

-typedef void (*SherpaOnnxGeneratedAudioCallbackWithArg)(const float *samples,
-                                                        int32_t n, void *arg);
+typedef int32_t (*SherpaOnnxGeneratedAudioCallbackWithArg)(const float *samples,
+                                                           int32_t n,
+                                                           void *arg);

-typedef void (*SherpaOnnxGeneratedAudioProgressCallback)(const float *samples,
-                                                         int32_t n, float p);
+typedef int32_t (*SherpaOnnxGeneratedAudioProgressCallback)(
+    const float *samples, int32_t n, float p);

 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTts SherpaOnnxOfflineTts;

--- a/sherpa-onnx/csrc/offline-tts-vits-impl.h
+++ b/sherpa-onnx/csrc/offline-tts-vits-impl.h
@@ -216,9 +216,11 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {

    GeneratedAudio ans;

+    int32_t should_continue = 1;
+
    int32_t k = 0;

-    for (int32_t b = 0; b != num_batches; ++b) {
+    for (int32_t b = 0; b != num_batches && should_continue; ++b) {
      batch.clear();
      for (int32_t i = 0; i != batch_size; ++i, ++k) {
        batch.push_back(std::move(x[k]));
@@ -229,8 +231,8 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
      ans.samples.insert(ans.samples.end(), audio.samples.begin(),
                         audio.samples.end());
      if (callback) {
-        callback(audio.samples.data(), audio.samples.size(),
-                 b * 1.0 / num_batches);
+        should_continue = callback(audio.samples.data(), audio.samples.size(),
+                                   b * 1.0 / num_batches);
        // Caution(fangjun): audio is freed when the callback returns, so users
        // should copy the data if they want to access the data after
        // the callback returns to avoid segmentation fault.
@@ -238,7 +240,7 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
    }

    batch.clear();
-    while (k < static_cast<int32_t>(x.size())) {
+    while (k < static_cast<int32_t>(x.size()) && should_continue) {
      batch.push_back(std::move(x[k]));
      ++k;
    }
--- a/sherpa-onnx/csrc/offline-tts.h
+++ b/sherpa-onnx/csrc/offline-tts.h
@@ -59,7 +59,9 @@ struct GeneratedAudio {

 class OfflineTtsImpl;

-using GeneratedAudioCallback = std::function<void(
+// If the callback returns 0, then it stops generating
+// If the callback returns 1, then it keeps generating
+using GeneratedAudioCallback = std::function<int32_t(
    const float * /*samples*/, int32_t /*n*/, float /*progress*/)>;

 class OfflineTts {
--- a/sherpa-onnx/csrc/sherpa-onnx-offline-tts-play-alsa.cc
+++ b/sherpa-onnx/csrc/sherpa-onnx-offline-tts-play-alsa.cc
@@ -44,13 +44,20 @@ static void Handler(int32_t /*sig*/) {
  fprintf(stderr, "\nCaught Ctrl + C. Exiting\n");
 }

-static void AudioGeneratedCallback(const float *s, int32_t n,
-                                   float /*progress*/) {
+static int32_t AudioGeneratedCallback(const float *s, int32_t n,
+                                      float /*progress*/) {
  if (n > 0) {
    std::lock_guard<std::mutex> lock(g_buffer.mutex);
    g_buffer.samples.push({s, s + n});
    g_cv.notify_all();
  }
+
+  // The return value tells the generator what to do next:
+  //   0 - stop generating (g_killed is set)
+  //   1 - continue generating
+  const int32_t keep_generating = g_killed ? 0 : 1;
+
+  return keep_generating;
 }

 static void StartPlayback(const std::string &device_name, int32_t sample_rate) {
--- a/sherpa-onnx/csrc/sherpa-onnx-offline-tts-play.cc
+++ b/sherpa-onnx/csrc/sherpa-onnx-offline-tts-play.cc
@@ -47,8 +47,8 @@ static void Handler(int32_t /*sig*/) {
  fprintf(stderr, "\nCaught Ctrl + C. Exiting\n");
 }

-static void AudioGeneratedCallback(const float *s, int32_t n,
-                                   float /*progress*/) {
+static int32_t AudioGeneratedCallback(const float *s, int32_t n,
+                                      float /*progress*/) {
  if (n > 0) {
    Samples samples;
    samples.data = std::vector<float>{s, s + n};
@@ -57,6 +57,12 @@ static void AudioGeneratedCallback(const float *s, int32_t n,
    g_buffer.samples.push(std::move(samples));
    g_started = true;
  }
+  if (g_killed) {
+    return 0;  // stop generating
+  }
+
+  // continue generating
+  return 1;
 }

 static int PlayCallback(const void * /*in*/, void *out,
--- a/sherpa-onnx/csrc/sherpa-onnx-offline-tts.cc
+++ b/sherpa-onnx/csrc/sherpa-onnx-offline-tts.cc
@@ -9,8 +9,9 @@
 #include "sherpa-onnx/csrc/parse-options.h"
 #include "sherpa-onnx/csrc/wave-writer.h"

-void audioCallback(const float * /*samples*/, int32_t n, float progress) {
+int32_t audioCallback(const float * /*samples*/, int32_t n, float progress) {
  printf("sample=%d, progress=%f\n", n, progress);
+  return 1;  // 1 = keep generating; returning 0 would stop generation
 }

 int main(int32_t argc, char *argv[]) {
--- a/sherpa-onnx/flutter/CHANGELOG.md
+++ b/sherpa-onnx/flutter/CHANGELOG.md
@@ -1,3 +1,7 @@
+## 1.10.1
+
+* Support stopping TTS generation
+
 ## 1.10.0

 * Add inverse text normalization
--- a/sherpa-onnx/flutter/lib/src/sherpa_onnx_bindings.dart
+++ b/sherpa-onnx/flutter/lib/src/sherpa_onnx_bindings.dart
@@ -326,7 +326,7 @@ typedef SherpaOnnxDestroyOfflineTtsGeneratedAudioNative = Void Function(
 typedef SherpaOnnxDestroyOfflineTtsGeneratedAudio = void Function(
    Pointer<SherpaOnnxGeneratedAudio>);

-typedef SherpaOnnxGeneratedAudioCallbackNative = Void Function(
+typedef SherpaOnnxGeneratedAudioCallbackNative = Int32 Function( // C side returns int32_t
    Pointer<Float>, Int32);

 typedef SherpaOnnxOfflineTtsGenerateWithCallbackNative
--- a/sherpa-onnx/flutter/lib/src/tts.dart
+++ b/sherpa-onnx/flutter/lib/src/tts.dart
@@ -149,7 +149,7 @@ class OfflineTts {
      {required String text,
      int sid = 0,
      double speed = 1.0,
-      required void Function(Float32List samples) callback}) {
+      required int Function(Float32List samples) callback}) {
    // see
    // https://github.com/dart-lang/sdk/issues/54276#issuecomment-1846109285
    // https://stackoverflow.com/questions/69537440/callbacks-in-dart-dartffi-only-supports-calling-static-dart-functions-from-nat
@@ -159,8 +159,8 @@ class OfflineTts {
            (Pointer<Float> samples, int n) {
      final s = samples.asTypedList(n);
      final newSamples = Float32List.fromList(s);
-      callback(newSamples);
-    });
+      return callback(newSamples);
+    }, exceptionalReturn: 0);

    final Pointer<Utf8> textPtr = text.toNativeUtf8();
    final p = SherpaOnnxBindings.offlineTtsGenerateWithCallback
--- a/sherpa-onnx/jni/offline-tts.cc
+++ b/sherpa-onnx/jni/offline-tts.cc
@@ -186,14 +186,42 @@ Java_com_k2fsa_sherpa_onnx_OfflineTts_generateWithCallbackImpl(
  const char *p_text = env->GetStringUTFChars(text, nullptr);
  SHERPA_ONNX_LOGE("string is: %s", p_text);

-  std::function<void(const float *, int32_t, float)> callback_wrapper =
+  std::function<int32_t(const float *, int32_t, float)> callback_wrapper =
      [env, callback](const float *samples, int32_t n, float /*progress*/) {
        jclass cls = env->GetObjectClass(callback);
-        jmethodID mid = env->GetMethodID(cls, "invoke", "([F)V");
+
+#if 0
+        // this block is for debugging only
+        // see also
+        // https://jnjosh.com/posts/kotlinfromcpp/
+        jmethodID classMethodId =
+            env->GetMethodID(cls, "getClass", "()Ljava/lang/Class;");
+        jobject klassObj = env->CallObjectMethod(callback, classMethodId);
+        auto klassObject = env->GetObjectClass(klassObj);
+        auto nameMethodId =
+            env->GetMethodID(klassObject, "getName", "()Ljava/lang/String;");
+        jstring classString =
+            (jstring)env->CallObjectMethod(klassObj, nameMethodId);
+        auto className = env->GetStringUTFChars(classString, NULL);
+        SHERPA_ONNX_LOGE("name is: %s", className);
+        env->ReleaseStringUTFChars(classString, className);
+#endif
+
+        jmethodID mid =
+            env->GetMethodID(cls, "invoke", "([F)Ljava/lang/Integer;");
+        if (mid == nullptr) {
+          SHERPA_ONNX_LOGE("Failed to get the callback. Ignore it.");
+          return 1;
+        }

        jfloatArray samples_arr = env->NewFloatArray(n);
        env->SetFloatArrayRegion(samples_arr, 0, n, samples);
-        env->CallVoidMethod(callback, mid, samples_arr);
+
+        jobject should_continue =
+            env->CallObjectMethod(callback, mid, samples_arr);
+        jclass jklass = env->GetObjectClass(should_continue);
+        jmethodID int_value_mid = env->GetMethodID(jklass, "intValue", "()I");
+        return env->CallIntMethod(should_continue, int_value_mid);
      };

  auto audio = reinterpret_cast<sherpa_onnx::OfflineTts *>(ptr)->Generate(
--- a/sherpa-onnx/python/csrc/offline-tts.cc
+++ b/sherpa-onnx/python/csrc/offline-tts.cc
@@ -57,13 +57,13 @@ void PybindOfflineTts(py::module *m) {
          "generate",
          [](const PyClass &self, const std::string &text, int64_t sid,
             float speed,
-             std::function<void(py::array_t<float>, float)> callback)
+             std::function<int32_t(py::array_t<float>, float)> callback)
              -> GeneratedAudio {
            if (!callback) {
              return self.Generate(text, sid, speed);
            }

-            std::function<void(const float *, int32_t, float)>
+            std::function<int32_t(const float *, int32_t, float)>
                callback_wrapper = [callback](const float *samples, int32_t n,
                                              float progress) {
                  // CAUTION(fangjun): we have to copy samples since it is
@@ -75,7 +75,7 @@ void PybindOfflineTts(py::module *m) {
                  py::buffer_info buf = array.request();
                  auto p = static_cast<float *>(buf.ptr);
                  std::copy(samples, samples + n, p);
-                  callback(array, progress);
+                  return callback(array, progress);
                };

            return self.Generate(text, sid, speed, callback_wrapper);