Enable to stop TTS generation (#1041)
This commit is contained in:
@@ -8,7 +8,7 @@ project(sherpa-onnx)
|
|||||||
# ./nodejs-addon-examples
|
# ./nodejs-addon-examples
|
||||||
# ./dart-api-examples/
|
# ./dart-api-examples/
|
||||||
# ./sherpa-onnx/flutter/CHANGELOG.md
|
# ./sherpa-onnx/flutter/CHANGELOG.md
|
||||||
set(SHERPA_ONNX_VERSION "1.10.0")
|
set(SHERPA_ONNX_VERSION "1.10.1")
|
||||||
|
|
||||||
# Disable warning about
|
# Disable warning about
|
||||||
#
|
#
|
||||||
|
|||||||
@@ -26,6 +26,9 @@ class MainActivity : AppCompatActivity() {
|
|||||||
private lateinit var speed: EditText
|
private lateinit var speed: EditText
|
||||||
private lateinit var generate: Button
|
private lateinit var generate: Button
|
||||||
private lateinit var play: Button
|
private lateinit var play: Button
|
||||||
|
private lateinit var stop: Button
|
||||||
|
private var stopped: Boolean = false
|
||||||
|
private var mediaPlayer: MediaPlayer? = null
|
||||||
|
|
||||||
// see
|
// see
|
||||||
// https://developer.android.com/reference/kotlin/android/media/AudioTrack
|
// https://developer.android.com/reference/kotlin/android/media/AudioTrack
|
||||||
@@ -49,9 +52,11 @@ class MainActivity : AppCompatActivity() {
|
|||||||
|
|
||||||
generate = findViewById(R.id.generate)
|
generate = findViewById(R.id.generate)
|
||||||
play = findViewById(R.id.play)
|
play = findViewById(R.id.play)
|
||||||
|
stop = findViewById(R.id.stop)
|
||||||
|
|
||||||
generate.setOnClickListener { onClickGenerate() }
|
generate.setOnClickListener { onClickGenerate() }
|
||||||
play.setOnClickListener { onClickPlay() }
|
play.setOnClickListener { onClickPlay() }
|
||||||
|
stop.setOnClickListener { onClickStop() }
|
||||||
|
|
||||||
sid.setText("0")
|
sid.setText("0")
|
||||||
speed.setText("1.0")
|
speed.setText("1.0")
|
||||||
@@ -70,7 +75,7 @@ class MainActivity : AppCompatActivity() {
|
|||||||
AudioFormat.CHANNEL_OUT_MONO,
|
AudioFormat.CHANNEL_OUT_MONO,
|
||||||
AudioFormat.ENCODING_PCM_FLOAT
|
AudioFormat.ENCODING_PCM_FLOAT
|
||||||
)
|
)
|
||||||
Log.i(TAG, "sampleRate: ${sampleRate}, buffLength: ${bufLength}")
|
Log.i(TAG, "sampleRate: $sampleRate, buffLength: $bufLength")
|
||||||
|
|
||||||
val attr = AudioAttributes.Builder().setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
|
val attr = AudioAttributes.Builder().setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
|
||||||
.setUsage(AudioAttributes.USAGE_MEDIA)
|
.setUsage(AudioAttributes.USAGE_MEDIA)
|
||||||
@@ -90,8 +95,14 @@ class MainActivity : AppCompatActivity() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// this function is called from C++
|
// this function is called from C++
|
||||||
private fun callback(samples: FloatArray) {
|
private fun callback(samples: FloatArray): Int {
|
||||||
track.write(samples, 0, samples.size, AudioTrack.WRITE_BLOCKING)
|
if (!stopped) {
|
||||||
|
track.write(samples, 0, samples.size, AudioTrack.WRITE_BLOCKING)
|
||||||
|
return 1
|
||||||
|
} else {
|
||||||
|
track.stop()
|
||||||
|
return 0
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun onClickGenerate() {
|
private fun onClickGenerate() {
|
||||||
@@ -127,6 +138,8 @@ class MainActivity : AppCompatActivity() {
|
|||||||
track.play()
|
track.play()
|
||||||
|
|
||||||
play.isEnabled = false
|
play.isEnabled = false
|
||||||
|
generate.isEnabled = false
|
||||||
|
stopped = false
|
||||||
Thread {
|
Thread {
|
||||||
val audio = tts.generateWithCallback(
|
val audio = tts.generateWithCallback(
|
||||||
text = textStr,
|
text = textStr,
|
||||||
@@ -140,6 +153,7 @@ class MainActivity : AppCompatActivity() {
|
|||||||
if (ok) {
|
if (ok) {
|
||||||
runOnUiThread {
|
runOnUiThread {
|
||||||
play.isEnabled = true
|
play.isEnabled = true
|
||||||
|
generate.isEnabled = true
|
||||||
track.stop()
|
track.stop()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -148,11 +162,22 @@ class MainActivity : AppCompatActivity() {
|
|||||||
|
|
||||||
private fun onClickPlay() {
|
private fun onClickPlay() {
|
||||||
val filename = application.filesDir.absolutePath + "/generated.wav"
|
val filename = application.filesDir.absolutePath + "/generated.wav"
|
||||||
val mediaPlayer = MediaPlayer.create(
|
mediaPlayer?.stop()
|
||||||
|
mediaPlayer = MediaPlayer.create(
|
||||||
applicationContext,
|
applicationContext,
|
||||||
Uri.fromFile(File(filename))
|
Uri.fromFile(File(filename))
|
||||||
)
|
)
|
||||||
mediaPlayer.start()
|
mediaPlayer?.start()
|
||||||
|
}
|
||||||
|
|
||||||
|
private fun onClickStop() {
|
||||||
|
stopped = true
|
||||||
|
play.isEnabled = true
|
||||||
|
generate.isEnabled = true
|
||||||
|
track.pause()
|
||||||
|
track.flush()
|
||||||
|
mediaPlayer?.stop()
|
||||||
|
mediaPlayer = null
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun initTts() {
|
private fun initTts() {
|
||||||
|
|||||||
@@ -76,7 +76,7 @@ class OfflineTts(
|
|||||||
text: String,
|
text: String,
|
||||||
sid: Int = 0,
|
sid: Int = 0,
|
||||||
speed: Float = 1.0f,
|
speed: Float = 1.0f,
|
||||||
callback: (samples: FloatArray) -> Unit
|
callback: (samples: FloatArray) -> Int
|
||||||
): GeneratedAudio {
|
): GeneratedAudio {
|
||||||
val objArray = generateWithCallbackImpl(
|
val objArray = generateWithCallbackImpl(
|
||||||
ptr,
|
ptr,
|
||||||
@@ -146,7 +146,7 @@ class OfflineTts(
|
|||||||
text: String,
|
text: String,
|
||||||
sid: Int = 0,
|
sid: Int = 0,
|
||||||
speed: Float = 1.0f,
|
speed: Float = 1.0f,
|
||||||
callback: (samples: FloatArray) -> Unit
|
callback: (samples: FloatArray) -> Int
|
||||||
): Array<Any>
|
): Array<Any>
|
||||||
|
|
||||||
companion object {
|
companion object {
|
||||||
|
|||||||
@@ -84,4 +84,16 @@
|
|||||||
app:layout_constraintLeft_toLeftOf="parent"
|
app:layout_constraintLeft_toLeftOf="parent"
|
||||||
app:layout_constraintRight_toRightOf="parent"
|
app:layout_constraintRight_toRightOf="parent"
|
||||||
app:layout_constraintTop_toBottomOf="@id/generate" />
|
app:layout_constraintTop_toBottomOf="@id/generate" />
|
||||||
|
|
||||||
|
<Button
|
||||||
|
android:id="@+id/stop"
|
||||||
|
android:textAllCaps="false"
|
||||||
|
android:layout_width="match_parent"
|
||||||
|
android:layout_height="50dp"
|
||||||
|
android:layout_marginTop="4dp"
|
||||||
|
android:text="@string/stop"
|
||||||
|
app:layout_constraintLeft_toLeftOf="parent"
|
||||||
|
app:layout_constraintRight_toRightOf="parent"
|
||||||
|
app:layout_constraintTop_toBottomOf="@id/play" />
|
||||||
|
|
||||||
</androidx.constraintlayout.widget.ConstraintLayout>
|
</androidx.constraintlayout.widget.ConstraintLayout>
|
||||||
@@ -7,4 +7,5 @@
|
|||||||
<string name="text_hint">Please input your text here</string>
|
<string name="text_hint">Please input your text here</string>
|
||||||
<string name="generate">Generate</string>
|
<string name="generate">Generate</string>
|
||||||
<string name="play">Play</string>
|
<string name="play">Play</string>
|
||||||
|
<string name="stop">Stop</string>
|
||||||
</resources>
|
</resources>
|
||||||
@@ -126,7 +126,7 @@ class TtsService : TextToSpeechService() {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
val ttsCallback = { floatSamples: FloatArray ->
|
val ttsCallback: (FloatArray) -> Int = fun(floatSamples): Int {
|
||||||
// convert FloatArray to ByteArray
|
// convert FloatArray to ByteArray
|
||||||
val samples = floatArrayToByteArray(floatSamples)
|
val samples = floatArrayToByteArray(floatSamples)
|
||||||
val maxBufferSize: Int = callback.maxBufferSize
|
val maxBufferSize: Int = callback.maxBufferSize
|
||||||
@@ -137,6 +137,9 @@ class TtsService : TextToSpeechService() {
|
|||||||
offset += bytesToWrite
|
offset += bytesToWrite
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 1 means to continue
|
||||||
|
// 0 means to stop
|
||||||
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
Log.i(TAG, "text: $text")
|
Log.i(TAG, "text: $text")
|
||||||
@@ -160,4 +163,4 @@ class TtsService : TextToSpeechService() {
|
|||||||
}
|
}
|
||||||
return byteArray
|
return byteArray
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ environment:
|
|||||||
|
|
||||||
# Add regular dependencies here.
|
# Add regular dependencies here.
|
||||||
dependencies:
|
dependencies:
|
||||||
sherpa_onnx: ^1.10.0
|
sherpa_onnx: ^1.10.1
|
||||||
path: ^1.9.0
|
path: ^1.9.0
|
||||||
args: ^2.5.0
|
args: ^2.5.0
|
||||||
|
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ environment:
|
|||||||
|
|
||||||
# Add regular dependencies here.
|
# Add regular dependencies here.
|
||||||
dependencies:
|
dependencies:
|
||||||
sherpa_onnx: ^1.10.0
|
sherpa_onnx: ^1.10.1
|
||||||
path: ^1.9.0
|
path: ^1.9.0
|
||||||
args: ^2.5.0
|
args: ^2.5.0
|
||||||
|
|
||||||
|
|||||||
@@ -68,6 +68,10 @@ void main(List<String> arguments) async {
|
|||||||
callback: (Float32List samples) {
|
callback: (Float32List samples) {
|
||||||
print('${samples.length} samples received');
|
print('${samples.length} samples received');
|
||||||
// You can play samples in a separate thread/isolate
|
// You can play samples in a separate thread/isolate
|
||||||
|
|
||||||
|
// 1 means to continue
|
||||||
|
// 0 means to stop
|
||||||
|
return 1;
|
||||||
});
|
});
|
||||||
tts.free();
|
tts.free();
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ environment:
|
|||||||
|
|
||||||
# Add regular dependencies here.
|
# Add regular dependencies here.
|
||||||
dependencies:
|
dependencies:
|
||||||
sherpa_onnx: ^1.10.0
|
sherpa_onnx: ^1.10.1
|
||||||
path: ^1.9.0
|
path: ^1.9.0
|
||||||
args: ^2.5.0
|
args: ^2.5.0
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ environment:
|
|||||||
sdk: ^3.4.0
|
sdk: ^3.4.0
|
||||||
|
|
||||||
dependencies:
|
dependencies:
|
||||||
sherpa_onnx: ^1.10.0
|
sherpa_onnx: ^1.10.1
|
||||||
path: ^1.9.0
|
path: ^1.9.0
|
||||||
args: ^2.5.0
|
args: ^2.5.0
|
||||||
|
|
||||||
|
|||||||
@@ -187,6 +187,10 @@ to download more models.
|
|||||||
Marshal.Copy(samples, data, 0, n);
|
Marshal.Copy(samples, data, 0, n);
|
||||||
|
|
||||||
dataItems.Add(data);
|
dataItems.Add(data);
|
||||||
|
|
||||||
|
// 1 means to keep generating
|
||||||
|
// 0 means to stop generating
|
||||||
|
return 1;
|
||||||
};
|
};
|
||||||
|
|
||||||
bool playFinished = false;
|
bool playFinished = false;
|
||||||
|
|||||||
@@ -25,6 +25,46 @@ fun testTts() {
|
|||||||
println("Saved to test-en.wav")
|
println("Saved to test-en.wav")
|
||||||
}
|
}
|
||||||
|
|
||||||
fun callback(samples: FloatArray): Unit {
|
/*
|
||||||
println("callback got called with ${samples.size} samples");
|
1. Unzip test_tts.jar
|
||||||
|
2.
|
||||||
|
javap ./com/k2fsa/sherpa/onnx/Test_ttsKt\$testTts\$audio\$1.class
|
||||||
|
|
||||||
|
3. It prints:
|
||||||
|
Compiled from "test_tts.kt"
|
||||||
|
final class com.k2fsa.sherpa.onnx.Test_ttsKt$testTts$audio$1 extends kotlin.jvm.internal.FunctionReferenceImpl implements kotlin.jvm.functions.Function1<float[], java.lang.Integer> {
|
||||||
|
public static final com.k2fsa.sherpa.onnx.Test_ttsKt$testTts$audio$1 INSTANCE;
|
||||||
|
com.k2fsa.sherpa.onnx.Test_ttsKt$testTts$audio$1();
|
||||||
|
public final java.lang.Integer invoke(float[]);
|
||||||
|
public java.lang.Object invoke(java.lang.Object);
|
||||||
|
static {};
|
||||||
|
}
|
||||||
|
|
||||||
|
4.
|
||||||
|
javap -s ./com/k2fsa/sherpa/onnx/Test_ttsKt\$testTts\$audio\$1.class
|
||||||
|
|
||||||
|
5. It prints
|
||||||
|
Compiled from "test_tts.kt"
|
||||||
|
final class com.k2fsa.sherpa.onnx.Test_ttsKt$testTts$audio$1 extends kotlin.jvm.internal.FunctionReferenceImpl implements kotlin.jvm.functions.Function1<float[], java.lang.Integer> {
|
||||||
|
public static final com.k2fsa.sherpa.onnx.Test_ttsKt$testTts$audio$1 INSTANCE;
|
||||||
|
descriptor: Lcom/k2fsa/sherpa/onnx/Test_ttsKt$testTts$audio$1;
|
||||||
|
com.k2fsa.sherpa.onnx.Test_ttsKt$testTts$audio$1();
|
||||||
|
descriptor: ()V
|
||||||
|
|
||||||
|
public final java.lang.Integer invoke(float[]);
|
||||||
|
descriptor: ([F)Ljava/lang/Integer;
|
||||||
|
|
||||||
|
public java.lang.Object invoke(java.lang.Object);
|
||||||
|
descriptor: (Ljava/lang/Object;)Ljava/lang/Object;
|
||||||
|
|
||||||
|
static {};
|
||||||
|
descriptor: ()V
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
fun callback(samples: FloatArray): Int {
|
||||||
|
println("callback got called with ${samples.size} samples");
|
||||||
|
|
||||||
|
// 1 means to continue
|
||||||
|
// 0 means to stop
|
||||||
|
return 1
|
||||||
}
|
}
|
||||||
|
|||||||
Binary file not shown.
@@ -57,7 +57,7 @@ static bool g_started = false;
|
|||||||
static bool g_stopped = false;
|
static bool g_stopped = false;
|
||||||
static bool g_killed = false;
|
static bool g_killed = false;
|
||||||
|
|
||||||
static void AudioGeneratedCallback(const float *s, int32_t n) {
|
static int32_t AudioGeneratedCallback(const float *s, int32_t n) {
|
||||||
if (n > 0) {
|
if (n > 0) {
|
||||||
Samples samples;
|
Samples samples;
|
||||||
samples.data = std::vector<float>{s, s + n};
|
samples.data = std::vector<float>{s, s + n};
|
||||||
@@ -66,6 +66,10 @@ static void AudioGeneratedCallback(const float *s, int32_t n) {
|
|||||||
g_buffer.samples.push(std::move(samples));
|
g_buffer.samples.push(std::move(samples));
|
||||||
g_started = true;
|
g_started = true;
|
||||||
}
|
}
|
||||||
|
if (g_killed) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int PlayCallback(const void * /*in*/, void *out,
|
static int PlayCallback(const void * /*in*/, void *out,
|
||||||
@@ -324,6 +328,7 @@ BEGIN_MESSAGE_MAP(CNonStreamingTextToSpeechDlg, CDialogEx)
|
|||||||
ON_WM_PAINT()
|
ON_WM_PAINT()
|
||||||
ON_WM_QUERYDRAGICON()
|
ON_WM_QUERYDRAGICON()
|
||||||
ON_BN_CLICKED(IDOK, &CNonStreamingTextToSpeechDlg::OnBnClickedOk)
|
ON_BN_CLICKED(IDOK, &CNonStreamingTextToSpeechDlg::OnBnClickedOk)
|
||||||
|
ON_BN_CLICKED(IDC_STOP, &CNonStreamingTextToSpeechDlg::OnBnClickedStop)
|
||||||
END_MESSAGE_MAP()
|
END_MESSAGE_MAP()
|
||||||
|
|
||||||
|
|
||||||
@@ -492,11 +497,18 @@ void CNonStreamingTextToSpeechDlg::Init() {
|
|||||||
if (tts_) {
|
if (tts_) {
|
||||||
SherpaOnnxDestroyOfflineTts(tts_);
|
SherpaOnnxDestroyOfflineTts(tts_);
|
||||||
}
|
}
|
||||||
|
if (generate_thread_ && generate_thread_->joinable()) {
|
||||||
|
generate_thread_->join();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (play_thread_ && play_thread_->joinable()) {
|
||||||
|
play_thread_->join();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static std::string ToString(const CString &s) {
|
static std::string ToString(const CString &s) {
|
||||||
CT2CA pszConvertedAnsiString( s);
|
CT2CA pszConvertedAnsiString(s);
|
||||||
return std::string(pszConvertedAnsiString);
|
return std::string(pszConvertedAnsiString);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -510,7 +522,7 @@ void CNonStreamingTextToSpeechDlg::OnBnClickedOk() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
speed_.GetWindowText(s);
|
speed_.GetWindowText(s);
|
||||||
float speed = static_cast<float>(_ttof(s));
|
float speed = static_cast<float>(_ttof(s));
|
||||||
if (speed < 0) {
|
if (speed < 0) {
|
||||||
AfxMessageBox(Utf8ToUtf16("Please input a valid speed").c_str(), MB_OK);
|
AfxMessageBox(Utf8ToUtf16("Please input a valid speed").c_str(), MB_OK);
|
||||||
return;
|
return;
|
||||||
@@ -541,28 +553,40 @@ void CNonStreamingTextToSpeechDlg::OnBnClickedOk() {
|
|||||||
// for simplicity
|
// for simplicity
|
||||||
play_thread_ = std::make_unique<std::thread>(StartPlayback, SherpaOnnxOfflineTtsSampleRate(tts_));
|
play_thread_ = std::make_unique<std::thread>(StartPlayback, SherpaOnnxOfflineTtsSampleRate(tts_));
|
||||||
|
|
||||||
generate_btn_.EnableWindow(FALSE);
|
if (generate_thread_ && generate_thread_->joinable()) {
|
||||||
|
generate_thread_->join();
|
||||||
const SherpaOnnxGeneratedAudio *audio =
|
}
|
||||||
SherpaOnnxOfflineTtsGenerateWithCallback(tts_, ss.c_str(), speaker_id, speed, &AudioGeneratedCallback);
|
|
||||||
|
|
||||||
generate_btn_.EnableWindow(TRUE);
|
|
||||||
|
|
||||||
output_filename_.GetWindowText(s);
|
output_filename_.GetWindowText(s);
|
||||||
std::string filename = ToString(s);
|
std::string filename = ToString(s);
|
||||||
|
|
||||||
int ok = SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate,
|
generate_thread_ = std::make_unique<std::thread>([ss, this,filename, speaker_id, speed]() {
|
||||||
filename.c_str());
|
std::string text = ss;
|
||||||
|
|
||||||
SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
|
// generate_btn_.EnableWindow(FALSE);
|
||||||
|
|
||||||
if (ok) {
|
const SherpaOnnxGeneratedAudio *audio =
|
||||||
// AfxMessageBox(Utf8ToUtf16(std::string("Saved to ") + filename + " successfully").c_str(), MB_OK);
|
SherpaOnnxOfflineTtsGenerateWithCallback(tts_, text.c_str(), speaker_id, speed, &AudioGeneratedCallback);
|
||||||
AppendLineToMultilineEditCtrl(my_hint_, std::string("Saved to ") + filename + " successfully");
|
// generate_btn_.EnableWindow(TRUE);
|
||||||
} else {
|
g_stopped = true;
|
||||||
// AfxMessageBox(Utf8ToUtf16(std::string("Failed to save to ") + filename).c_str(), MB_OK);
|
|
||||||
AppendLineToMultilineEditCtrl(my_hint_, std::string("Failed to saved to ") + filename);
|
int ok = SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate,
|
||||||
}
|
filename.c_str());
|
||||||
|
|
||||||
|
SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
|
||||||
|
|
||||||
|
if (ok) {
|
||||||
|
// AfxMessageBox(Utf8ToUtf16(std::string("Saved to ") + filename + " successfully").c_str(), MB_OK);
|
||||||
|
|
||||||
|
// AppendLineToMultilineEditCtrl(my_hint_, std::string("Saved to ") + filename + " successfully");
|
||||||
|
} else {
|
||||||
|
// AfxMessageBox(Utf8ToUtf16(std::string("Failed to save to ") + filename).c_str(), MB_OK);
|
||||||
|
|
||||||
|
// AppendLineToMultilineEditCtrl(my_hint_, std::string("Failed to saved to ") + filename);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
//CDialogEx::OnOK();
|
//CDialogEx::OnOK();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void CNonStreamingTextToSpeechDlg::OnBnClickedStop() { g_killed = true; }
|
||||||
|
|||||||
@@ -60,5 +60,8 @@ public:
|
|||||||
private:
|
private:
|
||||||
Microphone mic_;
|
Microphone mic_;
|
||||||
std::unique_ptr<std::thread> play_thread_;
|
std::unique_ptr<std::thread> play_thread_;
|
||||||
|
std::unique_ptr<std::thread> generate_thread_;
|
||||||
|
|
||||||
|
public:
|
||||||
|
afx_msg void OnBnClickedStop();
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -13,6 +13,7 @@
|
|||||||
#define IDC_HINT 1005
|
#define IDC_HINT 1005
|
||||||
#define IDC_EDIT1 1006
|
#define IDC_EDIT1 1006
|
||||||
#define IDC_OUTPUT_FILENAME 1006
|
#define IDC_OUTPUT_FILENAME 1006
|
||||||
|
#define IDC_STOP 1009
|
||||||
|
|
||||||
// Next default values for new objects
|
// Next default values for new objects
|
||||||
//
|
//
|
||||||
@@ -20,7 +21,7 @@
|
|||||||
#ifndef APSTUDIO_READONLY_SYMBOLS
|
#ifndef APSTUDIO_READONLY_SYMBOLS
|
||||||
#define _APS_NEXT_RESOURCE_VALUE 130
|
#define _APS_NEXT_RESOURCE_VALUE 130
|
||||||
#define _APS_NEXT_COMMAND_VALUE 32771
|
#define _APS_NEXT_COMMAND_VALUE 32771
|
||||||
#define _APS_NEXT_CONTROL_VALUE 1007
|
#define _APS_NEXT_CONTROL_VALUE 1010
|
||||||
#define _APS_NEXT_SYMED_VALUE 101
|
#define _APS_NEXT_SYMED_VALUE 101
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
{
|
{
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"sherpa-onnx-node": "^1.10.0"
|
"sherpa-onnx-node": "^1.10.1"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -228,6 +228,13 @@ def generated_audio_callback(samples: np.ndarray, progress: float):
|
|||||||
logging.info("Start playing ...")
|
logging.info("Start playing ...")
|
||||||
started = True
|
started = True
|
||||||
|
|
||||||
|
# 1 means to keep generating
|
||||||
|
# 0 means to stop generating
|
||||||
|
if killed:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
return 1
|
||||||
|
|
||||||
|
|
||||||
# see https://python-sounddevice.readthedocs.io/en/0.4.6/api/streams.html#sounddevice.OutputStream
|
# see https://python-sounddevice.readthedocs.io/en/0.4.6/api/streams.html#sounddevice.OutputStream
|
||||||
def play_audio_callback(
|
def play_audio_callback(
|
||||||
|
|||||||
@@ -8,8 +8,8 @@ using System;
|
|||||||
|
|
||||||
namespace SherpaOnnx
|
namespace SherpaOnnx
|
||||||
{
|
{
|
||||||
// IntPtr is actuallly a `const float*` from C++
|
// IntPtr is actually a `const float*` from C++
|
||||||
public delegate void OfflineTtsCallback(IntPtr samples, int n);
|
public delegate int OfflineTtsCallback(IntPtr samples, int n);
|
||||||
|
|
||||||
public class OfflineTts : IDisposable
|
public class OfflineTts : IDisposable
|
||||||
{
|
{
|
||||||
@@ -88,4 +88,4 @@ namespace SherpaOnnx
|
|||||||
[DllImport(Dll.Filename, CallingConvention = CallingConvention.Cdecl)]
|
[DllImport(Dll.Filename, CallingConvention = CallingConvention.Cdecl)]
|
||||||
private static extern IntPtr SherpaOnnxOfflineTtsGenerateWithCallback(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string text, int sid, float speed, OfflineTtsCallback callback);
|
private static extern IntPtr SherpaOnnxOfflineTtsGenerateWithCallback(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string text, int sid, float speed, OfflineTtsCallback callback);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -935,7 +935,7 @@ int32_t SherpaOnnxOfflineTtsNumSpeakers(const SherpaOnnxOfflineTts *tts) {
|
|||||||
|
|
||||||
static const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateInternal(
|
static const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateInternal(
|
||||||
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
|
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
|
||||||
std::function<void(const float *, int32_t, float)> callback) {
|
std::function<int32_t(const float *, int32_t, float)> callback) {
|
||||||
sherpa_onnx::GeneratedAudio audio =
|
sherpa_onnx::GeneratedAudio audio =
|
||||||
tts->impl->Generate(text, sid, speed, callback);
|
tts->impl->Generate(text, sid, speed, callback);
|
||||||
|
|
||||||
@@ -965,7 +965,9 @@ const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithCallback(
|
|||||||
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
|
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
|
||||||
SherpaOnnxGeneratedAudioCallback callback) {
|
SherpaOnnxGeneratedAudioCallback callback) {
|
||||||
auto wrapper = [callback](const float *samples, int32_t n,
|
auto wrapper = [callback](const float *samples, int32_t n,
|
||||||
float /*progress*/) { callback(samples, n); };
|
float /*progress*/) {
|
||||||
|
return callback(samples, n);
|
||||||
|
};
|
||||||
|
|
||||||
return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, wrapper);
|
return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, wrapper);
|
||||||
}
|
}
|
||||||
@@ -975,7 +977,7 @@ SherpaOnnxOfflineTtsGenerateWithProgressCallback(
|
|||||||
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
|
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
|
||||||
SherpaOnnxGeneratedAudioProgressCallback callback) {
|
SherpaOnnxGeneratedAudioProgressCallback callback) {
|
||||||
auto wrapper = [callback](const float *samples, int32_t n, float progress) {
|
auto wrapper = [callback](const float *samples, int32_t n, float progress) {
|
||||||
callback(samples, n, progress);
|
return callback(samples, n, progress);
|
||||||
};
|
};
|
||||||
return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, wrapper);
|
return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, wrapper);
|
||||||
}
|
}
|
||||||
@@ -985,7 +987,7 @@ const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithCallbackWithArg(
|
|||||||
SherpaOnnxGeneratedAudioCallbackWithArg callback, void *arg) {
|
SherpaOnnxGeneratedAudioCallbackWithArg callback, void *arg) {
|
||||||
auto wrapper = [callback, arg](const float *samples, int32_t n,
|
auto wrapper = [callback, arg](const float *samples, int32_t n,
|
||||||
float /*progress*/) {
|
float /*progress*/) {
|
||||||
callback(samples, n, arg);
|
return callback(samples, n, arg);
|
||||||
};
|
};
|
||||||
|
|
||||||
return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, wrapper);
|
return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, wrapper);
|
||||||
|
|||||||
@@ -850,14 +850,17 @@ SHERPA_ONNX_API typedef struct SherpaOnnxGeneratedAudio {
|
|||||||
int32_t sample_rate;
|
int32_t sample_rate;
|
||||||
} SherpaOnnxGeneratedAudio;
|
} SherpaOnnxGeneratedAudio;
|
||||||
|
|
||||||
typedef void (*SherpaOnnxGeneratedAudioCallback)(const float *samples,
|
// If the callback returns 0, then it stops generating
|
||||||
int32_t n);
|
// If the callback returns 1, then it keeps generating
|
||||||
|
typedef int32_t (*SherpaOnnxGeneratedAudioCallback)(const float *samples,
|
||||||
|
int32_t n);
|
||||||
|
|
||||||
typedef void (*SherpaOnnxGeneratedAudioCallbackWithArg)(const float *samples,
|
typedef int32_t (*SherpaOnnxGeneratedAudioCallbackWithArg)(const float *samples,
|
||||||
int32_t n, void *arg);
|
int32_t n,
|
||||||
|
void *arg);
|
||||||
|
|
||||||
typedef void (*SherpaOnnxGeneratedAudioProgressCallback)(const float *samples,
|
typedef int32_t (*SherpaOnnxGeneratedAudioProgressCallback)(
|
||||||
int32_t n, float p);
|
const float *samples, int32_t n, float p);
|
||||||
|
|
||||||
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTts SherpaOnnxOfflineTts;
|
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTts SherpaOnnxOfflineTts;
|
||||||
|
|
||||||
|
|||||||
@@ -216,9 +216,11 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
|
|||||||
|
|
||||||
GeneratedAudio ans;
|
GeneratedAudio ans;
|
||||||
|
|
||||||
|
int32_t should_continue = 1;
|
||||||
|
|
||||||
int32_t k = 0;
|
int32_t k = 0;
|
||||||
|
|
||||||
for (int32_t b = 0; b != num_batches; ++b) {
|
for (int32_t b = 0; b != num_batches && should_continue; ++b) {
|
||||||
batch.clear();
|
batch.clear();
|
||||||
for (int32_t i = 0; i != batch_size; ++i, ++k) {
|
for (int32_t i = 0; i != batch_size; ++i, ++k) {
|
||||||
batch.push_back(std::move(x[k]));
|
batch.push_back(std::move(x[k]));
|
||||||
@@ -229,8 +231,8 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
|
|||||||
ans.samples.insert(ans.samples.end(), audio.samples.begin(),
|
ans.samples.insert(ans.samples.end(), audio.samples.begin(),
|
||||||
audio.samples.end());
|
audio.samples.end());
|
||||||
if (callback) {
|
if (callback) {
|
||||||
callback(audio.samples.data(), audio.samples.size(),
|
should_continue = callback(audio.samples.data(), audio.samples.size(),
|
||||||
b * 1.0 / num_batches);
|
b * 1.0 / num_batches);
|
||||||
// Caution(fangjun): audio is freed when the callback returns, so users
|
// Caution(fangjun): audio is freed when the callback returns, so users
|
||||||
// should copy the data if they want to access the data after
|
// should copy the data if they want to access the data after
|
||||||
// the callback returns to avoid segmentation fault.
|
// the callback returns to avoid segmentation fault.
|
||||||
@@ -238,7 +240,7 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
|
|||||||
}
|
}
|
||||||
|
|
||||||
batch.clear();
|
batch.clear();
|
||||||
while (k < static_cast<int32_t>(x.size())) {
|
while (k < static_cast<int32_t>(x.size()) && should_continue) {
|
||||||
batch.push_back(std::move(x[k]));
|
batch.push_back(std::move(x[k]));
|
||||||
++k;
|
++k;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -59,7 +59,9 @@ struct GeneratedAudio {
|
|||||||
|
|
||||||
class OfflineTtsImpl;
|
class OfflineTtsImpl;
|
||||||
|
|
||||||
using GeneratedAudioCallback = std::function<void(
|
// If the callback returns 0, then it stops generating
|
||||||
|
// If the callback returns 1, then it keeps generating
|
||||||
|
using GeneratedAudioCallback = std::function<int32_t(
|
||||||
const float * /*samples*/, int32_t /*n*/, float /*progress*/)>;
|
const float * /*samples*/, int32_t /*n*/, float /*progress*/)>;
|
||||||
|
|
||||||
class OfflineTts {
|
class OfflineTts {
|
||||||
|
|||||||
@@ -44,13 +44,20 @@ static void Handler(int32_t /*sig*/) {
|
|||||||
fprintf(stderr, "\nCaught Ctrl + C. Exiting\n");
|
fprintf(stderr, "\nCaught Ctrl + C. Exiting\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void AudioGeneratedCallback(const float *s, int32_t n,
|
static int32_t AudioGeneratedCallback(const float *s, int32_t n,
|
||||||
float /*progress*/) {
|
float /*progress*/) {
|
||||||
if (n > 0) {
|
if (n > 0) {
|
||||||
std::lock_guard<std::mutex> lock(g_buffer.mutex);
|
std::lock_guard<std::mutex> lock(g_buffer.mutex);
|
||||||
g_buffer.samples.push({s, s + n});
|
g_buffer.samples.push({s, s + n});
|
||||||
g_cv.notify_all();
|
g_cv.notify_all();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (g_killed) {
|
||||||
|
return 0; // stop generating
|
||||||
|
}
|
||||||
|
|
||||||
|
// continue generating
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void StartPlayback(const std::string &device_name, int32_t sample_rate) {
|
static void StartPlayback(const std::string &device_name, int32_t sample_rate) {
|
||||||
|
|||||||
@@ -47,8 +47,8 @@ static void Handler(int32_t /*sig*/) {
|
|||||||
fprintf(stderr, "\nCaught Ctrl + C. Exiting\n");
|
fprintf(stderr, "\nCaught Ctrl + C. Exiting\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void AudioGeneratedCallback(const float *s, int32_t n,
|
static int32_t AudioGeneratedCallback(const float *s, int32_t n,
|
||||||
float /*progress*/) {
|
float /*progress*/) {
|
||||||
if (n > 0) {
|
if (n > 0) {
|
||||||
Samples samples;
|
Samples samples;
|
||||||
samples.data = std::vector<float>{s, s + n};
|
samples.data = std::vector<float>{s, s + n};
|
||||||
@@ -57,6 +57,12 @@ static void AudioGeneratedCallback(const float *s, int32_t n,
|
|||||||
g_buffer.samples.push(std::move(samples));
|
g_buffer.samples.push(std::move(samples));
|
||||||
g_started = true;
|
g_started = true;
|
||||||
}
|
}
|
||||||
|
if (g_killed) {
|
||||||
|
return 0; // stop generating
|
||||||
|
}
|
||||||
|
|
||||||
|
// continue generating
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int PlayCallback(const void * /*in*/, void *out,
|
static int PlayCallback(const void * /*in*/, void *out,
|
||||||
|
|||||||
@@ -9,8 +9,9 @@
|
|||||||
#include "sherpa-onnx/csrc/parse-options.h"
|
#include "sherpa-onnx/csrc/parse-options.h"
|
||||||
#include "sherpa-onnx/csrc/wave-writer.h"
|
#include "sherpa-onnx/csrc/wave-writer.h"
|
||||||
|
|
||||||
void audioCallback(const float * /*samples*/, int32_t n, float progress) {
|
int32_t audioCallback(const float * /*samples*/, int32_t n, float progress) {
|
||||||
printf("sample=%d, progress=%f\n", n, progress);
|
printf("sample=%d, progress=%f\n", n, progress);
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int32_t argc, char *argv[]) {
|
int main(int32_t argc, char *argv[]) {
|
||||||
|
|||||||
@@ -1,3 +1,7 @@
|
|||||||
|
## 1.10.1
|
||||||
|
|
||||||
|
* Enable to stop TTS generation
|
||||||
|
|
||||||
## 1.10.0
|
## 1.10.0
|
||||||
|
|
||||||
* Add inverse text normalization
|
* Add inverse text normalization
|
||||||
|
|||||||
@@ -326,7 +326,7 @@ typedef SherpaOnnxDestroyOfflineTtsGeneratedAudioNative = Void Function(
|
|||||||
typedef SherpaOnnxDestroyOfflineTtsGeneratedAudio = void Function(
|
typedef SherpaOnnxDestroyOfflineTtsGeneratedAudio = void Function(
|
||||||
Pointer<SherpaOnnxGeneratedAudio>);
|
Pointer<SherpaOnnxGeneratedAudio>);
|
||||||
|
|
||||||
typedef SherpaOnnxGeneratedAudioCallbackNative = Void Function(
|
typedef SherpaOnnxGeneratedAudioCallbackNative = Int Function(
|
||||||
Pointer<Float>, Int32);
|
Pointer<Float>, Int32);
|
||||||
|
|
||||||
typedef SherpaOnnxOfflineTtsGenerateWithCallbackNative
|
typedef SherpaOnnxOfflineTtsGenerateWithCallbackNative
|
||||||
|
|||||||
@@ -149,7 +149,7 @@ class OfflineTts {
|
|||||||
{required String text,
|
{required String text,
|
||||||
int sid = 0,
|
int sid = 0,
|
||||||
double speed = 1.0,
|
double speed = 1.0,
|
||||||
required void Function(Float32List samples) callback}) {
|
required int Function(Float32List samples) callback}) {
|
||||||
// see
|
// see
|
||||||
// https://github.com/dart-lang/sdk/issues/54276#issuecomment-1846109285
|
// https://github.com/dart-lang/sdk/issues/54276#issuecomment-1846109285
|
||||||
// https://stackoverflow.com/questions/69537440/callbacks-in-dart-dartffi-only-supports-calling-static-dart-functions-from-nat
|
// https://stackoverflow.com/questions/69537440/callbacks-in-dart-dartffi-only-supports-calling-static-dart-functions-from-nat
|
||||||
@@ -159,8 +159,8 @@ class OfflineTts {
|
|||||||
(Pointer<Float> samples, int n) {
|
(Pointer<Float> samples, int n) {
|
||||||
final s = samples.asTypedList(n);
|
final s = samples.asTypedList(n);
|
||||||
final newSamples = Float32List.fromList(s);
|
final newSamples = Float32List.fromList(s);
|
||||||
callback(newSamples);
|
return callback(newSamples);
|
||||||
});
|
}, exceptionalReturn: 0);
|
||||||
|
|
||||||
final Pointer<Utf8> textPtr = text.toNativeUtf8();
|
final Pointer<Utf8> textPtr = text.toNativeUtf8();
|
||||||
final p = SherpaOnnxBindings.offlineTtsGenerateWithCallback
|
final p = SherpaOnnxBindings.offlineTtsGenerateWithCallback
|
||||||
|
|||||||
@@ -186,14 +186,42 @@ Java_com_k2fsa_sherpa_onnx_OfflineTts_generateWithCallbackImpl(
|
|||||||
const char *p_text = env->GetStringUTFChars(text, nullptr);
|
const char *p_text = env->GetStringUTFChars(text, nullptr);
|
||||||
SHERPA_ONNX_LOGE("string is: %s", p_text);
|
SHERPA_ONNX_LOGE("string is: %s", p_text);
|
||||||
|
|
||||||
std::function<void(const float *, int32_t, float)> callback_wrapper =
|
std::function<int32_t(const float *, int32_t, float)> callback_wrapper =
|
||||||
[env, callback](const float *samples, int32_t n, float /*progress*/) {
|
[env, callback](const float *samples, int32_t n, float /*progress*/) {
|
||||||
jclass cls = env->GetObjectClass(callback);
|
jclass cls = env->GetObjectClass(callback);
|
||||||
jmethodID mid = env->GetMethodID(cls, "invoke", "([F)V");
|
|
||||||
|
#if 0
|
||||||
|
// this block is for debugging only
|
||||||
|
// see also
|
||||||
|
// https://jnjosh.com/posts/kotlinfromcpp/
|
||||||
|
jmethodID classMethodId =
|
||||||
|
env->GetMethodID(cls, "getClass", "()Ljava/lang/Class;");
|
||||||
|
jobject klassObj = env->CallObjectMethod(callback, classMethodId);
|
||||||
|
auto klassObject = env->GetObjectClass(klassObj);
|
||||||
|
auto nameMethodId =
|
||||||
|
env->GetMethodID(klassObject, "getName", "()Ljava/lang/String;");
|
||||||
|
jstring classString =
|
||||||
|
(jstring)env->CallObjectMethod(klassObj, nameMethodId);
|
||||||
|
auto className = env->GetStringUTFChars(classString, NULL);
|
||||||
|
SHERPA_ONNX_LOGE("name is: %s", className);
|
||||||
|
env->ReleaseStringUTFChars(classString, className);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
jmethodID mid =
|
||||||
|
env->GetMethodID(cls, "invoke", "([F)Ljava/lang/Integer;");
|
||||||
|
if (mid == nullptr) {
|
||||||
|
SHERPA_ONNX_LOGE("Failed to get the callback. Ignore it.");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
jfloatArray samples_arr = env->NewFloatArray(n);
|
jfloatArray samples_arr = env->NewFloatArray(n);
|
||||||
env->SetFloatArrayRegion(samples_arr, 0, n, samples);
|
env->SetFloatArrayRegion(samples_arr, 0, n, samples);
|
||||||
env->CallVoidMethod(callback, mid, samples_arr);
|
|
||||||
|
jobject should_continue =
|
||||||
|
env->CallObjectMethod(callback, mid, samples_arr);
|
||||||
|
jclass jklass = env->GetObjectClass(should_continue);
|
||||||
|
jmethodID int_value_mid = env->GetMethodID(jklass, "intValue", "()I");
|
||||||
|
return env->CallIntMethod(should_continue, int_value_mid);
|
||||||
};
|
};
|
||||||
|
|
||||||
auto audio = reinterpret_cast<sherpa_onnx::OfflineTts *>(ptr)->Generate(
|
auto audio = reinterpret_cast<sherpa_onnx::OfflineTts *>(ptr)->Generate(
|
||||||
|
|||||||
@@ -57,13 +57,13 @@ void PybindOfflineTts(py::module *m) {
|
|||||||
"generate",
|
"generate",
|
||||||
[](const PyClass &self, const std::string &text, int64_t sid,
|
[](const PyClass &self, const std::string &text, int64_t sid,
|
||||||
float speed,
|
float speed,
|
||||||
std::function<void(py::array_t<float>, float)> callback)
|
std::function<int32_t(py::array_t<float>, float)> callback)
|
||||||
-> GeneratedAudio {
|
-> GeneratedAudio {
|
||||||
if (!callback) {
|
if (!callback) {
|
||||||
return self.Generate(text, sid, speed);
|
return self.Generate(text, sid, speed);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::function<void(const float *, int32_t, float)>
|
std::function<int32_t(const float *, int32_t, float)>
|
||||||
callback_wrapper = [callback](const float *samples, int32_t n,
|
callback_wrapper = [callback](const float *samples, int32_t n,
|
||||||
float progress) {
|
float progress) {
|
||||||
// CAUTION(fangjun): we have to copy samples since it is
|
// CAUTION(fangjun): we have to copy samples since it is
|
||||||
@@ -75,7 +75,7 @@ void PybindOfflineTts(py::module *m) {
|
|||||||
py::buffer_info buf = array.request();
|
py::buffer_info buf = array.request();
|
||||||
auto p = static_cast<float *>(buf.ptr);
|
auto p = static_cast<float *>(buf.ptr);
|
||||||
std::copy(samples, samples + n, p);
|
std::copy(samples, samples + n, p);
|
||||||
callback(array, progress);
|
return callback(array, progress);
|
||||||
};
|
};
|
||||||
|
|
||||||
return self.Generate(text, sid, speed, callback_wrapper);
|
return self.Generate(text, sid, speed, callback_wrapper);
|
||||||
|
|||||||
Reference in New Issue
Block a user