diff --git a/.github/workflows/go.yaml b/.github/workflows/go.yaml new file mode 100644 index 00000000..b19a801e --- /dev/null +++ b/.github/workflows/go.yaml @@ -0,0 +1,239 @@ +name: test-go + +on: + push: + branches: + - master + tags: + - '*' + pull_request: + branches: + - master + + workflow_dispatch: + +concurrency: + group: go-${{ github.ref }} + cancel-in-progress: true + +jobs: + go: + name: go ${{ matrix.os }} ${{matrix.arch }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + include: + - os: ubuntu-latest + arch: amd64 + - os: macos-latest + arch: amd64 + - os: windows-latest + arch: x64 + - os: windows-latest + arch: x86 # use 386 for GOARCH + + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + - uses: actions/setup-go@v4 + with: + go-version: '>=1.20' + + - name: Display go version + shell: bash + run: | + go version + go env GOPATH + go env GOARCH + + - name: Set up MinGW + if: matrix.os == 'windows-latest' + uses: egor-tensin/setup-mingw@v2 + with: + platform: ${{ matrix.arch }} + + - name: Show gcc + if: matrix.os == 'windows-latest' + run: | + gcc --version + + - name: Test non-streaming decoding files (Linux/macOS) + if: matrix.os != 'windows-latest' + shell: bash + run: | + cd go-api-examples/non-streaming-decode-files + ls -lh + go mod tidy + cat go.mod + go build -x + ls -lh + + git lfs install + + echo "Test transducer" + git clone https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-en-2023-06-26 + ./run-transducer.sh + rm -rf sherpa-onnx-zipformer-en-2023-06-26 + + echo "Test paraformer" + git clone https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28 + ./run-paraformer.sh + rm -rf sherpa-onnx-paraformer-zh-2023-03-28 + + echo "Test NeMo CTC" + git clone https://huggingface.co/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-medium + ./run-nemo-ctc.sh + rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium + + - name: Test non-streaming decoding files (Win64) + if: matrix.os == 'windows-latest' 
&& matrix.arch == 'x64' + shell: bash + run: | + cd go-api-examples/non-streaming-decode-files + ls -lh + go mod tidy + cat go.mod + go build + ls -lh + + echo $PWD + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/ + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/* + cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll . + ls -lh + + git lfs install + + echo "Test transducer" + git clone https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-en-2023-06-26 + ./run-transducer.sh + rm -rf sherpa-onnx-zipformer-en-2023-06-26 + + echo "Test paraformer" + git clone https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28 + ./run-paraformer.sh + rm -rf sherpa-onnx-paraformer-zh-2023-03-28 + + echo "Test NeMo CTC" + git clone https://huggingface.co/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-medium + ./run-nemo-ctc.sh + rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium + + - name: Test non-streaming decoding files (Win32) + if: matrix.os == 'windows-latest' && matrix.arch == 'x86' + shell: bash + run: | + cd go-api-examples/non-streaming-decode-files + ls -lh + go mod tidy + cat go.mod + ls -lh + + go env GOARCH + go env + echo "------------------------------" + go env -w GOARCH=386 + go env -w CGO_ENABLED=1 + go env + + go clean + go build -x + + echo $PWD + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/ + cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll . 
+ ls -lh + + git lfs install + + echo "Test transducer" + git clone https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-en-2023-06-26 + ./run-transducer.sh + rm -rf sherpa-onnx-zipformer-en-2023-06-26 + + echo "Test paraformer" + git clone https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28 + ./run-paraformer.sh + rm -rf sherpa-onnx-paraformer-zh-2023-03-28 + + echo "Test NeMo CTC" + git clone https://huggingface.co/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-medium + ./run-nemo-ctc.sh + rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium + + - name: Test streaming decoding files (Linux/macOS) + if: matrix.os != 'windows-latest' + shell: bash + run: | + cd go-api-examples/streaming-decode-files + ls -lh + go mod tidy + cat go.mod + go build -x + ls -lh + + git lfs install + + echo "Test transducer" + git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26 + ./run.sh + rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26 + + - name: Test streaming decoding files (Win64) + if: matrix.os == 'windows-latest' && matrix.arch == 'x64' + shell: bash + run: | + cd go-api-examples/streaming-decode-files + ls -lh + go mod tidy + cat go.mod + go build + ls -lh + + echo $PWD + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/ + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/* + cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll . 
+ ls -lh + + git lfs install + + echo "Test transducer" + git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26 + ./run.sh + rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26 + + - name: Test streaming decoding files (Win32) + if: matrix.os == 'windows-latest' && matrix.arch == 'x86' + shell: bash + run: | + cd go-api-examples/streaming-decode-files + ls -lh + go mod tidy + cat go.mod + ls -lh + + go env GOARCH + go env + echo "------------------------------" + go env -w GOARCH=386 + go env -w CGO_ENABLED=1 + go env + + go clean + go build -x + + echo $PWD + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/ + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/* + cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll . + ls -lh + + git lfs install + + echo "Test transducer" + git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-06-26 + ./run.sh + rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26 diff --git a/.github/workflows/release-go.yaml b/.github/workflows/release-go.yaml new file mode 100644 index 00000000..27596029 --- /dev/null +++ b/.github/workflows/release-go.yaml @@ -0,0 +1,198 @@ +name: release-go + +on: + push: + branches: + - master + tags: + - '*' + + workflow_dispatch: + inputs: + version: + description: "Version information(e.g., 1.5.3) or auto" + required: true + +env: + VERSION: + |- # Enter release tag name or version name in workflow_dispatch. 
Appropriate version if not specified + ${{ github.event.release.tag_name || github.event.inputs.version }} + +concurrency: + group: release-go-${{ github.ref }} + cancel-in-progress: true + +jobs: + linux-x86_64_wheel: + if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj' + name: Linux x86_64 + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + + steps: + - uses: actions/checkout@v2 + + - name: SSH to GitHub + run: | + mkdir -p ~/.ssh/ + cp scripts/go/ssh_config ~/.ssh/config + echo "${{ secrets.MY_GITHUB_SSH_KEY }}" > ~/.ssh/github && chmod 600 ~/.ssh/github + ssh github.com || true + rm ~/.ssh/github + + # see https://cibuildwheel.readthedocs.io/en/stable/changelog/ + # for a list of versions + - name: Build wheels + uses: pypa/cibuildwheel@v2.11.4 + env: + CIBW_BEFORE_BUILD: "pip install -U cmake numpy" + CIBW_BUILD: "cp38-*64" + CIBW_SKIP: "cp27-* cp35-* cp36-* *-win32 pp* *-musllinux* *-manylinux_i686" + CIBW_BUILD_VERBOSITY: 3 + CIBW_ENVIRONMENT_LINUX: LD_LIBRARY_PATH='/project/build/bdist.linux-x86_64/wheel/sherpa_onnx/lib' + + - name: Display wheels + shell: bash + run: | + ls -lh ./wheelhouse/*.whl + unzip -l ./wheelhouse/*.whl + + - uses: actions/upload-artifact@v2 + with: + name: ${{ matrix.os }}-wheels-for-go + path: ./wheelhouse/*.whl + + macOS: + name: macOS ${{ matrix.arch }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [macos-latest] + arch: [x86_64, arm64] + + steps: + - uses: actions/checkout@v2 + - name: Configure CMake + shell: bash + run: | + mkdir build + cd build + cmake -D CMAKE_BUILD_TYPE=${{ matrix.build_type }} -D BUILD_SHARED_LIBS=ON -DCMAKE_OSX_ARCHITECTURES=${{ matrix.arch }} -DCMAKE_INSTALL_PREFIX=./install .. 
+ + - name: Build sherpa-onnx for macOS ${{ matrix.arch }} + shell: bash + run: | + cd build + make -j2 + make install + + ls -lh lib + ls -lh bin + + file install/lib/lib* + + - uses: actions/upload-artifact@v2 + with: + name: ${{ matrix.os }}-for-${{ matrix.arch }} + path: ./build/install/lib/ + + windows: + name: Windows ${{ matrix.arch }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [windows-latest] + arch: [x64, Win32] + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: Configure CMake + shell: bash + run: | + mkdir build + cd build + cmake -A ${{ matrix.arch }} -D CMAKE_BUILD_TYPE=Release -D BUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=./install .. + + - name: Build sherpa-onnx for windows + shell: bash + run: | + cd build + cmake --build . --config Release -- -m:2 + cmake --build . --config Release --target install -- -m:2 + + ls -lh install/* + + ls -lh install/lib + ls -lh install/bin + + - name: Upload artifact + uses: actions/upload-artifact@v2 + with: + name: sherpa-onnx-go-windows-${{ matrix.arch }} + path: ./build/install/lib/ + + Release: + name: Release + runs-on: ubuntu-latest + needs: [linux-x86_64_wheel, macOS, windows] + + steps: + - uses: actions/checkout@v2 + + - name: Add SSH key + run: | + mkdir -p ~/.ssh/ + cp scripts/go/ssh_config ~/.ssh/config + echo "${{ secrets.MY_GITHUB_SSH_KEY }}" > ~/.ssh/github && chmod 600 ~/.ssh/github + ssh github.com || true + + - name: Retrieve artifact from ubuntu-latest + uses: actions/download-artifact@v2 + with: + name: ubuntu-latest-wheels-for-go + path: ./linux + + - name: Retrieve artifact from macos-latest (x86_64) + uses: actions/download-artifact@v2 + with: + name: macos-latest-for-x86_64 + path: ./macos-x86_64 + + - name: Retrieve artifact from macos-latest (arm64) + uses: actions/download-artifact@v2 + with: + name: macos-latest-for-arm64 + path: ./macos-arm64 + + - name: Retrieve artifact from windows-latest (x64) + uses: 
actions/download-artifact@v2 + with: + name: sherpa-onnx-go-windows-x64 + path: ./windows-x64 + + - name: Retrieve artifact from windows-latest (Win32) + uses: actions/download-artifact@v2 + with: + name: sherpa-onnx-go-windows-Win32 + path: ./windows-win32 + + - name: Unzip Ubuntu wheels + shell: bash + run: | + cd linux + ls -lh + unzip ./*.whl + tree . + + - name: Release go + if: env.VERSION != '' + shell: bash + run: | + ./scripts/go/release.sh diff --git a/.github/workflows/test-pip-install.yaml b/.github/workflows/test-pip-install.yaml index 9b7d7cc6..6467fc22 100644 --- a/.github/workflows/test-pip-install.yaml +++ b/.github/workflows/test-pip-install.yaml @@ -28,7 +28,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python-version: ["3.7", "3.8", "3.9", "3.10"] + python-version: ["3.8", "3.9", "3.10"] steps: - uses: actions/checkout@v2 diff --git a/cmake/onnxruntime-darwin-arm64.cmake b/cmake/onnxruntime-osx-arm64.cmake similarity index 94% rename from cmake/onnxruntime-darwin-arm64.cmake rename to cmake/onnxruntime-osx-arm64.cmake index b89ad2ba..90c4f119 100644 --- a/cmake/onnxruntime-darwin-arm64.cmake +++ b/cmake/onnxruntime-osx-arm64.cmake @@ -8,10 +8,6 @@ if(NOT CMAKE_SYSTEM_NAME STREQUAL Darwin) message(FATAL_ERROR "This file is for macOS only. Given: ${CMAKE_SYSTEM_NAME}") endif() -if(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL arm64) - message(FATAL_ERROR "This file is for arm64 only. 
Given: ${CMAKE_SYSTEM_PROCESSOR}") -endif() - set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.15.1/onnxruntime-osx-arm64-1.15.1.tgz") set(onnxruntime_URL2 "https://huggingface.co/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/onnxruntime-osx-arm64-1.15.1.tgz") set(onnxruntime_HASH "SHA256=df97832fc7907c6677a6da437f92339d84a462becb74b1d65217fcb859ee9460") diff --git a/cmake/onnxruntime-darwin-universal.cmake b/cmake/onnxruntime-osx-universal.cmake similarity index 100% rename from cmake/onnxruntime-darwin-universal.cmake rename to cmake/onnxruntime-osx-universal.cmake diff --git a/cmake/onnxruntime-darwin-x86_64.cmake b/cmake/onnxruntime-osx-x86_64.cmake similarity index 94% rename from cmake/onnxruntime-darwin-x86_64.cmake rename to cmake/onnxruntime-osx-x86_64.cmake index c3663977..ba707ceb 100644 --- a/cmake/onnxruntime-darwin-x86_64.cmake +++ b/cmake/onnxruntime-osx-x86_64.cmake @@ -8,10 +8,6 @@ if(NOT CMAKE_SYSTEM_NAME STREQUAL Darwin) message(FATAL_ERROR "This file is for macOS only. Given: ${CMAKE_SYSTEM_NAME}") endif() -if(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64) - message(FATAL_ERROR "This file is for x86_64 only. 
Given: ${CMAKE_SYSTEM_PROCESSOR}") -endif() - set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.15.1/onnxruntime-osx-x86_64-1.15.1.tgz") set(onnxruntime_URL2 "https://huggingface.co/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/onnxruntime-osx-x86_64-1.15.1.tgz") set(onnxruntime_HASH "SHA256=4b66ebbca24b8b96f6b74655fee3610a7e529b4e01f6790632f24ee82b778e5a") diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake index abd35887..4e027e3a 100644 --- a/cmake/onnxruntime.cmake +++ b/cmake/onnxruntime.cmake @@ -16,12 +16,18 @@ function(download_onnxruntime) include(onnxruntime-linux-x86_64) endif() elseif(CMAKE_SYSTEM_NAME STREQUAL Darwin) - if (arm64 IN_LIST CMAKE_OSX_ARCHITECTURES OR x86_64 IN_LIST CMAKE_OSX_ARCHITECTURES) - include(onnxruntime-darwin-universal) - elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64) - include(onnxruntime-darwin-x86_64) + if (arm64 IN_LIST CMAKE_OSX_ARCHITECTURES AND x86_64 IN_LIST CMAKE_OSX_ARCHITECTURES) + include(onnxruntime-osx-universal) + elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64 AND CMAKE_OSX_ARCHITECTURES STREQUAL "arm64") + # cross compiling + include(onnxruntime-osx-arm64) + elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL arm64 AND CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64") + # cross compiling + include(onnxruntime-osx-x86_64) elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL arm64) - include(onnxruntime-darwin-arm64) + include(onnxruntime-osx-arm64) + elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64) + include(onnxruntime-osx-x86_64) else() message(FATAL_ERROR "Unsupport processor {CMAKE_SYSTEM_PROCESSOR} for Darwin") endif() diff --git a/go-api-examples/non-streaming-decode-files/.gitignore b/go-api-examples/non-streaming-decode-files/.gitignore new file mode 100644 index 00000000..0ea122c7 --- /dev/null +++ b/go-api-examples/non-streaming-decode-files/.gitignore @@ -0,0 +1,2 @@ +non-streaming-decode-files +sherpa-onnx-zipformer-en-2023-06-26 diff --git a/go-api-examples/non-streaming-decode-files/go.mod 
b/go-api-examples/non-streaming-decode-files/go.mod new file mode 100644 index 00000000..a5590aa4 --- /dev/null +++ b/go-api-examples/non-streaming-decode-files/go.mod @@ -0,0 +1,17 @@ +module non-streaming-decode-files + +go 1.20 + +require ( + github.com/k2-fsa/sherpa-onnx-go v1.5.3-alpha.8 + github.com/spf13/pflag v1.0.5 + github.com/youpy/go-wav v0.3.2 +) + +require ( + github.com/k2-fsa/sherpa-onnx-go-linux v1.5.3-alpha.6 // indirect + github.com/k2-fsa/sherpa-onnx-go-macos v1.5.3-alpha.4 // indirect + github.com/k2-fsa/sherpa-onnx-go-windows v1.5.3-alpha.5 // indirect + github.com/youpy/go-riff v0.1.0 // indirect + github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b // indirect +) diff --git a/go-api-examples/non-streaming-decode-files/go.sum b/go-api-examples/non-streaming-decode-files/go.sum new file mode 100644 index 00000000..d289d57c --- /dev/null +++ b/go-api-examples/non-streaming-decode-files/go.sum @@ -0,0 +1,33 @@ +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ= +github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/k2-fsa/sherpa-onnx-go v1.5.3-alpha.8 h1:BXc31pWwd7CJLM6m9HavxqiyYJdN3Jc9I26pd4x+JHE= +github.com/k2-fsa/sherpa-onnx-go v1.5.3-alpha.8/go.mod h1:kszL/pwg9XTpRGi1AYW/aSwdhRBqb6LN0SjXR0jnsBo= +github.com/k2-fsa/sherpa-onnx-go-linux v1.5.3-alpha.6 h1:o0+l4Wr3IWkWH+kdt8ZZP55L8mRSpW7h1KzvXcfx9FM= +github.com/k2-fsa/sherpa-onnx-go-linux v1.5.3-alpha.6/go.mod h1:9kU02PSdDdzBApwIDBmE2jWS54WvZbRafOEvW/PVLxE= +github.com/k2-fsa/sherpa-onnx-go-macos v1.5.3-alpha.4 h1:Xri23R3+tQFkNwO6zVxqZbjMRJP40z7JtoSqW6UP6sM= +github.com/k2-fsa/sherpa-onnx-go-macos v1.5.3-alpha.4/go.mod h1:a+AJZKNQkFO+JyzGkHySysYfBzzdcoJI5ITFsnhVcmo= +github.com/k2-fsa/sherpa-onnx-go-windows v1.5.3-alpha.5 
h1:OR1LLoptR8W35j5u06KrYUblG35B83HHCPWrVK31uHY= +github.com/k2-fsa/sherpa-onnx-go-windows v1.5.3-alpha.5/go.mod h1:ermMOETZUv0nM7MmnXWqeHREMR6zQXBhJpEP2fbHIZo= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/youpy/go-riff v0.1.0 h1:vZO/37nI4tIET8tQI0Qn0Y79qQh99aEpponTPiPut7k= +github.com/youpy/go-riff v0.1.0/go.mod h1:83nxdDV4Z9RzrTut9losK7ve4hUnxUR8ASSz4BsKXwQ= +github.com/youpy/go-wav v0.3.2 h1:NLM8L/7yZ0Bntadw/0h95OyUsen+DQIVf9gay+SUsMU= +github.com/youpy/go-wav v0.3.2/go.mod h1:0FCieAXAeSdcxFfwLpRuEo0PFmAoc+8NU34h7TUvk50= +github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b h1:QqixIpc5WFIqTLxB3Hq8qs0qImAgBdq0p6rq2Qdl634= +github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b/go.mod h1:T2h1zV50R/q0CVYnsQOQ6L7P4a2ZxH47ixWcMXFGyx8= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo= 
+gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw= diff --git a/go-api-examples/non-streaming-decode-files/main.go b/go-api-examples/non-streaming-decode-files/main.go new file mode 100644 index 00000000..d2e065c7 --- /dev/null +++ b/go-api-examples/non-streaming-decode-files/main.go @@ -0,0 +1,120 @@ +package main + +import ( + "bytes" + "encoding/binary" + sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx" + flag "github.com/spf13/pflag" + "github.com/youpy/go-wav" + "os" + "strings" + + "log" +) + +func main() { + + log.SetFlags(log.LstdFlags | log.Lmicroseconds) + + config := sherpa.OfflineRecognizerConfig{} + config.FeatConfig = sherpa.FeatureConfig{SampleRate: 16000, FeatureDim: 80} + + flag.StringVar(&config.ModelConfig.Transducer.Encoder, "encoder", "", "Path to the encoder model") + flag.StringVar(&config.ModelConfig.Transducer.Decoder, "decoder", "", "Path to the decoder model") + flag.StringVar(&config.ModelConfig.Transducer.Joiner, "joiner", "", "Path to the joiner model") + flag.StringVar(&config.ModelConfig.Paraformer.Model, "paraformer", "", "Path to the paraformer model") + flag.StringVar(&config.ModelConfig.NemoCTC.Model, "nemo-ctc", "", "Path to the NeMo CTC model") + flag.StringVar(&config.ModelConfig.Tokens, "tokens", "", "Path to the tokens file") + flag.IntVar(&config.ModelConfig.NumThreads, "num-threads", 1, "Number of threads for computing") + flag.IntVar(&config.ModelConfig.Debug, "debug", 0, "Whether to show debug message") + flag.StringVar(&config.ModelConfig.ModelType, "model-type", "", "Optional. Used for loading the model in a faster way") + flag.StringVar(&config.ModelConfig.Provider, "provider", "cpu", "Provider to use") + flag.StringVar(&config.LmConfig.Model, "lm-model", "", "Optional. Path to the LM model") + flag.Float32Var(&config.LmConfig.Scale, "lm-scale", 1.0, "Optional. 
Scale for the LM model") + + flag.StringVar(&config.DecodingMethod, "decoding-method", "greedy_search", "Decoding method. Possible values: greedy_search, modified_beam_search") + flag.IntVar(&config.MaxActivePaths, "max-active-paths", 4, "Used only when --decoding-method is modified_beam_search") + + flag.Parse() + + if len(flag.Args()) != 1 { + log.Fatalf("Please provide one wave file") + } + + log.Println("Reading", flag.Arg(0)) + + samples, sampleRate := readWave(flag.Arg(0)) + + log.Println("Initializing recognizer (may take several seconds)") + recognizer := sherpa.NewOfflineRecognizer(&config) + log.Println("Recognizer created!") + defer sherpa.DeleteOfflineRecognizer(recognizer) + + log.Println("Start decoding!") + stream := sherpa.NewOfflineStream(recognizer) + defer sherpa.DeleteOfflineStream(stream) + + stream.AcceptWaveform(sampleRate, samples) + + recognizer.Decode(stream) + log.Println("Decoding done!") + result := stream.GetResult() + + log.Println(strings.ToLower(result.Text)) + log.Printf("Wave duration: %v seconds", float32(len(samples))/float32(sampleRate)) +} + +func readWave(filename string) (samples []float32, sampleRate int) { + file, _ := os.Open(filename) + defer file.Close() + + reader := wav.NewReader(file) + format, err := reader.Format() + if err != nil { + log.Fatalf("Failed to read wave format") + } + + if format.AudioFormat != 1 { + log.Fatalf("Support only PCM format. Given: %v\n", format.AudioFormat) + } + + if format.NumChannels != 1 { + log.Fatalf("Support only 1 channel wave file. Given: %v\n", format.NumChannels) + } + + if format.BitsPerSample != 16 { + log.Fatalf("Support only 16-bit per sample. Given: %v\n", format.BitsPerSample) + } + + reader.Duration() // so that it initializes reader.Size + + buf := make([]byte, reader.Size) + n, err := reader.Read(buf) + if n != int(reader.Size) { + log.Fatalf("Failed to read %v bytes. 
Returned %v bytes\n", reader.Size, n) + } + + samples = samplesInt16ToFloat(buf) + sampleRate = int(format.SampleRate) + + return +} + +func samplesInt16ToFloat(inSamples []byte) []float32 { + numSamples := len(inSamples) / 2 + outSamples := make([]float32, numSamples) + + for i := 0; i != numSamples; i++ { + s := inSamples[i*2 : (i+1)*2] + + var s16 int16 + buf := bytes.NewReader(s) + err := binary.Read(buf, binary.LittleEndian, &s16) + if err != nil { + log.Fatal("Failed to parse 16-bit sample") + } + outSamples[i] = float32(s16) / 32768 + } + + return outSamples +} diff --git a/go-api-examples/non-streaming-decode-files/run-nemo-ctc.sh b/go-api-examples/non-streaming-decode-files/run-nemo-ctc.sh new file mode 100755 index 00000000..b895437f --- /dev/null +++ b/go-api-examples/non-streaming-decode-files/run-nemo-ctc.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +# Please refer to +# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/nemo/english.html#stt-en-conformer-ctc-medium +# to download the model +# before you run this script. +# +# You can switch to a different online model if you need + +./non-streaming-decode-files \ + --nemo-ctc ./sherpa-onnx-nemo-ctc-en-conformer-medium/model.onnx \ + --tokens ./sherpa-onnx-nemo-ctc-en-conformer-medium/tokens.txt \ + --model-type nemo_ctc \ + --debug 0 \ + ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/0.wav diff --git a/go-api-examples/non-streaming-decode-files/run-paraformer.sh b/go-api-examples/non-streaming-decode-files/run-paraformer.sh new file mode 100755 index 00000000..2d1658c2 --- /dev/null +++ b/go-api-examples/non-streaming-decode-files/run-paraformer.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +# Please refer to +# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-03-28-chinese +# to download the model +# before you run this script. 
+# +# You can switch to a different online model if you need + +./non-streaming-decode-files \ + --paraformer ./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx \ + --tokens ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt \ + --model-type paraformer \ + --debug 0 \ + ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/0.wav diff --git a/go-api-examples/non-streaming-decode-files/run-transducer.sh b/go-api-examples/non-streaming-decode-files/run-transducer.sh new file mode 100755 index 00000000..74837089 --- /dev/null +++ b/go-api-examples/non-streaming-decode-files/run-transducer.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +# Please refer to +# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-zipformer-en-2023-06-26-english +# to download the model +# before you run this script. +# +# You can switch to a different online model if you need + +./non-streaming-decode-files \ + --encoder ./sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.onnx \ + --decoder ./sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx \ + --joiner ./sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.onnx \ + --tokens ./sherpa-onnx-zipformer-en-2023-06-26/tokens.txt \ + --model-type transducer \ + --debug 0 \ + ./sherpa-onnx-zipformer-en-2023-06-26/test_wavs/0.wav diff --git a/go-api-examples/real-time-speech-recognition-from-microphone/.gitignore b/go-api-examples/real-time-speech-recognition-from-microphone/.gitignore new file mode 100644 index 00000000..5d1048be --- /dev/null +++ b/go-api-examples/real-time-speech-recognition-from-microphone/.gitignore @@ -0,0 +1 @@ +real-time-speech-recognition-from-microphone diff --git a/go-api-examples/real-time-speech-recognition-from-microphone/README.md b/go-api-examples/real-time-speech-recognition-from-microphone/README.md new file mode 100644 index 00000000..c87c185a --- /dev/null +++ 
b/go-api-examples/real-time-speech-recognition-from-microphone/README.md @@ -0,0 +1,23 @@ +# Introduction + +This example shows how to use the golang package of [sherpa-onnx][sherpa-onnx] +for real-time speech recognition from microphone. + +It uses [portaudio](https://github.com/gordonklaus/portaudio) +to read the microphone and you have to install `portaudio` first. + +On macOS, you can use + +``` +brew install portaudio +``` + +and it will install `portaudio` into `/usr/local/Cellar/portaudio/19.7.0`. +You need to set the following environment variable +``` +export PKG_CONFIG_PATH=/usr/local/Cellar/portaudio/19.7.0 +``` + +so that `pkg-config --cflags --libs portaudio-2.0` can run successfully. + +[sherpa-onnx]: https://github.com/k2-fsa/sherpa-onnx diff --git a/go-api-examples/real-time-speech-recognition-from-microphone/go.mod b/go-api-examples/real-time-speech-recognition-from-microphone/go.mod new file mode 100644 index 00000000..0f9483ec --- /dev/null +++ b/go-api-examples/real-time-speech-recognition-from-microphone/go.mod @@ -0,0 +1,15 @@ +module real-time-speech-recognition-from-microphone + +go 1.20 + +require ( + github.com/gordonklaus/portaudio v0.0.0-20230709114228-aafa478834f5 + github.com/k2-fsa/sherpa-onnx-go v1.5.3-alpha.8 + github.com/spf13/pflag v1.0.5 +) + +require ( + github.com/k2-fsa/sherpa-onnx-go-linux v1.5.3-alpha.6 // indirect + github.com/k2-fsa/sherpa-onnx-go-macos v1.5.3-alpha.4 // indirect + github.com/k2-fsa/sherpa-onnx-go-windows v1.5.3-alpha.5 // indirect +) diff --git a/go-api-examples/real-time-speech-recognition-from-microphone/go.sum b/go-api-examples/real-time-speech-recognition-from-microphone/go.sum new file mode 100644 index 00000000..5a0e23d4 --- /dev/null +++ b/go-api-examples/real-time-speech-recognition-from-microphone/go.sum @@ -0,0 +1,12 @@ +github.com/gordonklaus/portaudio v0.0.0-20230709114228-aafa478834f5 h1:5AlozfqaVjGYGhms2OsdUyfdJME76E6rx5MdGpjzZpc= +github.com/gordonklaus/portaudio v0.0.0-20230709114228-aafa478834f5/go.mod 
h1:WY8R6YKlI2ZI3UyzFk7P6yGSuS+hFwNtEzrexRyD7Es= +github.com/k2-fsa/sherpa-onnx-go v1.5.3-alpha.8 h1:BXc31pWwd7CJLM6m9HavxqiyYJdN3Jc9I26pd4x+JHE= +github.com/k2-fsa/sherpa-onnx-go v1.5.3-alpha.8/go.mod h1:kszL/pwg9XTpRGi1AYW/aSwdhRBqb6LN0SjXR0jnsBo= +github.com/k2-fsa/sherpa-onnx-go-linux v1.5.3-alpha.6 h1:o0+l4Wr3IWkWH+kdt8ZZP55L8mRSpW7h1KzvXcfx9FM= +github.com/k2-fsa/sherpa-onnx-go-linux v1.5.3-alpha.6/go.mod h1:9kU02PSdDdzBApwIDBmE2jWS54WvZbRafOEvW/PVLxE= +github.com/k2-fsa/sherpa-onnx-go-macos v1.5.3-alpha.4 h1:Xri23R3+tQFkNwO6zVxqZbjMRJP40z7JtoSqW6UP6sM= +github.com/k2-fsa/sherpa-onnx-go-macos v1.5.3-alpha.4/go.mod h1:a+AJZKNQkFO+JyzGkHySysYfBzzdcoJI5ITFsnhVcmo= +github.com/k2-fsa/sherpa-onnx-go-windows v1.5.3-alpha.5 h1:OR1LLoptR8W35j5u06KrYUblG35B83HHCPWrVK31uHY= +github.com/k2-fsa/sherpa-onnx-go-windows v1.5.3-alpha.5/go.mod h1:ermMOETZUv0nM7MmnXWqeHREMR6zQXBhJpEP2fbHIZo= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= diff --git a/go-api-examples/real-time-speech-recognition-from-microphone/main.go b/go-api-examples/real-time-speech-recognition-from-microphone/main.go new file mode 100644 index 00000000..b9cbdec2 --- /dev/null +++ b/go-api-examples/real-time-speech-recognition-from-microphone/main.go @@ -0,0 +1,110 @@ +package main + +import ( + "github.com/gordonklaus/portaudio" + sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx" + flag "github.com/spf13/pflag" + "strings" + + "fmt" + "log" +) + +func main() { + err := portaudio.Initialize() + if err != nil { + log.Fatalf("Unable to initialize portaudio: %v\n", err) + } + defer portaudio.Terminate() + + default_device, err := portaudio.DefaultInputDevice() + if err != nil { + log.Fatal("Failed to get default input device: %v\n", err) + } + fmt.Printf("Select default input device: %s\n", default_device.Name) + param := portaudio.StreamParameters{} + param.Input.Device = default_device + 
param.Input.Channels = 1 + param.Input.Latency = default_device.DefaultLowInputLatency + + param.SampleRate = 16000 + param.FramesPerBuffer = 0 + param.Flags = portaudio.ClipOff + + config := sherpa.OnlineRecognizerConfig{} + config.FeatConfig = sherpa.FeatureConfig{SampleRate: 16000, FeatureDim: 80} + + flag.StringVar(&config.ModelConfig.Encoder, "encoder", "", "Path to the encoder model") + flag.StringVar(&config.ModelConfig.Decoder, "decoder", "", "Path to the decoder model") + flag.StringVar(&config.ModelConfig.Joiner, "joiner", "", "Path to the joiner model") + flag.StringVar(&config.ModelConfig.Tokens, "tokens", "", "Path to the tokens file") + flag.IntVar(&config.ModelConfig.NumThreads, "num-threads", 1, "Number of threads for computing") + flag.IntVar(&config.ModelConfig.Debug, "debug", 0, "Whether to show debug message") + flag.StringVar(&config.ModelConfig.ModelType, "model-type", "", "Optional. Used for loading the model in a faster way") + flag.StringVar(&config.ModelConfig.Provider, "provider", "cpu", "Provider to use") + flag.StringVar(&config.DecodingMethod, "decoding-method", "greedy_search", "Decoding method. 
Possible values: greedy_search, modified_beam_search") + flag.IntVar(&config.MaxActivePaths, "max-active-paths", 4, "Used only when --decoding-method is modified_beam_search") + flag.IntVar(&config.EnableEndpoint, "enable-endpoint", 1, "Whether to enable endpoint") + flag.Float32Var(&config.Rule1MinTrailingSilence, "rule1-min-trailing-silence", 2.4, "Threshold for rule1") + flag.Float32Var(&config.Rule2MinTrailingSilence, "rule2-min-trailing-silence", 1.2, "Threshold for rule2") + flag.Float32Var(&config.Rule3MinUtteranceLength, "rule3-min-utterance-length", 20, "Threshold for rule3") + + flag.Parse() + + log.Println("Initializing recognizer (may take several seconds)") + recognizer := sherpa.NewOnlineRecognizer(&config) + log.Println("Recognizer created!") + defer sherpa.DeleteOnlineRecognizer(recognizer) + + stream := sherpa.NewOnlineStream(recognizer) + + // you can choose another value for 0.1 if you want + samplesPerCall := int32(param.SampleRate * 0.1) // 0.1 second + + samples := make([]float32, samplesPerCall) + s, err := portaudio.OpenStream(param, samples) + if err != nil { + log.Fatalf("Failed to open the stream") + } + defer s.Close() + chk(s.Start()) + + var last_text string + + segment_idx := 0 + + fmt.Println("Started! 
Please speak") + + for { + chk(s.Read()) + stream.AcceptWaveform(int(param.SampleRate), samples) + + for recognizer.IsReady(stream) { + recognizer.Decode(stream) + } + + text := recognizer.GetResult(stream).Text + if len(text) != 0 && last_text != text { + last_text = strings.ToLower(text) + fmt.Printf("\r%d: %s", segment_idx, last_text) + } + + if recognizer.IsEndpoint(stream) { + if len(text) != 0 { + segment_idx++ + fmt.Println() + } + recognizer.Reset(stream) + } + } + + chk(s.Stop()) + return + +} + +func chk(err error) { + if err != nil { + panic(err) + } +} diff --git a/go-api-examples/real-time-speech-recognition-from-microphone/run.sh b/go-api-examples/real-time-speech-recognition-from-microphone/run.sh new file mode 100755 index 00000000..479e57d8 --- /dev/null +++ b/go-api-examples/real-time-speech-recognition-from-microphone/run.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +# Please refer to +# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#pkufool-icefall-asr-zipformer-streaming-wenetspeech-20230615-chinese +# to download the model +# before you run this script. 
+# +# You can switch to different online models if you need + +./real-time-speech-recognition-from-microphone \ + --encoder ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx \ + --decoder ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/decoder-epoch-12-avg-4-chunk-16-left-128.onnx \ + --joiner ./icefall-asr-zipformer-streaming-wenetspeech-20230615/exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx \ + --tokens ./icefall-asr-zipformer-streaming-wenetspeech-20230615/data/lang_char/tokens.txt \ + --model-type zipformer2 diff --git a/go-api-examples/streaming-decode-files/.gitignore b/go-api-examples/streaming-decode-files/.gitignore new file mode 100644 index 00000000..47197904 --- /dev/null +++ b/go-api-examples/streaming-decode-files/.gitignore @@ -0,0 +1 @@ +streaming-decode-files diff --git a/go-api-examples/streaming-decode-files/go.mod b/go-api-examples/streaming-decode-files/go.mod new file mode 100644 index 00000000..4d7fee17 --- /dev/null +++ b/go-api-examples/streaming-decode-files/go.mod @@ -0,0 +1,17 @@ +module streaming-decode-files + +go 1.20 + +require ( + github.com/k2-fsa/sherpa-onnx-go v1.5.3-alpha.8 + github.com/spf13/pflag v1.0.5 + github.com/youpy/go-wav v0.3.2 +) + +require ( + github.com/k2-fsa/sherpa-onnx-go-linux v1.5.3-alpha.6 // indirect + github.com/k2-fsa/sherpa-onnx-go-macos v1.5.3-alpha.4 // indirect + github.com/k2-fsa/sherpa-onnx-go-windows v1.5.3-alpha.5 // indirect + github.com/youpy/go-riff v0.1.0 // indirect + github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b // indirect +) diff --git a/go-api-examples/streaming-decode-files/go.sum b/go-api-examples/streaming-decode-files/go.sum new file mode 100644 index 00000000..d289d57c --- /dev/null +++ b/go-api-examples/streaming-decode-files/go.sum @@ -0,0 +1,33 @@ +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/davecgh/go-spew v1.1.0/go.mod 
h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ= +github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/k2-fsa/sherpa-onnx-go v1.5.3-alpha.8 h1:BXc31pWwd7CJLM6m9HavxqiyYJdN3Jc9I26pd4x+JHE= +github.com/k2-fsa/sherpa-onnx-go v1.5.3-alpha.8/go.mod h1:kszL/pwg9XTpRGi1AYW/aSwdhRBqb6LN0SjXR0jnsBo= +github.com/k2-fsa/sherpa-onnx-go-linux v1.5.3-alpha.6 h1:o0+l4Wr3IWkWH+kdt8ZZP55L8mRSpW7h1KzvXcfx9FM= +github.com/k2-fsa/sherpa-onnx-go-linux v1.5.3-alpha.6/go.mod h1:9kU02PSdDdzBApwIDBmE2jWS54WvZbRafOEvW/PVLxE= +github.com/k2-fsa/sherpa-onnx-go-macos v1.5.3-alpha.4 h1:Xri23R3+tQFkNwO6zVxqZbjMRJP40z7JtoSqW6UP6sM= +github.com/k2-fsa/sherpa-onnx-go-macos v1.5.3-alpha.4/go.mod h1:a+AJZKNQkFO+JyzGkHySysYfBzzdcoJI5ITFsnhVcmo= +github.com/k2-fsa/sherpa-onnx-go-windows v1.5.3-alpha.5 h1:OR1LLoptR8W35j5u06KrYUblG35B83HHCPWrVK31uHY= +github.com/k2-fsa/sherpa-onnx-go-windows v1.5.3-alpha.5/go.mod h1:ermMOETZUv0nM7MmnXWqeHREMR6zQXBhJpEP2fbHIZo= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/youpy/go-riff v0.1.0 h1:vZO/37nI4tIET8tQI0Qn0Y79qQh99aEpponTPiPut7k= +github.com/youpy/go-riff v0.1.0/go.mod h1:83nxdDV4Z9RzrTut9losK7ve4hUnxUR8ASSz4BsKXwQ= 
+github.com/youpy/go-wav v0.3.2 h1:NLM8L/7yZ0Bntadw/0h95OyUsen+DQIVf9gay+SUsMU= +github.com/youpy/go-wav v0.3.2/go.mod h1:0FCieAXAeSdcxFfwLpRuEo0PFmAoc+8NU34h7TUvk50= +github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b h1:QqixIpc5WFIqTLxB3Hq8qs0qImAgBdq0p6rq2Qdl634= +github.com/zaf/g711 v0.0.0-20190814101024-76a4a538f52b/go.mod h1:T2h1zV50R/q0CVYnsQOQ6L7P4a2ZxH47ixWcMXFGyx8= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo= +gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw= diff --git a/go-api-examples/streaming-decode-files/main.go b/go-api-examples/streaming-decode-files/main.go new file mode 100644 index 00000000..b7430186 --- /dev/null +++ b/go-api-examples/streaming-decode-files/main.go @@ -0,0 +1,118 @@ +package main + +import ( + "bytes" + "encoding/binary" + sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx" + flag "github.com/spf13/pflag" + "github.com/youpy/go-wav" + "os" + "strings" + + "log" +) + +func main() { + log.SetFlags(log.LstdFlags | log.Lmicroseconds) + + config := sherpa.OnlineRecognizerConfig{} + config.FeatConfig = sherpa.FeatureConfig{SampleRate: 16000, FeatureDim: 80} + + flag.StringVar(&config.ModelConfig.Encoder, "encoder", "", "Path to the encoder model") + flag.StringVar(&config.ModelConfig.Decoder, "decoder", "", "Path to the decoder model") + flag.StringVar(&config.ModelConfig.Joiner, "joiner", "", "Path to the joiner model") + flag.StringVar(&config.ModelConfig.Tokens, "tokens", "", "Path to the tokens file") + 
flag.IntVar(&config.ModelConfig.NumThreads, "num-threads", 1, "Number of threads for computing") + flag.IntVar(&config.ModelConfig.Debug, "debug", 0, "Whether to show debug message") + flag.StringVar(&config.ModelConfig.ModelType, "model-type", "", "Optional. Used for loading the model in a faster way") + flag.StringVar(&config.ModelConfig.Provider, "provider", "cpu", "Provider to use") + flag.StringVar(&config.DecodingMethod, "decoding-method", "greedy_search", "Decoding method. Possible values: greedy_search, modified_beam_search") + flag.IntVar(&config.MaxActivePaths, "max-active-paths", 4, "Used only when --decoding-method is modified_beam_search") + + flag.Parse() + + if len(flag.Args()) != 1 { + log.Fatalf("Please provide one wave file") + } + + log.Println("Reading", flag.Arg(0)) + + samples, sampleRate := readWave(flag.Arg(0)) + + log.Println("Initializing recognizer (may take several seconds)") + recognizer := sherpa.NewOnlineRecognizer(&config) + log.Println("Recognizer created!") + defer sherpa.DeleteOnlineRecognizer(recognizer) + + log.Println("Start decoding!") + stream := sherpa.NewOnlineStream(recognizer) + defer sherpa.DeleteOnlineStream(stream) + + stream.AcceptWaveform(sampleRate, samples) + + tailPadding := make([]float32, int(float32(sampleRate)*0.3)) + stream.AcceptWaveform(sampleRate, tailPadding) + + for recognizer.IsReady(stream) { + recognizer.Decode(stream) + } + log.Println("Decoding done!") + result := recognizer.GetResult(stream) + log.Println(strings.ToLower(result.Text)) + log.Printf("Wave duration: %v seconds", float32(len(samples))/float32(sampleRate)) +} + +func readWave(filename string) (samples []float32, sampleRate int) { + file, _ := os.Open(filename) + defer file.Close() + + reader := wav.NewReader(file) + format, err := reader.Format() + if err != nil { + log.Fatalf("Failed to read wave format") + } + + if format.AudioFormat != 1 { + log.Fatalf("Support only PCM format. 
Given: %v\n", format.AudioFormat) + } + + if format.NumChannels != 1 { + log.Fatalf("Support only 1 channel wave file. Given: %v\n", format.NumChannels) + } + + if format.BitsPerSample != 16 { + log.Fatalf("Support only 16-bit per sample. Given: %v\n", format.BitsPerSample) + } + + reader.Duration() // so that it initializes reader.Size + + buf := make([]byte, reader.Size) + n, err := reader.Read(buf) + if n != int(reader.Size) { + log.Fatalf("Failed to read %v bytes. Returned %v bytes\n", reader.Size, n) + } + + samples = samplesInt16ToFloat(buf) + sampleRate = int(format.SampleRate) + + return +} + +func samplesInt16ToFloat(inSamples []byte) []float32 { + numSamples := len(inSamples) / 2 + outSamples := make([]float32, numSamples) + + for i := 0; i != numSamples; i++ { + s := inSamples[i*2 : (i+1)*2] + + var s16 int16 + buf := bytes.NewReader(s) + err := binary.Read(buf, binary.LittleEndian, &s16) + if err != nil { + log.Fatal("Failed to parse 16-bit sample") + } + outSamples[i] = float32(s16) / 32768 + } + + return outSamples +} diff --git a/go-api-examples/streaming-decode-files/run.sh b/go-api-examples/streaming-decode-files/run.sh new file mode 100755 index 00000000..461aff1c --- /dev/null +++ b/go-api-examples/streaming-decode-files/run.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +# Please refer to +# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-en-2023-06-26-english +# to download the model +# before you run this script. 
+# +# You can switch to a different online model if you need + +./streaming-decode-files \ + --encoder ./sherpa-onnx-streaming-zipformer-en-2023-06-26/encoder-epoch-99-avg-1-chunk-16-left-128.onnx \ + --decoder ./sherpa-onnx-streaming-zipformer-en-2023-06-26/decoder-epoch-99-avg-1-chunk-16-left-128.onnx \ + --joiner ./sherpa-onnx-streaming-zipformer-en-2023-06-26/joiner-epoch-99-avg-1-chunk-16-left-128.onnx \ + --tokens ./sherpa-onnx-streaming-zipformer-en-2023-06-26/tokens.txt \ + --model-type zipformer2 \ + --debug 0 \ + ./sherpa-onnx-streaming-zipformer-en-2023-06-26/test_wavs/0.wav diff --git a/scripts/go/release.sh b/scripts/go/release.sh new file mode 100755 index 00000000..e3ea7efc --- /dev/null +++ b/scripts/go/release.sh @@ -0,0 +1,146 @@ +#!/usr/bin/env bash + +set -ex + +git config --global user.email "csukuangfj@gmail.com" +git config --global user.name "Fangjun Kuang" + +SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) + +echo "=========================================================================" + +git clone git@github.com:k2-fsa/sherpa-onnx-go-linux.git + +echo "Copy libs for Linux x86_64" + +rm -rf sherpa-onnx-go-linux/lib/x86_64-unknown-linux-gnu/lib* + +cp -v ./linux/sherpa_onnx/lib/libkaldi-native-fbank-core.so sherpa-onnx-go-linux/lib/x86_64-unknown-linux-gnu/ +cp -v ./linux/sherpa_onnx/lib/libonnxruntime* sherpa-onnx-go-linux/lib/x86_64-unknown-linux-gnu/ +cp -v ./linux/sherpa_onnx/lib/libsherpa-onnx-c-api.so sherpa-onnx-go-linux/lib/x86_64-unknown-linux-gnu/ +cp -v ./linux/sherpa_onnx/lib/libsherpa-onnx-core.so sherpa-onnx-go-linux/lib/x86_64-unknown-linux-gnu/ + +echo "Copy sources for Linux x86_64" +cp sherpa-onnx/c-api/c-api.h sherpa-onnx-go-linux/ +cp scripts/go/sherpa_onnx.go sherpa-onnx-go-linux/ + +pushd sherpa-onnx-go-linux +tag=$(git describe --abbrev=0 --tags) +if [[ x"$VERSION" == x"auto" ]]; then + # this is a pre-release + if [[ $tag == ${SHERPA_ONNX_VERSION}* ]]; then + 
# echo we have already release pre-release before, so just increment it + last=$(echo $tag | rev | cut -d'.' -f 1 | rev) + new_last=$((last+1)) + new_tag=${SHERPA_ONNX_VERSION}-alpha.${new_last} + else + new_tag=${SHERPA_ONNX_VERSION}-alpha.1 + fi +else + new_tag=$VERSION +fi + +echo "new_tag: $new_tag" +git add . +git status +git commit -m "Release $new_tag" && \ +git tag $new_tag && \ +git push origin $new_tag || true + +popd +echo "=========================================================================" + +git clone git@github.com:k2-fsa/sherpa-onnx-go-macos.git + +echo "Copy libs for macOS x86_64" +rm -rf sherpa-onnx-go-macos/lib/x86_64-apple-darwin/lib* +cp -v ./macos-x86_64/libkaldi-native-fbank-core.dylib sherpa-onnx-go-macos/lib/x86_64-apple-darwin +cp -v ./macos-x86_64/libonnxruntime* sherpa-onnx-go-macos/lib/x86_64-apple-darwin +cp -v ./macos-x86_64/libsherpa-onnx-c-api.dylib sherpa-onnx-go-macos/lib/x86_64-apple-darwin +cp -v ./macos-x86_64/libsherpa-onnx-core.dylib sherpa-onnx-go-macos/lib/x86_64-apple-darwin + +echo "Copy libs for macOS arm64" +rm -rf sherpa-onnx-go-macos/lib/aarch64-apple-darwin/lib* +cp -v ./macos-arm64/libkaldi-native-fbank-core.dylib sherpa-onnx-go-macos/lib/aarch64-apple-darwin +cp -v ./macos-arm64/libonnxruntime* sherpa-onnx-go-macos/lib/aarch64-apple-darwin +cp -v ./macos-arm64/libsherpa-onnx-c-api.dylib sherpa-onnx-go-macos/lib/aarch64-apple-darwin +cp -v ./macos-arm64/libsherpa-onnx-core.dylib sherpa-onnx-go-macos/lib/aarch64-apple-darwin + +echo "Copy sources for macOS" +cp sherpa-onnx/c-api/c-api.h sherpa-onnx-go-macos/ +cp scripts/go/sherpa_onnx.go sherpa-onnx-go-macos/ + +pushd sherpa-onnx-go-macos +tag=$(git describe --abbrev=0 --tags) +if [[ x"$VERSION" == x"auto" ]]; then + # this is a pre-release + if [[ $tag == ${SHERPA_ONNX_VERSION}* ]]; then + # echo we have already release pre-release before, so just increment it + last=$(echo $tag | rev | cut -d'.' 
-f 1 | rev) + new_last=$((last+1)) + new_tag=${SHERPA_ONNX_VERSION}-alpha.${new_last} + else + new_tag=${SHERPA_ONNX_VERSION}-alpha.1 + fi +else + new_tag=$VERSION +fi + +echo "new_tag: $new_tag" +git add . +git status +git commit -m "Release $new_tag" && \ +git tag $new_tag && \ +git push origin $new_tag || true + +popd +echo "=========================================================================" + +git clone git@github.com:k2-fsa/sherpa-onnx-go-windows.git +echo "Copy libs for Windows x86_64" +rm -fv sherpa-onnx-go-windows/lib/x86_64-pc-windows-gnu/* +cp -v ./windows-x64/kaldi-native-fbank-core.dll sherpa-onnx-go-windows/lib/x86_64-pc-windows-gnu +cp -v ./windows-x64/onnxruntime.dll sherpa-onnx-go-windows/lib/x86_64-pc-windows-gnu +cp -v ./windows-x64/sherpa-onnx-c-api.dll sherpa-onnx-go-windows/lib/x86_64-pc-windows-gnu +cp -v ./windows-x64/sherpa-onnx-core.dll sherpa-onnx-go-windows/lib/x86_64-pc-windows-gnu + +echo "Copy libs for Windows x86" +rm -fv sherpa-onnx-go-windows/lib/i686-pc-windows-gnu/* +cp -v ./windows-win32/kaldi-native-fbank-core.dll sherpa-onnx-go-windows/lib/i686-pc-windows-gnu +cp -v ./windows-win32/onnxruntime.dll sherpa-onnx-go-windows/lib/i686-pc-windows-gnu +cp -v ./windows-win32/sherpa-onnx-c-api.dll sherpa-onnx-go-windows/lib/i686-pc-windows-gnu +cp -v ./windows-win32/sherpa-onnx-core.dll sherpa-onnx-go-windows/lib/i686-pc-windows-gnu + +echo "Copy sources for Windows" +cp sherpa-onnx/c-api/c-api.h sherpa-onnx-go-windows/ +cp scripts/go/sherpa_onnx.go sherpa-onnx-go-windows/ + +pushd sherpa-onnx-go-windows +tag=$(git describe --abbrev=0 --tags) +if [[ x"$VERSION" == x"auto" ]]; then + # this is a pre-release + if [[ $tag == ${SHERPA_ONNX_VERSION}* ]]; then + # echo we have already release pre-release before, so just increment it + last=$(echo $tag | rev | cut -d'.' 
-f 1 | rev) + new_last=$((last+1)) + new_tag=${SHERPA_ONNX_VERSION}-alpha.${new_last} + else + new_tag=${SHERPA_ONNX_VERSION}-alpha.1 + fi +else + new_tag=$VERSION +fi + +echo "new_tag: $new_tag" +git add . +git status +git commit -m "Release $new_tag" && \ +git tag $new_tag && \ +git push origin $new_tag || true + +popd + +echo "=========================================================================" + + +rm -fv ~/.ssh/github diff --git a/scripts/go/sherpa_onnx.go b/scripts/go/sherpa_onnx.go new file mode 100644 index 00000000..9320af5d --- /dev/null +++ b/scripts/go/sherpa_onnx.go @@ -0,0 +1,443 @@ +/* +Speech recognition with [Next-gen Kaldi]. + +[sherpa-onnx] is an open-source speech recognition framework for [Next-gen Kaldi]. +It depends only on [onnxruntime], supporting both streaming and non-streaming +speech recognition. + +It does not need to access the network during recognition and everything +runs locally. + +It supports a variety of platforms, such as Linux (x86_64, aarch64, arm), +Windows (x86_64, x86), macOS (x86_64, arm64), etc. + +Usage examples: + + 1. Real-time speech recognition from a microphone + + Please see + https://github.com/k2-fsa/sherpa-onnx/tree/master/go-api-examples/real-time-speech-recognition-from-microphone + + 2. Decode files using a non-streaming model + + Please see + https://github.com/k2-fsa/sherpa-onnx/tree/master/go-api-examples/non-streaming-decode-files + + 3. 
Decode files using a streaming model + + Please see + https://github.com/k2-fsa/sherpa-onnx/tree/master/go-api-examples/streaming-decode-files + +[sherpa-onnx]: https://github.com/k2-fsa/sherpa-onnx +[onnxruntime]: https://github.com/microsoft/onnxruntime +[Next-gen Kaldi]: https://github.com/k2-fsa/ +*/ +package sherpa_onnx + +// #include <stdlib.h> +// #include "c-api.h" +import "C" +import "unsafe" + +// Configuration for online/streaming transducer models +// +// Please refer to +// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html +// to download pre-trained models +type OnlineTransducerModelConfig struct { + Encoder string // Path to the encoder model, e.g., encoder.onnx or encoder.int8.onnx + Decoder string // Path to the decoder model. + Joiner string // Path to the joiner model. + Tokens string // Path to tokens.txt + NumThreads int // Number of threads to use for neural network computation + Provider string // Optional. Valid values are: cpu, cuda, coreml + Debug int // 1 to show model meta information while loading it. + ModelType string // Optional. You can specify it for faster model initialization +} + +// Configuration for the feature extractor +type FeatureConfig struct { + // Sample rate expected by the model. It is 16000 for all + // pre-trained models provided by us + SampleRate int + // Feature dimension expected by the model. It is 80 for all + // pre-trained models provided by us + FeatureDim int +} + +// Configuration for the online/streaming recognizer. +type OnlineRecognizerConfig struct { + FeatConfig FeatureConfig + ModelConfig OnlineTransducerModelConfig + + // Valid decoding methods: greedy_search, modified_beam_search + DecodingMethod string + + // Used only when DecodingMethod is modified_beam_search. It specifies + // the maximum number of paths to keep during the search + MaxActivePaths int + + EnableEndpoint int // 1 to enable endpoint detection. 
+ + // Please see + // https://k2-fsa.github.io/sherpa/ncnn/endpoint.html + // for the meaning of Rule1MinTrailingSilence, Rule2MinTrailingSilence + // and Rule3MinUtteranceLength. + Rule1MinTrailingSilence float32 + Rule2MinTrailingSilence float32 + Rule3MinUtteranceLength float32 +} + +// It contains the recognition result for a online stream. +type OnlineRecognizerResult struct { + Text string +} + +// The online recognizer class. It wraps a pointer from C. +type OnlineRecognizer struct { + impl *C.struct_SherpaOnnxOnlineRecognizer +} + +// The online stream class. It wraps a pointer from C. +type OnlineStream struct { + impl *C.struct_SherpaOnnxOnlineStream +} + +// Free the internal pointer inside the recognizer to avoid memory leak. +func DeleteOnlineRecognizer(recognizer *OnlineRecognizer) { + C.DestroyOnlineRecognizer(recognizer.impl) + recognizer.impl = nil +} + +// The user is responsible to invoke [DeleteOnlineRecognizer]() to free +// the returned recognizer to avoid memory leak +func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer { + c := C.struct_SherpaOnnxOnlineRecognizerConfig{} + c.feat_config.sample_rate = C.int(config.FeatConfig.SampleRate) + c.feat_config.feature_dim = C.int(config.FeatConfig.FeatureDim) + + c.model_config.encoder = C.CString(config.ModelConfig.Encoder) + defer C.free(unsafe.Pointer(c.model_config.encoder)) + + c.model_config.decoder = C.CString(config.ModelConfig.Decoder) + defer C.free(unsafe.Pointer(c.model_config.decoder)) + + c.model_config.joiner = C.CString(config.ModelConfig.Joiner) + defer C.free(unsafe.Pointer(c.model_config.joiner)) + + c.model_config.tokens = C.CString(config.ModelConfig.Tokens) + defer C.free(unsafe.Pointer(c.model_config.tokens)) + + c.model_config.num_threads = C.int(config.ModelConfig.NumThreads) + + c.model_config.provider = C.CString(config.ModelConfig.Provider) + defer C.free(unsafe.Pointer(c.model_config.provider)) + + c.model_config.debug = 
C.int(config.ModelConfig.Debug) + + c.model_config.model_type = C.CString(config.ModelConfig.ModelType) + defer C.free(unsafe.Pointer(c.model_config.model_type)) + + c.decoding_method = C.CString(config.DecodingMethod) + defer C.free(unsafe.Pointer(c.decoding_method)) + + c.max_active_paths = C.int(config.MaxActivePaths) + c.enable_endpoint = C.int(config.EnableEndpoint) + c.rule1_min_trailing_silence = C.float(config.Rule1MinTrailingSilence) + c.rule2_min_trailing_silence = C.float(config.Rule2MinTrailingSilence) + c.rule3_min_utterance_length = C.float(config.Rule3MinUtteranceLength) + + recognizer := &OnlineRecognizer{} + recognizer.impl = C.CreateOnlineRecognizer(&c) + + return recognizer +} + +// Delete the internal pointer inside the stream to avoid memory leak. +func DeleteOnlineStream(stream *OnlineStream) { + C.DestroyOnlineStream(stream.impl) + stream.impl = nil +} + +// The user is responsible to invoke [DeleteOnlineStream]() to free +// the returned stream to avoid memory leak +func NewOnlineStream(recognizer *OnlineRecognizer) *OnlineStream { + stream := &OnlineStream{} + stream.impl = C.CreateOnlineStream(recognizer.impl) + return stream +} + +// Input audio samples for the stream. +// +// sampleRate is the actual sample rate of the input audio samples. If it +// is different from the sample rate expected by the feature extractor, we will +// do resampling inside. +// +// samples contains audio samples. Each sample is in the range [-1, 1] +func (s *OnlineStream) AcceptWaveform(sampleRate int, samples []float32) { + C.AcceptWaveform(s.impl, C.int(sampleRate), (*C.float)(&samples[0]), C.int(len(samples))) +} + +// Signal that there will be no incoming audio samples. +// After calling this function, you cannot call [OnlineStream.AcceptWaveform] any longer. +// +// The main purpose of this function is to flush the remaining audio samples +// buffered inside for feature extraction. 
+func (s *OnlineStream) InputFinished() { + C.InputFinished(s.impl) +} + +// Check whether the stream has enough feature frames for decoding. +// Return true if this stream is ready for decoding. Return false otherwise. +// +// You will usually use it like below: +// +// for recognizer.IsReady(s) { +// recognizer.Decode(s) +// } +func (recognizer *OnlineRecognizer) IsReady(s *OnlineStream) bool { + return C.IsOnlineStreamReady(recognizer.impl, s.impl) == 1 +} + +// Return true if an endpoint is detected. +// +// You usually use it like below: +// +// if recognizer.IsEndpoint(s) { +// // do your own stuff after detecting an endpoint +// +// recognizer.Reset(s) +// } +func (recognizer *OnlineRecognizer) IsEndpoint(s *OnlineStream) bool { + return C.IsEndpoint(recognizer.impl, s.impl) == 1 +} + +// After calling this function, the internal neural network model states +// are reset and IsEndpoint(s) would return false. GetResult(s) would also +// return an empty string. +func (recognizer *OnlineRecognizer) Reset(s *OnlineStream) { + C.Reset(recognizer.impl, s.impl) +} + +// Decode the stream. Before calling this function, you have to ensure +// that recognizer.IsReady(s) returns true. Otherwise, you will be SAD. +// +// You usually use it like below: +// +// for recognizer.IsReady(s) { +// recognizer.Decode(s) +// } +func (recognizer *OnlineRecognizer) Decode(s *OnlineStream) { + C.DecodeOnlineStream(recognizer.impl, s.impl) +} + +// Decode multiple streams in parallel, i.e., in batch. +// You have to ensure that each stream is ready for decoding. Otherwise, +// you will be SAD. 
+func (recognizer *OnlineRecognizer) DecodeStreams(s []*OnlineStream) { + ss := make([]*C.struct_SherpaOnnxOnlineStream, len(s)) + for i, v := range s { + ss[i] = v.impl + } + + C.DecodeMultipleOnlineStreams(recognizer.impl, &ss[0], C.int(len(s))) +} + +// Get the current result of stream since the last invoke of Reset() +func (recognizer *OnlineRecognizer) GetResult(s *OnlineStream) *OnlineRecognizerResult { + p := C.GetOnlineStreamResult(recognizer.impl, s.impl) + defer C.DestroyOnlineRecognizerResult(p) + result := &OnlineRecognizerResult{} + result.Text = C.GoString(p.text) + + return result +} + +// Configuration for offline/non-streaming transducer. +// +// Please refer to +// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/index.html +// to download pre-trained models +type OfflineTransducerModelConfig struct { + Encoder string // Path to the encoder model, i.e., encoder.onnx or encoder.int8.onnx + Decoder string // Path to the decoder model + Joiner string // Path to the joiner model +} + +// Configuration for offline/non-streaming paraformer. +// +// please refer to +// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html +// to download pre-trained models +type OfflineParaformerModelConfig struct { + Model string // Path to the model, e.g., model.onnx or model.int8.onnx +} + +// Configuration for offline/non-streaming NeMo CTC models. +// +// Please refer to +// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/index.html +// to download pre-trained models +type OfflineNemoEncDecCtcModelConfig struct { + Model string // Path to the model, e.g., model.onnx or model.int8.onnx +} + +// Configuration for offline LM. 
+type OfflineLMConfig struct { + Model string // Path to the model + Scale float32 // scale for LM score +} + +type OfflineModelConfig struct { + Transducer OfflineTransducerModelConfig + Paraformer OfflineParaformerModelConfig + NemoCTC OfflineNemoEncDecCtcModelConfig + Tokens string // Path to tokens.txt + + // Number of threads to use for neural network computation + NumThreads int + + // 1 to print model meta information while loading + Debug int + + // Optional. Valid values: cpu, cuda, coreml + Provider string + + // Optional. Specify it for faster model initialization. + ModelType string +} + +// Configuration for the offline/non-streaming recognizer. +type OfflineRecognizerConfig struct { + FeatConfig FeatureConfig + ModelConfig OfflineModelConfig + LmConfig OfflineLMConfig + + // Valid decoding method: greedy_search, modified_beam_search + DecodingMethod string + + // Used only when DecodingMethod is modified_beam_search. + MaxActivePaths int +} + +// It wraps a pointer from C +type OfflineRecognizer struct { + impl *C.struct_SherpaOnnxOfflineRecognizer +} + +// It wraps a pointer from C +type OfflineStream struct { + impl *C.struct_SherpaOnnxOfflineStream +} + +// It contains recognition result of an offline stream. +type OfflineRecognizerResult struct { + Text string +} + +// Frees the internal pointer of the recognition to avoid memory leak. 
+func DeleteOfflineRecognizer(recognizer *OfflineRecognizer) { + C.DestroyOfflineRecognizer(recognizer.impl) + recognizer.impl = nil +} + +// The user is responsible to invoke [DeleteOfflineRecognizer]() to free +// the returned recognizer to avoid memory leak +func NewOfflineRecognizer(config *OfflineRecognizerConfig) *OfflineRecognizer { + c := C.struct_SherpaOnnxOfflineRecognizerConfig{} + c.feat_config.sample_rate = C.int(config.FeatConfig.SampleRate) + c.feat_config.feature_dim = C.int(config.FeatConfig.FeatureDim) + + c.model_config.transducer.encoder = C.CString(config.ModelConfig.Transducer.Encoder) + defer C.free(unsafe.Pointer(c.model_config.transducer.encoder)) + + c.model_config.transducer.decoder = C.CString(config.ModelConfig.Transducer.Decoder) + defer C.free(unsafe.Pointer(c.model_config.transducer.decoder)) + + c.model_config.transducer.joiner = C.CString(config.ModelConfig.Transducer.Joiner) + defer C.free(unsafe.Pointer(c.model_config.transducer.joiner)) + + c.model_config.paraformer.model = C.CString(config.ModelConfig.Paraformer.Model) + defer C.free(unsafe.Pointer(c.model_config.paraformer.model)) + + c.model_config.nemo_ctc.model = C.CString(config.ModelConfig.NemoCTC.Model) + defer C.free(unsafe.Pointer(c.model_config.nemo_ctc.model)) + + c.model_config.tokens = C.CString(config.ModelConfig.Tokens) + defer C.free(unsafe.Pointer(c.model_config.tokens)) + + c.model_config.num_threads = C.int(config.ModelConfig.NumThreads) + + c.model_config.debug = C.int(config.ModelConfig.Debug) + + c.model_config.provider = C.CString(config.ModelConfig.Provider) + defer C.free(unsafe.Pointer(c.model_config.provider)) + + c.model_config.model_type = C.CString(config.ModelConfig.ModelType) + defer C.free(unsafe.Pointer(c.model_config.model_type)) + + c.lm_config.model = C.CString(config.LmConfig.Model) + defer C.free(unsafe.Pointer(c.lm_config.model)) + + c.lm_config.scale = C.float(config.LmConfig.Scale) + + c.decoding_method = 
C.CString(config.DecodingMethod) + defer C.free(unsafe.Pointer(c.decoding_method)) + + c.max_active_paths = C.int(config.MaxActivePaths) + + recognizer := &OfflineRecognizer{} + recognizer.impl = C.CreateOfflineRecognizer(&c) + + return recognizer +} + +// Frees the internal pointer of the stream to avoid memory leak. +func DeleteOfflineStream(stream *OfflineStream) { + C.DestroyOfflineStream(stream.impl) + stream.impl = nil +} + +// The user is responsible to invoke [DeleteOfflineStream]() to free +// the returned stream to avoid memory leak +func NewOfflineStream(recognizer *OfflineRecognizer) *OfflineStream { + stream := &OfflineStream{} + stream.impl = C.CreateOfflineStream(recognizer.impl) + return stream +} + +// Input audio samples for the offline stream. +// Please only call it once. That is, input all samples at once. +// +// sampleRate is the sample rate of the input audio samples. If it is different +// from the value expected by the feature extractor, we will do resampling inside. +// +// samples contains the actual audio samples. Each sample is in the range [-1, 1]. +func (s *OfflineStream) AcceptWaveform(sampleRate int, samples []float32) { + C.AcceptWaveformOffline(s.impl, C.int(sampleRate), (*C.float)(&samples[0]), C.int(len(samples))) +} + +// Decode the offline stream. +func (recognizer *OfflineRecognizer) Decode(s *OfflineStream) { + C.DecodeOfflineStream(recognizer.impl, s.impl) +} + +// Decode multiple streams in parallel, i.e., in batch. +func (recognizer *OfflineRecognizer) DecodeStreams(s []*OfflineStream) { + ss := make([]*C.struct_SherpaOnnxOfflineStream, len(s)) + for i, v := range s { + ss[i] = v.impl + } + + C.DecodeMultipleOfflineStreams(recognizer.impl, &ss[0], C.int(len(s))) +} + +// Get the recognition result of the offline stream. 
+func (s *OfflineStream) GetResult() *OfflineRecognizerResult { + p := C.GetOfflineStreamResult(s.impl) + defer C.DestroyOfflineRecognizerResult(p) + result := &OfflineRecognizerResult{} + result.Text = C.GoString(p.text) + + return result +} diff --git a/scripts/go/ssh_config b/scripts/go/ssh_config new file mode 100644 index 00000000..7dd48fe2 --- /dev/null +++ b/scripts/go/ssh_config @@ -0,0 +1,5 @@ +Host github.com + Hostname github.com + User git + IdentityFile ~/.ssh/github + StrictHostKeyChecking no