add streaming-server with web client (#164)

* add streaming-server with web client

* small fixes
This commit is contained in:
Fangjun Kuang
2023-05-30 22:46:52 +08:00
committed by GitHub
parent d7114da441
commit 5e2dc5ceea
26 changed files with 2228 additions and 1 deletions

3
python-api-examples/web/.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
*.pem
*.key
*.crt

View File

@@ -0,0 +1,34 @@
# How to use
```bash
git clone https://github.com/k2-fsa/sherpa
cd sherpa/sherpa/bin/web
python3 -m http.server 6009
```
and then go to <http://localhost:6009>
You will see a page like the following screenshot:
![Screenshot of the page at http://localhost:6009](./pic/web-ui.png)
If your server is listening at the port *6006* with address **localhost**,
then you can either click **Upload**, **Streaming_Record** or **Offline_Record** to play with it.
## File descriptions
### ./css/bootstrap.min.css
It is downloaded from https://cdn.jsdelivr.net/npm/bootstrap@4.3.1/dist/css/bootstrap.min.css
### ./js/jquery-3.6.0.min.js
It is downloaded from https://code.jquery.com/jquery-3.6.0.min.js
### ./js/popper.min.js
It is downloaded from https://cdn.jsdelivr.net/npm/popper.js@1.14.7/dist/umd/popper.min.js
### ./js/bootstrap.min.js
It is downloaded from https://cdn.jsdelivr.net/npm/bootstrap@4.3.1/dist/js/bootstrap.min.js

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,89 @@
#!/usr/bin/env python3
"""
pip install pyopenssl
"""
from OpenSSL import crypto
# The code in this file is modified from
# https://stackoverflow.com/questions/27164354/create-a-self-signed-x509-certificate-in-python
"""
This script generates 3 files:
- private.key
- selfsigned.crt
- cert.pem
You need cert.pem when you start a https server
or a secure websocket server.
Note: You need to change serialNumber if you want to generate
a new certificate as two different certificates cannot share
the same serial number if they are issued by the same organization.
Otherwise, you may get the following error from within your browser:
An error occurred during a connection to 127.0.0.1:6007. You have received an
invalid certificate. Please contact the server administrator or email
correspondent and give them the following information: Your certificate
contains the same serial number as another certificate issued by the
certificate authority. Please get a new certificate containing a unique
serial number. Error code: SEC_ERROR_REUSED_ISSUER_AND_SERIAL
"""
def cert_gen(
    emailAddress="https://github.com/k2-fsa/k2",
    commonName="sherpa",
    countryName="CN",
    localityName="k2-fsa",
    stateOrProvinceName="k2-fsa",
    organizationName="k2-fsa",
    organizationUnitName="k2-fsa",
    serialNumber=3,
    validityStartInSeconds=0,
    validityEndInSeconds=10 * 365 * 24 * 60 * 60,
    KEY_FILE="private.key",
    CERT_FILE="selfsigned.crt",
    ALL_IN_ONE_FILE="cert.pem",
):
    """Generate a self-signed X.509 certificate with a 4096-bit RSA key.

    Three files are written to the current directory:

    - ``KEY_FILE``: the private key (PEM)
    - ``CERT_FILE``: the certificate (PEM)
    - ``ALL_IN_ONE_FILE``: private key followed by the certificate, for
      servers that expect a single combined PEM file.

    Args:
      serialNumber:
        Must be unique per issuer. Reusing a serial number triggers
        SEC_ERROR_REUSED_ISSUER_AND_SERIAL in browsers (see the module
        docstring).
      validityStartInSeconds:
        Offset in seconds, relative to now, at which the certificate
        becomes valid. 0 means valid from now.
      validityEndInSeconds:
        Offset in seconds, relative to now, at which the certificate
        expires. Defaults to roughly 10 years.
    """
    # You can inspect the generated certificate with:
    #   openssl x509 -inform pem -in selfsigned.crt -noout -text

    # Create an RSA key pair.
    k = crypto.PKey()
    k.generate_key(crypto.TYPE_RSA, 4096)

    # Create a self-signed certificate (issuer == subject).
    cert = crypto.X509()
    cert.get_subject().C = countryName
    cert.get_subject().ST = stateOrProvinceName
    cert.get_subject().L = localityName
    cert.get_subject().O = organizationName  # noqa
    cert.get_subject().OU = organizationUnitName
    cert.get_subject().CN = commonName
    cert.get_subject().emailAddress = emailAddress
    cert.set_serial_number(serialNumber)
    # Fix: honor validityStartInSeconds. It was previously ignored and the
    # start time was hard-coded to "now" via gmtime_adj_notBefore(0).
    cert.gmtime_adj_notBefore(validityStartInSeconds)
    cert.gmtime_adj_notAfter(validityEndInSeconds)
    cert.set_issuer(cert.get_subject())
    cert.set_pubkey(k)
    cert.sign(k, "sha512")

    with open(CERT_FILE, "wt") as f:
        f.write(
            crypto.dump_certificate(crypto.FILETYPE_PEM, cert).decode("utf-8")
        )

    with open(KEY_FILE, "wt") as f:
        f.write(crypto.dump_privatekey(crypto.FILETYPE_PEM, k).decode("utf-8"))

    # Key first, then certificate, in one file.
    with open(ALL_IN_ONE_FILE, "wt") as f:
        f.write(crypto.dump_privatekey(crypto.FILETYPE_PEM, k).decode("utf-8"))
        f.write(
            crypto.dump_certificate(crypto.FILETYPE_PEM, cert).decode("utf-8")
        )

    print(f"Generated {CERT_FILE}")
    print(f"Generated {KEY_FILE}")
    print(f"Generated {ALL_IN_ONE_FILE}")


# Guarded so importing this module does not overwrite key/cert files.
if __name__ == "__main__":
    cert_gen()

View File

@@ -0,0 +1,71 @@
<!doctype html>
<html lang="en">
  <head>
    <!-- Required meta tags.
         Note: meta and link are void elements; they take no closing tag
         (the previous </meta> and </link> tags were invalid HTML). -->
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">

    <!-- Bootstrap CSS -->
    <link rel="stylesheet"
      href="./css/bootstrap.min.css"
      integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T"
      crossorigin="anonymous">

    <link rel="icon"
      type="image/png"
      href="./k2-logo.png">

    <script src="./js/jquery-3.6.0.min.js" integrity="sha256-/xUj+3OJU5yExlq6GSYGSHk7tPXikynS7ogEvDej/m4=" crossorigin="anonymous"></script>
    <title>Next-gen Kaldi demo</title>
  </head>

  <body>
    <!-- Shared navigation bar, injected from nav-partial.html. -->
    <div id="nav"></div>
    <script>
      $(function(){
        $("#nav").load("nav-partial.html");
      });
    </script>

    <ul class="list-unstyled">
      <li class="media">
        <div class="media-body">
          <h5 class="mt-0 mb-1">Upload</h5>
          <p>Recognition from a selected file</p>
        </div>
      </li>  <!-- fixed: was an opening <li>, leaving the first item unclosed -->
      <li class="media">
        <div class="media-body">
          <h5 class="mt-0 mb-1">Streaming_Record</h5>
          <p>Recognition from real-time recordings</p>
        </div>
      </li>
      <li class="media">
        <div class="media-body">
          <h5 class="mt-0 mb-1">Offline_Record</h5>
          <p>Recognition from offline recordings</p>
        </div>
      </li>
    </ul>

    Code is available at
    <a href="https://github.com/k2-fsa/sherpa"> https://github.com/k2-fsa/sherpa</a>

    <!-- Optional JavaScript -->
    <!-- jQuery first, then Popper.js, then Bootstrap JS -->
    <script src="./js/popper.min.js"
      integrity="sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1"
      crossorigin="anonymous">
    </script>
    <script src="./js/bootstrap.min.js"
      integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM"
      crossorigin="anonymous">
    </script>
  </body>
</html>

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,395 @@
// This file copies and modifies code
// from https://mdn.github.io/web-dictaphone/scripts/app.js
// and https://gist.github.com/meziantou/edb7217fddfbb70e899e
// Lazily-created WebSocket connection to the recognition server.
var socket;

// DOM handles for the connection form.
const serverIpInput = document.getElementById('server-ip');
const serverPortInput = document.getElementById('server-port');
const connectBtn = document.getElementById('connect');
const uploadBtn = document.getElementById('file');  // NOTE(review): appears unused on this page — confirm

// Connect to the server at the host/port typed into the form.
// Uses wss:// when the page itself is served over https, since browsers
// refuse insecure WebSocket connections from secure pages.
function initWebSocket() {
  let protocol = 'ws://';
  if (window.location.protocol == 'https:') {
    protocol = 'wss://'
  }
  let server_ip = serverIpInput.value;
  let server_port = serverPortInput.value;
  console.log('protocol: ', protocol);
  console.log('server_ip: ', server_ip);
  console.log('server_port: ', server_port);

  let uri = protocol + server_ip + ':' + server_port;
  console.log('uri', uri);
  socket = new WebSocket(uri);

  // Connection opened: allow recording, lock the connect button.
  socket.addEventListener('open', function(event) {
    console.log('connected');
    recordBtn.disabled = false;
    connectBtn.disabled = true;
    connectBtn.innerHTML = 'Connected!';
  });

  // Connection closed: disable recording until the user reconnects.
  socket.addEventListener('close', function(event) {
    console.log('disconnected');
    recordBtn.disabled = true;
    stopBtn.disabled = true;
    connectBtn.disabled = false;
    connectBtn.innerHTML = 'Click me to connect!';
  });

  // The server replies with the recognition result in a single message:
  // display it, acknowledge with 'Done', and close the connection.
  socket.addEventListener('message', function(event) {
    console.log('Received message: ', event.data);
    document.getElementById('results').value = event.data;
    socket.send('Done');
    console.log('Sent Done');
    socket.close();
  });
}
// Recording controls and display surfaces.
const recordBtn = document.getElementById('offline_record');
const stopBtn = document.getElementById('offline_stop');
const clearBtn = document.getElementById('clear');
const soundClips = document.getElementById('sound-clips');
const canvas = document.getElementById('canvas');
const mainSection = document.querySelector('.container');

// Nothing to stop until a recording has started.
stopBtn.disabled = true;

// Pre-fill the server address fields from the page's own location and
// switch the displayed scheme to wss:// on https pages.
window.onload = (event) => {
  console.log('page is fully loaded');
  console.log('protocol', window.location.protocol);
  console.log('port', window.location.port);
  if (window.location.protocol == 'https:') {
    document.getElementById('ws-protocol').textContent = 'wss://';
  }
  serverIpInput.value = window.location.hostname;
  serverPortInput.value = window.location.port;
};

connectBtn.onclick = function() {
  initWebSocket();
};

// Audio-capture state.
let audioCtx;
const canvasCtx = canvas.getContext('2d');
let mediaStream;
let analyser;
let expectedSampleRate = 16000;  // sample rate the server expects
let recordSampleRate;  // the sampleRate of the microphone
let recorder = null;   // the microphone
let leftchannel = [];  // TODO: Use a single channel
let recordingLength = 0;  // number of samples so far

// Clear the recognition results box.
clearBtn.onclick = function() {
  document.getElementById('results').value = '';
};
// Send a 4-byte little-endian int32 header carrying `n` (the payload
// size in bytes) over the socket.
function send_header(n) {
  const headerWords = new Int32Array(1);
  new DataView(headerWords.buffer).setInt32(0, n, true /* littleEndian */);
  socket.send(headerWords);
}
// copied/modified from https://mdn.github.io/web-dictaphone/
// and
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
// Microphone capture pipeline.
// copied/modified from https://mdn.github.io/web-dictaphone/
// and
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
if (navigator.mediaDevices.getUserMedia) {
  console.log('getUserMedia supported.');

  // see https://w3c.github.io/mediacapture-main/#dom-mediadevices-getusermedia
  const constraints = {audio: true};

  let onSuccess = function(stream) {
    if (!audioCtx) {
      audioCtx = new AudioContext();
    }
    console.log(audioCtx);
    recordSampleRate = audioCtx.sampleRate;
    console.log('sample rate ' + recordSampleRate);

    // creates an audio node from the microphone incoming stream
    mediaStream = audioCtx.createMediaStreamSource(stream);
    console.log(mediaStream);

    // https://developer.mozilla.org/en-US/docs/Web/API/AudioContext/createScriptProcessor
    // bufferSize: the onaudioprocess event is called when the buffer is full
    var bufferSize = 2048;
    var numberOfInputChannels = 2;
    var numberOfOutputChannels = 2;
    if (audioCtx.createScriptProcessor) {
      recorder = audioCtx.createScriptProcessor(
          bufferSize, numberOfInputChannels, numberOfOutputChannels);
    } else {
      // Fallback for older browsers without createScriptProcessor.
      recorder = audioCtx.createJavaScriptNode(
          bufferSize, numberOfInputChannels, numberOfOutputChannels);
    }
    console.log(recorder);

    // For each captured buffer: take channel 0, resample to the server's
    // expected rate, clamp to [-1, 1], convert to int16, and accumulate
    // locally. Nothing is sent until the user presses stop.
    recorder.onaudioprocess = function(e) {
      let samples = new Float32Array(e.inputBuffer.getChannelData(0))
      samples = downsampleBuffer(samples, expectedSampleRate);

      let buf = new Int16Array(samples.length);
      for (var i = 0; i < samples.length; ++i) {
        let s = samples[i];
        if (s >= 1)
          s = 1;
        else if (s <= -1)
          s = -1;

        buf[i] = s * 32767;
      }
      leftchannel.push(buf);
      recordingLength += bufferSize;
    };

    visualize(stream);
    mediaStream.connect(analyser);

    // Start recording: route microphone -> recorder -> output.
    recordBtn.onclick = function() {
      mediaStream.connect(recorder);
      mediaStream.connect(analyser);
      recorder.connect(audioCtx.destination);
      console.log('recorder started');
      recordBtn.style.background = 'red';

      stopBtn.disabled = false;
      recordBtn.disabled = true;
    };

    // Stop recording: tear down the audio graph, build a playable clip
    // from the accumulated samples, then send them all to the server.
    stopBtn.onclick = function() {
      console.log('recorder stopped');

      // stopBtn recording
      recorder.disconnect(audioCtx.destination);
      mediaStream.disconnect(recorder);
      mediaStream.disconnect(analyser);

      recordBtn.style.background = '';
      recordBtn.style.color = '';
      // mediaRecorder.requestData();

      stopBtn.disabled = true;
      recordBtn.disabled = false;

      // Build the clip UI (name prompt, audio player, delete button).
      const clipName =
          prompt('Enter a name for your sound clip?', 'My unnamed clip');

      const clipContainer = document.createElement('article');
      const clipLabel = document.createElement('p');
      const audio = document.createElement('audio');
      const deleteButton = document.createElement('button');
      clipContainer.classList.add('clip');
      audio.setAttribute('controls', '');
      deleteButton.textContent = 'Delete';
      deleteButton.className = 'delete';

      if (clipName === null) {
        clipLabel.textContent = 'My unnamed clip';
      } else {
        clipLabel.textContent = clipName;
      }
      clipContainer.appendChild(audio);

      clipContainer.appendChild(clipLabel);
      clipContainer.appendChild(deleteButton);
      soundClips.appendChild(clipContainer);

      audio.controls = true;

      // Rebuild float samples in [-1, 1] from the recorded int16 data;
      // this float buffer is what gets sent to the server.
      let samples = flatten(leftchannel);
      let buf = new Float32Array(samples.length);
      for (var i = 0; i < samples.length; ++i) {
        let s = samples[i];
        buf[i] = s / 32767.0;
      }

      // Wrap the int16 samples in a WAV container for local playback.
      const blob = toWav(samples);

      leftchannel = [];
      const audioURL = window.URL.createObjectURL(blob);
      audio.src = audioURL;
      console.log('recorder stopped');

      deleteButton.onclick = function(e) {
        let evtTgt = e.target;
        evtTgt.parentNode.parentNode.removeChild(evtTgt.parentNode);
      };

      clipLabel.onclick = function() {
        const existingName = clipLabel.textContent;
        const newClipName = prompt('Enter a new name for your sound clip?');
        if (newClipName === null) {
          clipLabel.textContent = existingName;
        } else {
          clipLabel.textContent = newClipName;
        }
      };

      // Send a 4-byte length header, then the float32 payload in small
      // slices so each websocket message stays under the server's limit.
      buf = buf.buffer
      let n = 1024 * 4;  // send this number of bytes per request.
      console.log('buf length, ' + buf.byteLength);
      send_header(buf.byteLength);
      for (let start = 0; start < buf.byteLength; start += n) {
        socket.send(buf.slice(start, start + n));
      }
    };
  };

  let onError = function(err) {
    console.log('The following error occured: ' + err);
  };

  navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError);
} else {
  console.log('getUserMedia not supported on your browser!');
  alert('getUserMedia not supported on your browser!');
}
// Draw a live oscilloscope of the microphone signal onto the canvas.
// The drawing loop re-schedules itself via requestAnimationFrame.
function visualize(stream) {
  if (!audioCtx) {
    audioCtx = new AudioContext();
  }
  const source = audioCtx.createMediaStreamSource(stream);
  if (!analyser) {
    analyser = audioCtx.createAnalyser();
    analyser.fftSize = 2048;
  }
  const bufferLength = analyser.frequencyBinCount;
  const dataArray = new Uint8Array(bufferLength);

  // source.connect(analyser);
  // analyser.connect(audioCtx.destination);

  draw()

  function draw() {
    const WIDTH = canvas.width
    const HEIGHT = canvas.height;
    requestAnimationFrame(draw);

    // Time-domain samples as unsigned bytes centered at 128.
    analyser.getByteTimeDomainData(dataArray);
    canvasCtx.fillStyle = 'rgb(200, 200, 200)';
    canvasCtx.fillRect(0, 0, WIDTH, HEIGHT);
    canvasCtx.lineWidth = 2;
    canvasCtx.strokeStyle = 'rgb(0, 0, 0)';
    canvasCtx.beginPath();

    // One x-step per analyser bin across the full canvas width.
    let sliceWidth = WIDTH * 1.0 / bufferLength;
    let x = 0;
    for (let i = 0; i < bufferLength; i++) {
      let v = dataArray[i] / 128.0;
      let y = v * HEIGHT / 2;
      if (i === 0) {
        canvasCtx.moveTo(x, y);
      } else {
        canvasCtx.lineTo(x, y);
      }
      x += sliceWidth;
    }
    canvasCtx.lineTo(canvas.width, canvas.height / 2);
    canvasCtx.stroke();
  }
}

// Keep the canvas as wide as its container.
window.onresize = function() {
  canvas.width = mainSection.offsetWidth;
};
window.onresize();
// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
// Concatenate a list of Int16Array chunks into a single Int16Array.
// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
function flatten(listOfSamples) {
  const total = listOfSamples.reduce((acc, chunk) => acc + chunk.length, 0);
  const merged = new Int16Array(total);
  let writePos = 0;
  for (const chunk of listOfSamples) {
    merged.set(chunk, writePos);
    writePos += chunk.length;
  }
  return merged;
}
// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
// Wrap 16-bit PCM samples in a 44-byte RIFF/WAVE header and return the
// result as an 'audio/wav' Blob. Mono, 16 bits per sample; the sample
// rate written into the header is the file-global `expectedSampleRate`.
// Layout reference: http://soundfile.sapp.org/doc/WaveFormat/
// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
function toWav(samples) {
  const dataBytes = samples.length * 2;
  const view = new DataView(new ArrayBuffer(44 + dataBytes));

  view.setUint32(0, 0x46464952, true);       // "RIFF" (little-endian bytes)
  view.setUint32(4, 36 + dataBytes, true);   // chunkSize
  view.setUint32(8, 0x45564157, true);       // "WAVE"
  view.setUint32(12, 0x20746d66, true);      // "fmt "
  view.setUint32(16, 16, true);              // subchunk1Size, 16 for PCM
  view.setUint32(20, 1, true);               // audioFormat, 1 for PCM
  view.setUint16(22, 1, true);               // numChannels: 1 channel
  view.setUint32(24, expectedSampleRate, true);      // sampleRate
  view.setUint32(28, expectedSampleRate * 2, true);  // byteRate
  view.setUint16(32, 2, true);               // blockAlign
  view.setUint16(34, 16, true);              // bitsPerSample
  view.setUint32(36, 0x61746164, true);      // "data"
  view.setUint32(40, dataBytes, true);       // subchunk2Size

  let writePos = 44;
  for (const sample of samples) {
    view.setInt16(writePos, sample, true);
    writePos += 2;
  }
  return new Blob([view], {type: 'audio/wav'});
}
// this function is copied from
// https://github.com/awslabs/aws-lex-browser-audio-capture/blob/master/lib/worker.js#L46
// Downsample `buffer` (captured at the file-global `recordSampleRate`)
// to `exportSampleRate` by averaging each run of input samples that maps
// onto one output sample. Returns the input unchanged when the rates match.
// this function is copied from
// https://github.com/awslabs/aws-lex-browser-audio-capture/blob/master/lib/worker.js#L46
function downsampleBuffer(buffer, exportSampleRate) {
  if (exportSampleRate === recordSampleRate) {
    return buffer;
  }
  const ratio = recordSampleRate / exportSampleRate;
  const outLength = Math.round(buffer.length / ratio);
  const out = new Float32Array(outLength);

  let readPos = 0;
  for (let writePos = 0; writePos < out.length; ++writePos) {
    const readEnd = Math.round((writePos + 1) * ratio);
    let sum = 0;
    let count = 0;
    for (let i = readPos; i < readEnd && i < buffer.length; ++i) {
      sum += buffer[i];
      ++count;
    }
    out[writePos] = sum / count;
    readPos = readEnd;
  }
  return out;
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,401 @@
// This file copies and modifies code
// from https://mdn.github.io/web-dictaphone/scripts/app.js
// and https://gist.github.com/meziantou/edb7217fddfbb70e899e
// Lazily-created WebSocket connection to the recognition server.
var socket;

// Transcripts indexed by segment, kept in sync with server messages.
var recognition_text = [];

// Build the text shown in the results box: one numbered line per
// non-empty recognized segment (empty segments are skipped and the
// remaining lines are renumbered consecutively).
function getDisplayResult() {
  let lineNo = 0;
  let out = '';
  for (const idx in recognition_text) {
    const segmentText = recognition_text[idx];
    if (segmentText == '') continue;
    out += '' + lineNo + ': ' + segmentText + '\n';
    lineNo += 1;
  }
  return out;
}
// Connect to the streaming recognition server at the host/port typed
// into the form. Uses wss:// when the page is served over https, since
// browsers refuse insecure WebSocket connections from secure pages.
function initWebSocket() {
  console.log('Creating websocket')
  let protocol = 'ws://';
  if (window.location.protocol == 'https:') {
    protocol = 'wss://'
  }
  let server_ip = serverIpInput.value;
  let server_port = serverPortInput.value;
  console.log('protocol: ', protocol);
  console.log('server_ip: ', server_ip);
  console.log('server_port: ', server_port);

  let uri = protocol + server_ip + ':' + server_port;
  console.log('uri', uri);
  socket = new WebSocket(uri);
  // socket = new WebSocket('wss://localhost:6006/');

  // Connection opened: allow recording, lock the connect button.
  socket.addEventListener('open', function(event) {
    console.log('connected');
    recordBtn.disabled = false;
    connectBtn.disabled = true;
    connectBtn.innerHTML = 'Connected!';
  });

  // Connection closed: disable recording until the user reconnects.
  socket.addEventListener('close', function(event) {
    console.log('disconnected');
    recordBtn.disabled = true;
    connectBtn.disabled = false;
    connectBtn.innerHTML = 'Click me to connect!';
  });

  // Each message is JSON with fields `segment` and `text`. Update that
  // segment's transcript in place if we have seen it before; otherwise
  // append it as a new segment. Then refresh the results box.
  socket.addEventListener('message', function(event) {
    let message = JSON.parse(event.data);
    if (message.segment in recognition_text) {
      recognition_text[message.segment] = message.text;
    } else {
      recognition_text.push(message.text);
    }

    let text_area = document.getElementById('results');
    text_area.value = getDisplayResult();
    text_area.scrollTop = text_area.scrollHeight;  // auto scroll
    console.log('Received message: ', event.data);
  });
}
// Pre-fill the server address fields from the page's own location and
// switch the displayed scheme to wss:// on https pages.
window.onload = (event) => {
  console.log('page is fully loaded');
  console.log('protocol', window.location.protocol);
  console.log('port', window.location.port);
  if (window.location.protocol == 'https:') {
    document.getElementById('ws-protocol').textContent = 'wss://';
  }
  serverIpInput.value = window.location.hostname;
  serverPortInput.value = window.location.port;
};

// DOM handles for the connection form, recording controls, and display.
const serverIpInput = document.getElementById('server-ip');
const serverPortInput = document.getElementById('server-port');
const connectBtn = document.getElementById('connect');
const recordBtn = document.getElementById('streaming_record');
const stopBtn = document.getElementById('streaming_stop');
const clearBtn = document.getElementById('clear');
const soundClips = document.getElementById('sound-clips');
const canvas = document.getElementById('canvas');
const mainSection = document.querySelector('.container');

// Nothing to stop until a recording has started.
stopBtn.disabled = true;

// Audio-capture state.
let audioCtx;
const canvasCtx = canvas.getContext('2d');
let mediaStream;
let analyser;
let expectedSampleRate = 16000;  // sample rate the server expects
let recordSampleRate;  // the sampleRate of the microphone
let recorder = null;   // the microphone
let leftchannel = [];  // TODO: Use a single channel
let recordingLength = 0;  // number of samples so far

// Clear both the results box and the accumulated transcripts.
clearBtn.onclick = function() {
  document.getElementById('results').value = '';
  recognition_text = [];
};

connectBtn.onclick = function() {
  initWebSocket();
};
// copied/modified from https://mdn.github.io/web-dictaphone/
// and
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
// Microphone capture pipeline.
// copied/modified from https://mdn.github.io/web-dictaphone/
// and
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
if (navigator.mediaDevices.getUserMedia) {
  console.log('getUserMedia supported.');

  // see https://w3c.github.io/mediacapture-main/#dom-mediadevices-getusermedia
  const constraints = {audio: true};

  let onSuccess = function(stream) {
    if (!audioCtx) {
      audioCtx = new AudioContext();
    }
    console.log(audioCtx);
    recordSampleRate = audioCtx.sampleRate;
    console.log('sample rate ' + recordSampleRate);

    // creates an audio node from the microphone incoming stream
    mediaStream = audioCtx.createMediaStreamSource(stream);
    console.log(mediaStream);

    // https://developer.mozilla.org/en-US/docs/Web/API/AudioContext/createScriptProcessor
    // bufferSize: the onaudioprocess event is called when the buffer is full
    var bufferSize = 2048;
    var numberOfInputChannels = 2;
    var numberOfOutputChannels = 2;
    if (audioCtx.createScriptProcessor) {
      recorder = audioCtx.createScriptProcessor(
          bufferSize, numberOfInputChannels, numberOfOutputChannels);
    } else {
      // Fallback for older browsers without createScriptProcessor.
      recorder = audioCtx.createJavaScriptNode(
          bufferSize, numberOfInputChannels, numberOfOutputChannels);
    }
    console.log(recorder);

    // For each captured buffer: take channel 0, resample to the server's
    // expected rate, clamp to [-1, 1], stream the float32 samples to the
    // server immediately, and keep an int16 copy for local playback.
    recorder.onaudioprocess = function(e) {
      let samples = new Float32Array(e.inputBuffer.getChannelData(0))
      samples = downsampleBuffer(samples, expectedSampleRate);

      let buf = new Int16Array(samples.length);
      for (var i = 0; i < samples.length; ++i) {
        let s = samples[i];
        if (s >= 1)
          s = 1;
        else if (s <= -1)
          s = -1;

        samples[i] = s;
        buf[i] = s * 32767;
      }

      // Live streaming: each processed chunk goes out right away.
      socket.send(samples);

      leftchannel.push(buf);
      recordingLength += bufferSize;
    };

    visualize(stream);
    mediaStream.connect(analyser);

    // Start recording: route microphone -> recorder -> output.
    recordBtn.onclick = function() {
      mediaStream.connect(recorder);
      mediaStream.connect(analyser);
      recorder.connect(audioCtx.destination);
      console.log('recorder started');
      recordBtn.style.background = 'red';

      stopBtn.disabled = false;
      recordBtn.disabled = true;
    };

    // Stop recording: tell the server we are done, close the socket,
    // tear down the audio graph, and build a playable clip from the
    // accumulated samples.
    stopBtn.onclick = function() {
      console.log('recorder stopped');
      socket.send('Done');
      console.log('Sent Done');
      socket.close();

      // stopBtn recording
      recorder.disconnect(audioCtx.destination);
      mediaStream.disconnect(recorder);
      mediaStream.disconnect(analyser);

      recordBtn.style.background = '';
      recordBtn.style.color = '';
      // mediaRecorder.requestData();

      stopBtn.disabled = true;
      recordBtn.disabled = false;

      // Build the clip UI (name prompt, audio player, delete button).
      const clipName =
          prompt('Enter a name for your sound clip?', 'My unnamed clip');

      const clipContainer = document.createElement('article');
      const clipLabel = document.createElement('p');
      const audio = document.createElement('audio');
      const deleteButton = document.createElement('button');
      clipContainer.classList.add('clip');
      audio.setAttribute('controls', '');
      deleteButton.textContent = 'Delete';
      deleteButton.className = 'delete';

      if (clipName === null) {
        clipLabel.textContent = 'My unnamed clip';
      } else {
        clipLabel.textContent = clipName;
      }
      clipContainer.appendChild(audio);

      clipContainer.appendChild(clipLabel);
      clipContainer.appendChild(deleteButton);
      soundClips.appendChild(clipContainer);

      audio.controls = true;

      // Wrap the recorded int16 samples in a WAV container for playback.
      let samples = flatten(leftchannel);
      const blob = toWav(samples);

      leftchannel = [];
      const audioURL = window.URL.createObjectURL(blob);
      audio.src = audioURL;
      console.log('recorder stopped');

      deleteButton.onclick = function(e) {
        let evtTgt = e.target;
        evtTgt.parentNode.parentNode.removeChild(evtTgt.parentNode);
      };

      clipLabel.onclick = function() {
        const existingName = clipLabel.textContent;
        const newClipName = prompt('Enter a new name for your sound clip?');
        if (newClipName === null) {
          clipLabel.textContent = existingName;
        } else {
          clipLabel.textContent = newClipName;
        }
      };
    };
  };

  let onError = function(err) {
    console.log('The following error occured: ' + err);
  };

  navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError);
} else {
  console.log('getUserMedia not supported on your browser!');
  alert('getUserMedia not supported on your browser!');
}
// Draw a live oscilloscope of the microphone signal onto the canvas.
// The drawing loop re-schedules itself via requestAnimationFrame.
function visualize(stream) {
  if (!audioCtx) {
    audioCtx = new AudioContext();
  }
  const source = audioCtx.createMediaStreamSource(stream);
  if (!analyser) {
    analyser = audioCtx.createAnalyser();
    analyser.fftSize = 2048;
  }
  const bufferLength = analyser.frequencyBinCount;
  const dataArray = new Uint8Array(bufferLength);

  // source.connect(analyser);
  // analyser.connect(audioCtx.destination);

  draw()

  function draw() {
    const WIDTH = canvas.width
    const HEIGHT = canvas.height;
    requestAnimationFrame(draw);

    // Time-domain samples as unsigned bytes centered at 128.
    analyser.getByteTimeDomainData(dataArray);
    canvasCtx.fillStyle = 'rgb(200, 200, 200)';
    canvasCtx.fillRect(0, 0, WIDTH, HEIGHT);
    canvasCtx.lineWidth = 2;
    canvasCtx.strokeStyle = 'rgb(0, 0, 0)';
    canvasCtx.beginPath();

    // One x-step per analyser bin across the full canvas width.
    let sliceWidth = WIDTH * 1.0 / bufferLength;
    let x = 0;
    for (let i = 0; i < bufferLength; i++) {
      let v = dataArray[i] / 128.0;
      let y = v * HEIGHT / 2;
      if (i === 0) {
        canvasCtx.moveTo(x, y);
      } else {
        canvasCtx.lineTo(x, y);
      }
      x += sliceWidth;
    }
    canvasCtx.lineTo(canvas.width, canvas.height / 2);
    canvasCtx.stroke();
  }
}

// Keep the canvas as wide as its container.
window.onresize = function() {
  canvas.width = mainSection.offsetWidth;
};
window.onresize();
// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
// Concatenate a list of Int16Array chunks into a single Int16Array.
// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
function flatten(listOfSamples) {
  const total = listOfSamples.reduce((acc, chunk) => acc + chunk.length, 0);
  const merged = new Int16Array(total);
  let writePos = 0;
  for (const chunk of listOfSamples) {
    merged.set(chunk, writePos);
    writePos += chunk.length;
  }
  return merged;
}
// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
// Wrap 16-bit PCM samples in a 44-byte RIFF/WAVE header and return the
// result as an 'audio/wav' Blob. Mono, 16 bits per sample; the sample
// rate written into the header is the file-global `expectedSampleRate`.
// Layout reference: http://soundfile.sapp.org/doc/WaveFormat/
// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
function toWav(samples) {
  const dataBytes = samples.length * 2;
  const view = new DataView(new ArrayBuffer(44 + dataBytes));

  view.setUint32(0, 0x46464952, true);       // "RIFF" (little-endian bytes)
  view.setUint32(4, 36 + dataBytes, true);   // chunkSize
  view.setUint32(8, 0x45564157, true);       // "WAVE"
  view.setUint32(12, 0x20746d66, true);      // "fmt "
  view.setUint32(16, 16, true);              // subchunk1Size, 16 for PCM
  view.setUint32(20, 1, true);               // audioFormat, 1 for PCM
  view.setUint16(22, 1, true);               // numChannels: 1 channel
  view.setUint32(24, expectedSampleRate, true);      // sampleRate
  view.setUint32(28, expectedSampleRate * 2, true);  // byteRate
  view.setUint16(32, 2, true);               // blockAlign
  view.setUint16(34, 16, true);              // bitsPerSample
  view.setUint32(36, 0x61746164, true);      // "data"
  view.setUint32(40, dataBytes, true);       // subchunk2Size

  let writePos = 44;
  for (const sample of samples) {
    view.setInt16(writePos, sample, true);
    writePos += 2;
  }
  return new Blob([view], {type: 'audio/wav'});
}
// this function is copied from
// https://github.com/awslabs/aws-lex-browser-audio-capture/blob/master/lib/worker.js#L46
// Downsample `buffer` (captured at the file-global `recordSampleRate`)
// to `exportSampleRate` by averaging each run of input samples that maps
// onto one output sample. Returns the input unchanged when the rates match.
// this function is copied from
// https://github.com/awslabs/aws-lex-browser-audio-capture/blob/master/lib/worker.js#L46
function downsampleBuffer(buffer, exportSampleRate) {
  if (exportSampleRate === recordSampleRate) {
    return buffer;
  }
  const ratio = recordSampleRate / exportSampleRate;
  const outLength = Math.round(buffer.length / ratio);
  const out = new Float32Array(outLength);

  let readPos = 0;
  for (let writePos = 0; writePos < out.length; ++writePos) {
    const readEnd = Math.round((writePos + 1) * ratio);
    let sum = 0;
    let count = 0;
    for (let i = readPos; i < readEnd && i < buffer.length; ++i) {
      sum += buffer[i];
      ++count;
    }
    out[writePos] = sum / count;
    readPos = readEnd;
  }
  return out;
}

View File

@@ -0,0 +1,136 @@
/**
References
https://developer.mozilla.org/en-US/docs/Web/API/FileList
https://developer.mozilla.org/en-US/docs/Web/API/FileReader
https://javascript.info/arraybuffer-binary-arrays
https://developer.mozilla.org/zh-CN/docs/Web/API/WebSocket
https://developer.mozilla.org/en-US/docs/Web/API/WebSocket/send
*/
// Lazily-created WebSocket connection to the recognition server.
var socket;

// DOM handles for the connection form and the file-upload input.
const serverIpInput = document.getElementById('server-ip');
const serverPortInput = document.getElementById('server-port');
const connectBtn = document.getElementById('connect');
const uploadBtn = document.getElementById('file');

// Connect to the server at the host/port typed into the form.
// Uses wss:// when the page itself is served over https, since browsers
// refuse insecure WebSocket connections from secure pages.
function initWebSocket() {
  let protocol = 'ws://';
  if (window.location.protocol == 'https:') {
    protocol = 'wss://'
  }
  let server_ip = serverIpInput.value;
  let server_port = serverPortInput.value;
  console.log('protocol: ', protocol);
  console.log('server_ip: ', server_ip);
  console.log('server_port: ', server_port);

  let uri = protocol + server_ip + ':' + server_port;
  console.log('uri', uri);
  socket = new WebSocket(uri);

  // Connection opened: allow uploads, lock the connect button.
  socket.addEventListener('open', function(event) {
    console.log('connected');
    uploadBtn.disabled = false;
    connectBtn.disabled = true;
    connectBtn.innerHTML = 'Connected!';
  });

  // Connection closed: disable uploads until the user reconnects.
  socket.addEventListener('close', function(event) {
    console.log('disconnected');
    uploadBtn.disabled = true;
    connectBtn.disabled = false;
    connectBtn.innerHTML = 'Click me to connect!';
  });

  // The server replies with the recognition result in a single message:
  // display it, acknowledge with 'Done', and close the connection.
  socket.addEventListener('message', function(event) {
    console.log('Received message: ', event.data);
    document.getElementById('results').value = event.data;
    socket.send('Done');
    console.log('Sent Done');
    socket.close();
  });
}
// Pre-fill the server address fields from the page's own location and
// switch the displayed scheme to wss:// on https pages.
window.onload = (event) => {
  console.log('page is fully loaded');
  console.log('protocol', window.location.protocol);
  console.log('port', window.location.port);
  if (window.location.protocol == 'https:') {
    document.getElementById('ws-protocol').textContent = 'wss://';
  }
  serverIpInput.value = window.location.hostname;
  serverPortInput.value = window.location.port;
};

connectBtn.onclick = function() {
  initWebSocket();
};
// Send a 4-byte little-endian int32 header carrying `n` (the payload
// size in bytes) over the socket.
function send_header(n) {
  const headerWords = new Int32Array(1);
  new DataView(headerWords.buffer).setInt32(0, n, true /* littleEndian */);
  socket.send(headerWords);
}
// Handler for the file <input>: read the selected file, convert its
// int16 samples to float32 in [-1, 1), and stream them to the server.
function onFileChange() {
  var files = document.getElementById('file').files;

  if (files.length == 0) {
    console.log('No file selected');
    return;
  }

  console.log('files: ' + files);

  const file = files[0];
  console.log(file);
  console.log('file.name ' + file.name);
  console.log('file.type ' + file.type);
  console.log('file.size ' + file.size);

  let reader = new FileReader();
  reader.onload = function() {
    console.log('reading file!');
    let view = new Int16Array(reader.result);
    // we assume the input file is a wav file.
    // TODO: add some checks here.
    // NOTE(review): this assumes a canonical 44-byte WAV header with
    // 16-bit PCM data; files with extra chunks or other encodings will
    // be misread — confirm before relying on it.
    let int16_samples = view.subarray(22);  // header has 44 bytes == 22 shorts
    let num_samples = int16_samples.length;
    let float32_samples = new Float32Array(num_samples);
    console.log('num_samples ' + num_samples)
    for (let i = 0; i < num_samples; ++i) {
      float32_samples[i] = int16_samples[i] / 32768.
    }

    // Send 1024 audio samples per request.
    //
    // It has two purposes:
    // (1) Simulate streaming
    // (2) There is a limit on the number of bytes in the payload that can be
    //     sent by websocket, which is 1MB, I think. We can send a large
    //     audio file for decoding in this approach.
    let buf = float32_samples.buffer
    let n = 1024 * 4;  // send this number of bytes per request.
    console.log('buf length, ' + buf.byteLength);
    send_header(buf.byteLength);
    for (let start = 0; start < buf.byteLength; start += n) {
      socket.send(buf.slice(start, start + n));
    }
  };
  reader.readAsArrayBuffer(file);
}
// Clear the recognition results box.
const clearBtn = document.getElementById('clear');
clearBtn.onclick = function() {
  console.log('clicked');
  document.getElementById('results').value = '';
};

Binary file not shown.

After

Width:  |  Height:  |  Size: 69 KiB

View File

@@ -0,0 +1,26 @@
<!-- Shared navigation bar. Each page loads this fragment into its
     #nav placeholder via jQuery's .load('nav-partial.html'). -->
<nav class="navbar navbar-expand-lg navbar-light bg-light">
  <a class="navbar-brand" href="index.html">Next-gen Kaldi demo</a>
  <button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarSupportedContent" aria-controls="navbarSupportedContent" aria-expanded="false" aria-label="Toggle navigation">
    <span class="navbar-toggler-icon"></span>
  </button>

  <!-- Links to the three demo pages plus home. -->
  <div class="collapse navbar-collapse" id="navbarSupportedContent">
    <ul class="navbar-nav mr-auto">
      <li class="nav-item active">
        <a class="nav-link" href="index.html">Home <span class="sr-only">(current)</span></a>
      </li>
      <li class="nav-item">
        <a class="nav-link" href="upload.html">Upload</a>
      </li>
      <li class="nav-item">
        <a class="nav-link" href="streaming_record.html">Streaming-Record</a>
      </li>
      <li class="nav-item">
        <a class="nav-link" href="offline_record.html">Offline-Record</a>
      </li>
    </ul>
  </div>
</nav>

View File

@@ -0,0 +1,81 @@
<!doctype html>
<html lang="en">
<head>
<!-- Required meta tags. <meta> and <link> are void elements: no closing tag. -->
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<!-- Bootstrap CSS -->
<link rel="stylesheet"
href="./css/bootstrap.min.css"
integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T"
crossorigin="anonymous">
<script src="./js/jquery-3.6.0.min.js" integrity="sha256-/xUj+3OJU5yExlq6GSYGSHk7tPXikynS7ogEvDej/m4=" crossorigin="anonymous"></script>
<!-- Title fixed: it was copy-pasted from upload.html; this page records offline audio. -->
<title>Next-gen Kaldi demo (Recognition from offline recordings)</title>
</head>
<body>
<div id="nav"></div>
<script>
$(function(){
$("#nav").load("nav-partial.html");
});
</script>
<h3>Recognition from offline recordings</h3>
<div class="container">
<div class="input-group mb-1">
<div class="input-group-prepend">
<button class="btn btn-block btn-primary" type="button" id="connect">Click me to connect</button>
</div>
<span class="input-group-text" id="ws-protocol">ws://</span>
<input type="text" id="server-ip" class="form-control" placeholder="Sherpa server IP, e.g., localhost" aria-label="sherpa server IP">
<span class="input-group-text">:</span>
<input type="text" id="server-port" class="form-control" placeholder="Sherpa server port, e.g., 6006" aria-label="sherpa server port">
</div>
<div class="row">
<div class="col-12">
<canvas id="canvas" height="60px" display="block" margin-bottom="0.5rem"></canvas>
</div>
</div>
<div class="row">
<div class="col">
<button class="btn btn-primary btn-block" id="offline_record">Offline-Record</button>
</div>
<div class="col">
<button class="btn btn-primary btn-block" id="offline_stop">Offline-Stop</button>
</div>
</div>
</div>
<div class="mb-3">
<label for="results" class="form-label">Recognition results</label>
<textarea class="form-control" id="results" rows="8"></textarea>
</div>
<button class="btn btn-primary btn-block" id="clear">Clear results</button>
<section flex="1" overflow="auto" id="sound-clips">
</section>
<!-- Optional JavaScript -->
<!-- jQuery first, then Popper.js, then Bootstrap JS -->
<script src="./js/popper.min.js"
integrity="sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1"
crossorigin="anonymous">
</script>
<script src="./js/bootstrap.min.js"
integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM"
crossorigin="anonymous">
</script>
<script src="./js/offline_record.js"> </script>
</body>
</html>

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

View File

@@ -0,0 +1,75 @@
#!/usr/bin/env python3
# Code in this file is modified from
# https://stackoverflow.com/questions/19705785/python-3-simple-https-server
import argparse
import http.server
import ssl
import sys
from pathlib import Path
"""
Usage:
./start-https-server.py \
--server-address 0.0.0.0 \
--server-port 6007 \
--cert ./cert.pem
"""
def get_args():
    """Build the CLI parser and return the parsed arguments.

    Options:
      --server-address  IP address to bind to (default "0.0.0.0")
      --server-port     port to listen on (default 6007)
      --certificate     path to the X.509 certificate (default "cert.pem")
    """
    parser = argparse.ArgumentParser()
    # (flag, value type, default, help text) for every supported option.
    options = (
        (
            "--server-address",
            str,
            "0.0.0.0",
            """IP address which this server will bind to""",
        ),
        (
            "--server-port",
            int,
            6007,
            """Port number on which this server will listen""",
        ),
        (
            "--certificate",
            str,
            "cert.pem",
            "Path to the X.509 certificate. You can use\n"
            "        ./generate-certificate.py to generate it",
        ),
    )
    for flag, value_type, default, help_text in options:
        parser.add_argument(flag, type=value_type, default=default, help=help_text)
    return parser.parse_args()
def main():
    """Serve the current directory over HTTPS using the given certificate.

    Parses the command-line arguments, wraps a ``SimpleHTTPRequestHandler``
    server socket in TLS, and blocks in ``serve_forever``.
    """
    args = get_args()
    print(f"{vars(args)}")

    # Fail early — before binding the socket — if the certificate is missing.
    if not Path(args.certificate).is_file():
        print("Please run ./generate-certificate.py to generate a certificate")
        sys.exit(-1)

    server_address = (args.server_address, args.server_port)
    httpd = http.server.HTTPServer(
        server_address, http.server.SimpleHTTPRequestHandler
    )

    # ssl.wrap_socket() was deprecated in Python 3.7 and removed in 3.12;
    # use an explicit SSLContext instead. PROTOCOL_TLS_SERVER negotiates the
    # highest TLS version both sides support, as PROTOCOL_TLS did server-side.
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
    context.load_cert_chain(certfile=args.certificate)
    httpd.socket = context.wrap_socket(httpd.socket, server_side=True)

    print(
        "The server is listening at the following address:\n"
        f"https://{args.server_address}:{args.server_port}"
    )
    httpd.serve_forever()


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,81 @@
<!doctype html>
<html lang="en">
<head>
<!-- Required meta tags. <meta> and <link> are void elements: no closing tag. -->
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<!-- Bootstrap CSS -->
<link rel="stylesheet"
href="./css/bootstrap.min.css"
integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T"
crossorigin="anonymous">
<script src="./js/jquery-3.6.0.min.js" integrity="sha256-/xUj+3OJU5yExlq6GSYGSHk7tPXikynS7ogEvDej/m4=" crossorigin="anonymous"></script>
<!-- Title fixed: it was copy-pasted from upload.html; this page does streaming recognition. -->
<title>Next-gen Kaldi demo (Recognition from real-time recordings)</title>
</head>
<body>
<div id="nav"></div>
<script>
$(function(){
$("#nav").load("nav-partial.html");
});
</script>
<h3>Recognition from real-time recordings</h3>
<div class="container">
<div class="input-group mb-1">
<div class="input-group-prepend">
<button class="btn btn-block btn-primary" type="button" id="connect">Click me to connect</button>
</div>
<span class="input-group-text" id="ws-protocol">ws://</span>
<input type="text" id="server-ip" class="form-control" placeholder="Sherpa server IP, e.g., localhost" aria-label="sherpa server IP">
<span class="input-group-text">:</span>
<input type="text" id="server-port" class="form-control" placeholder="Sherpa server port, e.g., 6006" aria-label="sherpa server port">
</div>
<div class="row">
<div class="col-12">
<canvas id="canvas" height="60px" display="block" margin-bottom="0.5rem"></canvas>
</div>
</div>
<div class="row">
<div class="col">
<button class="btn btn-primary btn-block" id="streaming_record">Streaming-Record</button>
</div>
<div class="col">
<button class="btn btn-primary btn-block" id="streaming_stop">Streaming-Stop</button>
</div>
</div>
</div>
<div class="mb-3">
<label for="results" class="form-label">Recognition results</label>
<textarea class="form-control" id="results" rows="8"></textarea>
</div>
<button class="btn btn-primary btn-block" id="clear">Clear results</button>
<section flex="1" overflow="auto" id="sound-clips">
</section>
<!-- Optional JavaScript -->
<!-- jQuery first, then Popper.js, then Bootstrap JS -->
<script src="./js/popper.min.js"
integrity="sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1"
crossorigin="anonymous">
</script>
<script src="./js/bootstrap.min.js"
integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM"
crossorigin="anonymous">
</script>
<script src="./js/streaming_record.js"> </script>
</body>
</html>

View File

@@ -0,0 +1,68 @@
<!doctype html>
<html lang="en">
<head>
<!-- Required meta tags. <meta>, <link> and <input> are void elements: no closing tag. -->
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<!-- Bootstrap CSS -->
<link rel="stylesheet"
href="./css/bootstrap.min.css"
integrity="sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T"
crossorigin="anonymous">
<script src="./js/jquery-3.6.0.min.js" integrity="sha256-/xUj+3OJU5yExlq6GSYGSHk7tPXikynS7ogEvDej/m4=" crossorigin="anonymous"></script>
<title>Next-gen Kaldi demo (Upload file for recognition)</title>
</head>
<body>
<div id="nav"></div>
<script>
$(function(){
$("#nav").load("nav-partial.html");
});
</script>
<h3>Recognition from a selected file</h3>
<div class="input-group mb-1">
<div class="input-group-prepend">
<button class="btn btn-block btn-primary" type="button" id="connect">Click me to connect</button>
</div>
<span class="input-group-text" id="ws-protocol">ws://</span>
<input type="text" id="server-ip" class="form-control" placeholder="Sherpa server IP, e.g., localhost" aria-label="sherpa server IP">
<span class="input-group-text">:</span>
<input type="text" id="server-port" class="form-control" placeholder="Sherpa server port, e.g., 6006" aria-label="sherpa server port">
</div>
<form>
<div class="mb-3">
<label for="file" class="form-label">Select file</label>
<!-- Enabled by upload.js once the websocket connection is established. -->
<input class="form-control" type="file" id="file" accept=".wav" onchange="onFileChange()" disabled="true">
</div>
<div class="mb-3">
<label for="results" class="form-label">Recognition results</label>
<textarea class="form-control" id="results" rows="8"></textarea>
</div>
<button class="btn btn-primary btn-block" id="clear">Clear results</button>
</form>
<!-- Optional JavaScript -->
<!-- jQuery first, then Popper.js, then Bootstrap JS -->
<script src="./js/popper.min.js"
integrity="sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1"
crossorigin="anonymous">
</script>
<script src="./js/bootstrap.min.js"
integrity="sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM"
crossorigin="anonymous">
</script>
<script src="./js/upload.js"> </script>
</body>
</html>