Added tokens, tokens_arr and json for offline recognizer result (#936)
Co-authored-by: leo <webmaster@360converter.com>
This commit is contained in:
@@ -444,14 +444,49 @@ const SherpaOnnxOfflineRecognizerResult *GetOfflineStreamResult(
|
|||||||
pText[text.size()] = 0;
|
pText[text.size()] = 0;
|
||||||
r->text = pText;
|
r->text = pText;
|
||||||
|
|
||||||
if (!result.timestamps.empty()) {
|
// copy json
|
||||||
r->timestamps = new float[result.timestamps.size()];
|
const auto &json = result.AsJsonString();
|
||||||
std::copy(result.timestamps.begin(), result.timestamps.end(),
|
char *pJson = new char[json.size() + 1];
|
||||||
r->timestamps);
|
std::copy(json.begin(), json.end(), pJson);
|
||||||
r->count = result.timestamps.size();
|
pJson[json.size()] = 0;
|
||||||
|
r->json = pJson;
|
||||||
|
|
||||||
|
// copy tokens
|
||||||
|
auto count = result.tokens.size();
|
||||||
|
if (count > 0) {
|
||||||
|
size_t total_length = 0;
|
||||||
|
for (const auto &token : result.tokens) {
|
||||||
|
// +1 for the null character at the end of each token
|
||||||
|
total_length += token.size() + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
r->count = count;
|
||||||
|
// Each token ends with a null character ('\0')
|
||||||
|
char *tokens = new char[total_length]{};
|
||||||
|
char **tokens_temp = new char *[r->count];
|
||||||
|
int32_t pos = 0;
|
||||||
|
for (int32_t i = 0; i < r->count; ++i) {
|
||||||
|
tokens_temp[i] = tokens + pos;
|
||||||
|
memcpy(tokens + pos, result.tokens[i].c_str(), result.tokens[i].size());
|
||||||
|
// +1 to move past the null character
|
||||||
|
pos += result.tokens[i].size() + 1;
|
||||||
|
}
|
||||||
|
r->tokens_arr = tokens_temp;
|
||||||
|
|
||||||
|
if (!result.timestamps.empty()) {
|
||||||
|
r->timestamps = new float[r->count];
|
||||||
|
std::copy(result.timestamps.begin(), result.timestamps.end(),
|
||||||
|
r->timestamps);
|
||||||
|
} else {
|
||||||
|
r->timestamps = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
r->tokens = tokens;
|
||||||
} else {
|
} else {
|
||||||
r->timestamps = nullptr;
|
|
||||||
r->count = 0;
|
r->count = 0;
|
||||||
|
r->timestamps = nullptr;
|
||||||
|
r->tokens = nullptr;
|
||||||
|
r->tokens_arr = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
return r;
|
return r;
|
||||||
@@ -462,6 +497,9 @@ void DestroyOfflineRecognizerResult(
|
|||||||
if (r) {
|
if (r) {
|
||||||
delete[] r->text;
|
delete[] r->text;
|
||||||
delete[] r->timestamps;
|
delete[] r->timestamps;
|
||||||
|
delete[] r->tokens;
|
||||||
|
delete[] r->tokens_arr;
|
||||||
|
delete[] r->json;
|
||||||
delete r;
|
delete r;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -481,7 +481,27 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerResult {
|
|||||||
|
|
||||||
// number of entries in timestamps
|
// number of entries in timestamps
|
||||||
int32_t count;
|
int32_t count;
|
||||||
// TODO(fangjun): Add more fields
|
|
||||||
|
// Pointer to continuous memory which holds string based tokens
|
||||||
|
// which are separated by \0
|
||||||
|
const char *tokens;
|
||||||
|
|
||||||
|
// a pointer array containing the address of the first character of each token in tokens
|
||||||
|
const char *const *tokens_arr;
|
||||||
|
|
||||||
|
/** Return a json string.
|
||||||
|
*
|
||||||
|
* The returned string contains:
|
||||||
|
* {
|
||||||
|
* "text": "The recognition result",
|
||||||
|
* "tokens": [x, x, x],
|
||||||
|
* "timestamps": [x, x, x],
|
||||||
|
* "segment": x,
|
||||||
|
* "start_time": x,
|
||||||
|
* "is_final": true|false
|
||||||
|
* }
|
||||||
|
*/
|
||||||
|
const char *json;
|
||||||
} SherpaOnnxOfflineRecognizerResult;
|
} SherpaOnnxOfflineRecognizerResult;
|
||||||
|
|
||||||
/// Get the result of the offline stream.
|
/// Get the result of the offline stream.
|
||||||
|
|||||||
Reference in New Issue
Block a user