Files
enginex-mr_series-asr/utils/model.py
2025-08-20 14:29:42 +08:00

64 lines
1.6 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from pydantic import BaseModel, Field
from typing import (
Optional,
List,
Any
)
# A single word with its text and start/end timestamps.
class WordModel(BaseModel):
    text: str
    # Word boundaries; could be float instead, depending on the timestamp
    # format.
    start_time: int
    end_time: int
    # The segment this word belongs to. Marked exclude=True so it is left out
    # when the model is exported.
    segment: Optional[Any] = Field(None, exclude=True)
    # receive_time: Optional[Any] = None  # receive-time offset of the owning
    # segment; recorded here for processing convenience (mirrors receive_time
    # in ASRResultModel)

    class Config:
        # NOTE(review): this repeats the exclusion already declared through
        # Field(..., exclude=True) on `segment` — confirm whether both
        # declarations are still required by the pydantic version in use.
        fields = {
            "segment": {"exclude": True},
        }
# A text segment with its language, sequence number, timing and word list.
class SegmentModel(BaseModel):
    # Time at which the segment was received.
    receive_time: Optional[Any] = None
    language: str
    para_seq: int
    final_result: bool
    text: str
    # Segment boundaries; could be float if the timestamps have millisecond
    # precision.
    # NOTE(review): presumably expressed in milliseconds (an earlier revision
    # of summary() divided the difference by 1000 to get seconds) — confirm.
    start_time: int
    end_time: int
    # Supplementary per-word breakdown of the segment.
    words: List[WordModel]

    def summary(self) -> str:
        """Return a multi-line, human-readable dump of the segment.

        The string starts with a newline and then lists, one per line:
        language, para_seq, final_result, text, the comma-separated texts of
        ``words``, start_time and end_time.

        Returns:
            The formatted summary string.
        """
        # The previously computed (and never used) ``duration`` local was
        # dropped; the output is unchanged.
        # NOTE(review): labels run straight into their values with no
        # separator (e.g. "language" + value) — confirm this is the intended
        # output format before changing it.
        word_texts = ", ".join(word.text for word in self.words)
        return (
            f"\n"
            f"language{self.language} \n"
            f"para_seq{self.para_seq} \n"
            f"final_result {self.final_result}\n"
            f"text{self.text}\n"
            f"words[{word_texts}]\n"
            f"start_time{self.start_time}\n"
            f"end_time{self.end_time}\n"
        )
# Response wrapper holding one SegmentModel under the `asr_results` key.
class ASRResponseModel(BaseModel):
    # Required; note that this is a single segment, not a list.
    asr_results: SegmentModel
# A span of audio (`start`..`end`) paired with its `answer` text.
class VoiceSegment(BaseModel):
    # NOTE(review): presumably the reference transcript for the span —
    # confirm against the data that populates this model.
    answer: str
    # Span boundaries; units are not stated here (presumably seconds —
    # TODO confirm).
    start: float
    end: float
# Description of one audio file and the voice segments it contains.
class AudioItem(BaseModel):
    # Required length value for the audio; units are not stated here.
    audio_length: float
    # Optional duration; how it differs from `audio_length` is not visible
    # from this model — TODO confirm with the producer of this data.
    duration: Optional[float] = None

    # Required file reference, plus optional original-file reference.
    file: str
    orig_file: Optional[str] = None

    # Required list of voice segments for this file.
    voice: List[VoiceSegment]

    # Optional absolute path of the file.
    absolute_path: Optional[str] = None