64 lines
1.6 KiB
Python
64 lines
1.6 KiB
Python
|
|
from pydantic import BaseModel, Field
|
|||
|
|
from typing import (
|
|||
|
|
Optional,
|
|||
|
|
List,
|
|||
|
|
Any
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
|
|||
|
|
class WordModel(BaseModel):
    """A single word with its text and start/end timestamps.

    ``segment`` refers to the segment the word belongs to (typed as ``Any``)
    and is excluded from exported output through ``Field(exclude=True)``.
    The exclusion is declared only on the field: the former
    ``Config.fields`` entry duplicated it, and the ``fields`` config key no
    longer exists in pydantic v2.
    """

    text: str
    start_time: int  # or float, depending on the timestamp format
    end_time: int
    # Segment this word belongs to; never serialized.
    segment: Optional[Any] = Field(default=None, exclude=True)
    # receive_time: Optional[Any] = None  # receive-time offset of the owning
    # segment; would mirror receive_time from ASRResultModel for convenience
    # during processing (currently disabled).
|
|||
|
|
|
|||
|
|
class SegmentModel(BaseModel):
    """A text segment with its language, sequence number, timing and words."""

    # Time at which the segment was received.
    receive_time: Optional[Any] = None
    language: str
    para_seq: int
    final_result: bool
    text: str
    # NOTE(review): timestamps are presumably in milliseconds — confirm.
    start_time: int  # or float, if the timestamp has millisecond precision
    end_time: int
    words: List[WordModel]  # the words field (added later)

    def summary(self) -> str:
        """Return a multi-line, human-readable dump of this segment.

        The result starts with a newline, followed by one line each for
        ``language``, ``para_seq``, ``final_result``, ``text``, the
        comma-separated word texts, ``start_time`` and ``end_time``.
        """
        joined_words = ", ".join(word.text for word in self.words)
        return (
            "\n"
            f"language:{self.language} \n"
            f"para_seq:{self.para_seq} \n"
            f"final_result {self.final_result}\n"
            f"text:{self.text}\n"
            f"words:[{joined_words}]\n"
            f"start_time:{self.start_time}\n"
            f"end_time:{self.end_time}\n"
        )
|
|||
|
|
|
|||
|
|
|
|||
|
|
class ASRResponseModel(BaseModel):
    """Response wrapper holding one ``SegmentModel`` under ``asr_results``."""

    # NOTE(review): the name is plural but the field holds a single segment,
    # not a list — confirm against the producer of this payload.
    asr_results: SegmentModel
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
class VoiceSegment(BaseModel):
    """A span delimited by ``start`` and ``end`` with an ``answer`` string."""

    answer: str
    # NOTE(review): units of start/end are not visible here (presumably
    # seconds) — confirm with the data source.
    start: float
    end: float
|
|||
|
|
|
|||
|
|
|
|||
|
|
class AudioItem(BaseModel):
    """An audio file entry together with its list of voice segments.

    ``audio_length``, ``file`` and ``voice`` are required; ``duration``,
    ``orig_file`` and ``absolute_path`` are optional and default to ``None``.
    """

    audio_length: float
    duration: Optional[float] = None
    file: str
    orig_file: Optional[str] = None
    voice: List[VoiceSegment]
    absolute_path: Optional[str] = None
|
|||
|
|
|