from typing import Any, List, Optional

from pydantic import BaseModel, Field


# A single recognized word with its start/end timestamps.
class WordModel(BaseModel):
    text: str
    start_time: int  # or float, depending on the timestamp format
    end_time: int
    # The segment this word belongs to; excluded from serialized output.
    segment: Optional[Any] = Field(default=None, exclude=True)
    # receive_time: Optional[Any] = None  # time offset at which the owning
    # segment was received; recorded from ASRResultModel.receive_time for
    # convenience during processing

    class Config:
        # Same exclusion expressed through the model config; duplicates
        # Field(exclude=True) above.
        # NOTE(review): the `fields` config key is pydantic-v1 style and was
        # removed in pydantic v2 -- confirm the targeted pydantic version
        # before dropping either form.
        fields = {
            'segment': {'exclude': True}
        }


# One recognized text segment together with its word-level breakdown.
class SegmentModel(BaseModel):
    # Time at which the segment was received
    receive_time: Optional[Any] = None
    language: str
    para_seq: int
    final_result: bool
    text: str
    start_time: int  # or float, if the timestamps have millisecond precision
    end_time: int
    words: List[WordModel]  # word-level entries of this segment

    def summary(self) -> str:
        """Return a multi-line, human-readable dump of this segment.

        The result starts with a newline and lists language, para_seq,
        final_result, text, the comma-joined word texts, start_time and
        end_time, one per line.

        NOTE(review): an earlier version computed
        ``(end_time - start_time) / 1000`` as seconds without using it,
        which suggests the timestamps are in milliseconds -- confirm.
        """
        word_texts = ', '.join(w.text for w in self.words)
        return (
            f"\n"
            f"language:{self.language} \n"
            f"para_seq:{self.para_seq} \n"
            f"final_result {self.final_result}\n"
            f"text:{self.text}\n"
            f"words:[{word_texts}]\n"
            f"start_time:{self.start_time}\n"
            f"end_time:{self.end_time}\n"
        )


# Response wrapper holding a single SegmentModel under `asr_results`.
class ASRResponseModel(BaseModel):
    asr_results: SegmentModel


# An answer string with float start/end bounds.
class VoiceSegment(BaseModel):
    answer: str
    start: float
    end: float


# Audio item record: length, optional duration, file paths and a list of
# VoiceSegment entries.
class AudioItem(BaseModel):
    audio_length: float
    duration: Optional[float] = None
    file: str
    orig_file: Optional[str] = None
    voice: List[VoiceSegment]
    absolute_path: Optional[str] = None