"""Pydantic models for ASR words/segments and audio item records."""
from pydantic import BaseModel, Field
|
||
from typing import (
|
||
Optional,
|
||
List,
|
||
Any
|
||
)
|
||
|
||
|
||
class WordModel(BaseModel):
    # A single word with its text and start/end timestamps.
    #
    # `segment` points at the segment the word belongs to. It is excluded
    # from exported output through Field(exclude=True); that flag alone is
    # sufficient, so no separate `Config.fields` entry is declared.
    text: str
    start_time: int  # could be float instead, depending on the timestamp format
    end_time: int
    segment: Optional[Any] = Field(default=None, exclude=True)  # owning segment
    # Disabled field, kept for reference:
    # receive_time: Optional[Any] = None  # receive-time offset of the owning
    # segment; copied from ASRResultModel.receive_time for easier processing
|
||
|
||
class SegmentModel(BaseModel):
    # One recognized text segment together with its word-level entries.
    receive_time: Optional[Any] = None  # time at which the segment was received
    language: str
    para_seq: int
    final_result: bool
    text: str
    # Timestamps are presumably in milliseconds — TODO confirm with the producer.
    start_time: int  # or float, if the timestamps carry millisecond precision
    end_time: int
    words: List[WordModel]  # words that make up this segment

    def summary(self) -> str:
        """Return a multi-line, human-readable dump of this segment.

        The result starts with a newline and then lists, one per line:
        language, para_seq, final_result, text, the comma-separated word
        texts, start_time and end_time.

        Returns:
            The formatted summary string, terminated by a newline.
        """
        word_texts = ", ".join(word.text for word in self.words)
        return (
            "\n"
            f"language:{self.language} \n"
            f"para_seq:{self.para_seq} \n"
            f"final_result {self.final_result}\n"
            f"text:{self.text}\n"
            f"words:[{word_texts}]\n"
            f"start_time:{self.start_time}\n"
            f"end_time:{self.end_time}\n"
        )
|
||
|
||
|
||
class ASRResponseModel(BaseModel):
    # Response wrapper: carries exactly one SegmentModel under `asr_results`.
    asr_results: SegmentModel
|
||
|
||
|
||
|
||
class VoiceSegment(BaseModel):
    # A span of audio paired with a text `answer`.
    # `start` / `end` bound the span; the unit is not stated here
    # (presumably seconds — TODO confirm).
    answer: str
    start: float
    end: float
|
||
|
||
|
||
class AudioItem(BaseModel):
    # Record for one audio file and the voice segments attached to it.
    # `audio_length`, `file` and `voice` are required; the remaining fields
    # are optional and default to None.
    audio_length: float
    duration: Optional[float] = None
    file: str
    orig_file: Optional[str] = None
    voice: List[VoiceSegment]
    absolute_path: Optional[str] = None
|
||
|