初始化项目,由ModelHub XC社区提供模型

Model: kotoba-tech/kotoba-whisper-v2.1
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-15 00:23:31 +08:00
commit 3e8e1a4b75
17 changed files with 182706 additions and 0 deletions

154
pipeline/test_pipeline.py Normal file
View File

@@ -0,0 +1,154 @@
from pprint import pprint
from datasets import load_dataset
from transformers.pipelines import pipeline
# Smoke test for the custom ASR pipeline: one evaluation clip is transcribed
# under every combination of the `punctuator` and `stable_ts` switches, first
# with `return_timestamps=True` and then without passing that argument.

# NOTE(review): confirm this is the checkpoint the test is meant to exercise.
model_alias = "kotoba-tech/kotoba-whisper-v1.1"

# Evaluation clip to transcribe and the number of leading samples kept from it.
_EVAL_DATASET = "kotoba-tech/kotoba-whisper-eval"
_TARGET_AUDIO_PATH = "long_interview_1.mp3"
_MAX_AUDIO_SAMPLES = 7938000

# Decoding options handed to the pipeline on every call.
_GENERATE_KWARGS = {"language": "japanese", "task": "transcribe"}

# (printed label, punctuator, stable_ts): "P" marks punctuator=True and
# "S" marks stable_ts=True; "RAW" has both switched off.
_CONFIGURATIONS = (
    ("### P + S ###", True, True),
    ("### P ###", True, False),
    ("### S ###", False, True),
    ("### RAW ###", False, False),
)


def _load_target_audio():
    """Return the truncated audio entry of the target clip, or None.

    Loads the "train" split of the evaluation dataset once, scans it for the
    sample whose audio path equals ``_TARGET_AUDIO_PATH`` and keeps only the
    first ``_MAX_AUDIO_SAMPLES`` entries of its array. Returns ``None`` when
    no sample matches.
    """
    dataset = load_dataset(_EVAL_DATASET, split="train")
    for sample in dataset:
        audio = sample["audio"]
        if audio["path"] == _TARGET_AUDIO_PATH:
            audio["array"] = audio["array"][:_MAX_AUDIO_SAMPLES]
            return audio
    return None


def _run_configuration(label, audio, punctuator, stable_ts, return_timestamps):
    """Print ``label``, build one pipeline and pretty-print its prediction.

    Args:
        label: Header line printed before the pipeline is built.
        audio: Audio entry returned by ``_load_target_audio`` (may be None,
            in which case the pipeline is still built but nothing is
            transcribed).
        punctuator: Value forwarded as the pipeline's ``punctuator`` argument.
        stable_ts: Value forwarded as the pipeline's ``stable_ts`` argument.
        return_timestamps: When true, ``return_timestamps=True`` is passed to
            the call; otherwise the argument is omitted entirely so the
            pipeline's own default applies.
    """
    print(label)
    # The pipeline is local to this function, so it is released before the
    # next configuration builds its own instance.
    pipe = pipeline(
        model=model_alias,
        punctuator=punctuator,
        stable_ts=stable_ts,
        chunk_length_s=15,
        batch_size=16,
        trust_remote_code=True,
    )
    if audio is None:
        return
    call_kwargs = {"generate_kwargs": dict(_GENERATE_KWARGS)}
    if return_timestamps:
        call_kwargs["return_timestamps"] = True
    # Pass a shallow copy: the clip is reused across runs, and the pipeline
    # may consume keys from the mapping it is given (TODO confirm).
    prediction = pipe(dict(audio), **call_kwargs)
    pprint(prediction)


_target_audio = _load_target_audio()

# First pass requests timestamps, second pass leaves the argument out; both
# passes print the same four labels in the same order.
for _with_timestamps in (True, False):
    for _label, _punctuator, _stable_ts in _CONFIGURATIONS:
        _run_configuration(
            _label,
            _target_audio,
            punctuator=_punctuator,
            stable_ts=_stable_ts,
            return_timestamps=_with_timestamps,
        )