初始化项目,由ModelHub XC社区提供模型
Model: kotoba-tech/kotoba-whisper-v2.1 Source: Original Platform
This commit is contained in:
154
pipeline/test_pipeline.py
Normal file
154
pipeline/test_pipeline.py
Normal file
@@ -0,0 +1,154 @@
|
||||
from pprint import pprint
|
||||
from datasets import load_dataset
|
||||
from transformers.pipelines import pipeline
|
||||
|
||||
# Hub id of the checkpoint whose custom pipeline is exercised below.
# NOTE(review): the commit metadata around this file names
# kotoba-whisper-v2.1 while this alias points at v1.1 -- confirm which
# checkpoint is intended before relying on the results.
model_alias = "kotoba-tech/kotoba-whisper-v1.1"

# Evaluation dataset and the single recording used for every run.
EVAL_DATASET = "kotoba-tech/kotoba-whisper-eval"
TARGET_AUDIO_PATH = "long_interview_1.mp3"

# Only the first MAX_SAMPLES samples of the recording are transcribed.
# NOTE(review): 7938000 equals 180 s at 44.1 kHz -- presumably a three
# minute excerpt; confirm against the clip's actual sampling rate.
MAX_SAMPLES = 7938000

# Decoding options forwarded to the model on every call.
GENERATE_KWARGS = {"language": "japanese", "task": "transcribe"}

# (header label, punctuator, stable_ts) for each pipeline variant, in the
# order they are run.
CONFIGURATIONS = (
    ("P + S", True, True),
    ("P", True, False),
    ("S", False, True),
    ("RAW", False, False),
)


def _load_target_audio():
    """Return the truncated audio entry for TARGET_AUDIO_PATH, or None.

    The dataset is scanned in order and the first example whose audio path
    matches is used; its sample array is cut to the first MAX_SAMPLES
    samples. None is returned when no example matches.
    """
    for example in load_dataset(EVAL_DATASET, split="train"):
        audio = example["audio"]
        if audio["path"] == TARGET_AUDIO_PATH:
            audio["array"] = audio["array"][:MAX_SAMPLES]
            return audio
    return None


def _run_configuration(label, punctuator, stable_ts, audio, return_timestamps):
    """Build one pipeline variant, transcribe *audio* and pretty-print it.

    Args:
        label: Text shown between the ``###`` markers of the header line.
        punctuator: Forwarded to ``pipeline(punctuator=...)``.
        stable_ts: Forwarded to ``pipeline(stable_ts=...)``.
        audio: Audio entry taken from the dataset, or None when the target
            recording was not found (only the header is printed then).
        return_timestamps: When true, ``return_timestamps=True`` is passed
            to the pipeline call; otherwise the argument is omitted.
    """
    print(f"### {label} ###")
    pipe = pipeline(
        model=model_alias,
        punctuator=punctuator,
        stable_ts=stable_ts,
        chunk_length_s=15,
        batch_size=16,
        trust_remote_code=True,
    )
    if audio is None:
        # Nothing to transcribe: the target recording is not in the dataset.
        return

    # Fresh copies per call so that a pipeline which pops keys from its
    # inputs cannot affect the shared sample used by later runs.
    call_kwargs = {"generate_kwargs": dict(GENERATE_KWARGS)}
    if return_timestamps:
        call_kwargs["return_timestamps"] = True
    pprint(pipe(dict(audio), **call_kwargs))


# The sample is located once and reused by all eight runs instead of
# reloading and rescanning the dataset for every configuration.
_target_audio = _load_target_audio()

# First pass requests timestamps, second pass omits the argument.
for _with_timestamps in (True, False):
    for _label, _punctuator, _stable_ts in CONFIGURATIONS:
        _run_configuration(
            _label,
            _punctuator,
            _stable_ts,
            _target_audio,
            _with_timestamps,
        )
|
||||
|
||||
Reference in New Issue
Block a user