Files
instructscore_caption/length_check.py

8 lines
175 B
Python
Raw Permalink Normal View History

from datasets import load_dataset
from transformers import AutoTokenizer
# Fetch the two corpora this script inspects. Both calls run at import
# time and may hit the network / local cache on first use.
# "3.0.0" is handed to load_dataset as the configuration name for
# cnn_dailymail (second positional parameter in the original call).
data_cnn = load_dataset(path="cnn_dailymail", name="3.0.0")

# xsum is loaded without an explicit configuration.
data_xsum = load_dataset(path="xsum")

# Dump the loaded xsum object's repr for a quick visual check
# (presumably a summary of its splits and columns; confirm against the
# installed datasets version).
print(data_xsum)