18 lines
422 B
Python
18 lines
422 B
Python
import sys
|
|
|
|
from tqdm import tqdm
|
|
|
|
from model.dataset import PinyinInputDataset
|
|
|
|
# Smoke-check script: build the dataset and print the shape of every
# non-string field of each sample it yields.

# On Windows the corpus is read from a relative "data" folder; on any other
# platform an absolute, machine-specific corpus directory is used.
dataset_path = (
    "data"
    if sys.platform == "win32"
    else "/home/songsenand/Data/corpus/CCI-Data/"
)

# NOTE(review): the keyword `max_workes` is kept exactly as originally
# written; it may be a typo for `max_workers` -- confirm against the
# PinyinInputDataset signature before renaming.
dataset = PinyinInputDataset(dataset_path, max_iter_length=20, max_workes=3)

for sample in dataset:
    # Each sample exposes `.items()`; string values are skipped and every
    # other value is reported through its `.shape` attribute.
    for field_name, field_value in sample.items():
        if not isinstance(field_value, str):
            print(field_name, field_value.shape)