fix(dataset): 添加6%概率返回None以增强数据多样性
This commit is contained in:
parent
dfcce1f1ed
commit
1178f87713
|
|
@ -276,6 +276,9 @@ class PinyinInputDataset(IterableDataset):
|
|||
trunc_len = random.randint(1, max_len - 1)
|
||||
return pinyin[:trunc_len]
|
||||
"""
|
||||
rand_val = random.random()
|
||||
if rand_val <= 0.06:
|
||||
return None
|
||||
return pinyin
|
||||
|
||||
def process_pinyin_sequence(self, pinyin_list: List[str]) -> str:
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ if __name__ == "__main__":
|
|||
|
||||
# 创建数据集
|
||||
dataset = PinyinInputDataset(
|
||||
data_dir="/root/autodl-tmp/data",
|
||||
data_dir="/home/songsenand/DataSet/data",
|
||||
query_engine=query_engine,
|
||||
tokenizer_name="iic/nlp_structbert_backbone_lite_std",
|
||||
max_len=88,
|
||||
|
|
|
|||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue