修复拼音组处理逻辑，避免未处理拼音导致的索引错误
This commit is contained in:
parent
8f58917d13
commit
51f9ddbc70
|
|
@@ -440,7 +440,9 @@ class PinyinInputDataset(IterableDataset):
|
||||||
"char_id": torch.tensor([char_info["id"]]),
|
"char_id": torch.tensor([char_info["id"]]),
|
||||||
"char": char,
|
"char": char,
|
||||||
"freq": char_info["freq"],
|
"freq": char_info["freq"],
|
||||||
"pg": torch.tensor([self.pg_groups[char_info["pinyin"][0]]]),
|
"pg": torch.tensor(
|
||||||
|
[self.pg_groups[processed_pinyin[0]] if processed_pinyin else 8]
|
||||||
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
# 根据调整因子重复样本
|
# 根据调整因子重复样本
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue