修复拼音组处理逻辑,避免未处理拼音导致的索引错误

This commit is contained in:
songsenand 2026-02-21 22:01:28 +08:00
parent 8f58917d13
commit 51f9ddbc70
1 changed file with 3 additions and 1 deletion

View File

@@ -440,7 +440,9 @@ class PinyinInputDataset(IterableDataset):
"char_id": torch.tensor([char_info["id"]]),
"char": char,
"freq": char_info["freq"],
"pg": torch.tensor([self.pg_groups[char_info["pinyin"][0]]]),
"pg": torch.tensor(
[self.pg_groups[processed_pinyin[0]] if processed_pinyin else 8]
),
}
# 根据调整因子重复样本