修复拼音组处理逻辑,避免未处理拼音导致的索引错误

This commit is contained in:
songsenand 2026-02-21 22:01:28 +08:00
parent 8f58917d13
commit 51f9ddbc70
1 changed file with 3 additions and 1 deletion

View File

@@ -440,7 +440,9 @@ class PinyinInputDataset(IterableDataset):
"char_id": torch.tensor([char_info["id"]]), "char_id": torch.tensor([char_info["id"]]),
"char": char, "char": char,
"freq": char_info["freq"], "freq": char_info["freq"],
"pg": torch.tensor([self.pg_groups[char_info["pinyin"][0]]]), "pg": torch.tensor(
[self.pg_groups[processed_pinyin[0]] if processed_pinyin else 8]
),
} }
# 根据调整因子重复样本 # 根据调整因子重复样本