use anyhow::{Context, Result}; use serde::Deserialize; use std::collections::HashMap; use std::fs::File; use std::io::BufReader; use std::path::Path; /// 单个字符-拼音对的信息 #[derive(Debug, Deserialize, Clone)] pub struct CharInfo { pub id: u32, #[serde(rename = "char")] pub character: String, pub pinyin: String, pub count: u64, } /// JSON 根结构(仅包含需要的字段) #[derive(Debug, Deserialize)] struct RawStatistics { pairs: HashMap, // 键为字符串形式的 ID // 忽略其他元数据字段 } /// 字典查询引擎,提供 O(1) 的 ID 到信息的映射 pub struct Dictionary { id_to_charinfo: HashMap, } impl Dictionary { /// 从 JSON 文件加载字典 pub fn from_json_file>(path: P) -> Result { let file = File::open(path).context("无法打开字典文件")?; let reader = BufReader::new(file); let raw: RawStatistics = serde_json::from_reader(reader) .context("无法解析 JSON 字典")?; let mut id_to_charinfo = HashMap::with_capacity(raw.pairs.len()); for (id_str, info) in raw.pairs { let id = id_str .parse::() .with_context(|| format!("无效的 ID 字符串: {}", id_str))?; // 可选:验证 id 与 info.id 一致,此处忽略不一致的情况(信任输入数据) id_to_charinfo.insert(info.id, info); } Ok(Dictionary { id_to_charinfo }) } /// 通过 ID 获取汉字(用于填充 Candidate.text) pub fn get_char_by_id(&self, id: u32) -> Option<&str> { self.id_to_charinfo.get(&id).map(|info| info.character.as_str()) } /// 通过 ID 获取拼音 pub fn get_pinyin_by_id(&self, id: u32) -> Option<&str> { self.id_to_charinfo.get(&id).map(|info| info.pinyin.as_str()) } /// 通过 ID 获取出现次数 pub fn get_count_by_id(&self, id: u32) -> Option { self.id_to_charinfo.get(&id).map(|info| info.count) } /// 获取完整的 CharInfo 引用 pub fn get_char_info(&self, id: u32) -> Option<&CharInfo> { self.id_to_charinfo.get(&id) } /// 返回字典中存储的条目数量 pub fn len(&self) -> usize { self.id_to_charinfo.len() } }