74 lines
2.3 KiB
Rust
74 lines
2.3 KiB
Rust
use anyhow::{Context, Result};
|
||
use serde::Deserialize;
|
||
use std::collections::HashMap;
|
||
use std::fs::File;
|
||
use std::io::BufReader;
|
||
use std::path::Path;
|
||
|
||
/// 单个字符-拼音对的信息
|
||
#[derive(Debug, Deserialize, Clone)]
|
||
pub struct CharInfo {
|
||
pub id: u32,
|
||
#[serde(rename = "char")]
|
||
pub character: String,
|
||
pub pinyin: String,
|
||
pub count: u64,
|
||
}
|
||
|
||
/// JSON 根结构(仅包含需要的字段)
|
||
#[derive(Debug, Deserialize)]
|
||
struct RawStatistics {
|
||
pairs: HashMap<String, CharInfo>, // 键为字符串形式的 ID
|
||
// 忽略其他元数据字段
|
||
}
|
||
|
||
/// 字典查询引擎,提供 O(1) 的 ID 到信息的映射
|
||
pub struct Dictionary {
|
||
id_to_charinfo: HashMap<u32, CharInfo>,
|
||
}
|
||
|
||
impl Dictionary {
|
||
/// 从 JSON 文件加载字典
|
||
pub fn from_json_file<P: AsRef<Path>>(path: P) -> Result<Self> {
|
||
let file = File::open(path).context("无法打开字典文件")?;
|
||
let reader = BufReader::new(file);
|
||
let raw: RawStatistics = serde_json::from_reader(reader)
|
||
.context("无法解析 JSON 字典")?;
|
||
|
||
let mut id_to_charinfo = HashMap::with_capacity(raw.pairs.len());
|
||
for (id_str, info) in raw.pairs {
|
||
let id = id_str
|
||
.parse::<u32>()
|
||
.with_context(|| format!("无效的 ID 字符串: {}", id_str))?;
|
||
// 可选:验证 id 与 info.id 一致,此处忽略不一致的情况(信任输入数据)
|
||
id_to_charinfo.insert(info.id, info);
|
||
}
|
||
|
||
Ok(Dictionary { id_to_charinfo })
|
||
}
|
||
|
||
/// 通过 ID 获取汉字(用于填充 Candidate.text)
|
||
pub fn get_char_by_id(&self, id: u32) -> Option<&str> {
|
||
self.id_to_charinfo.get(&id).map(|info| info.character.as_str())
|
||
}
|
||
|
||
/// 通过 ID 获取拼音
|
||
pub fn get_pinyin_by_id(&self, id: u32) -> Option<&str> {
|
||
self.id_to_charinfo.get(&id).map(|info| info.pinyin.as_str())
|
||
}
|
||
|
||
/// 通过 ID 获取出现次数
|
||
pub fn get_count_by_id(&self, id: u32) -> Option<u64> {
|
||
self.id_to_charinfo.get(&id).map(|info| info.count)
|
||
}
|
||
|
||
/// 获取完整的 CharInfo 引用
|
||
pub fn get_char_info(&self, id: u32) -> Option<&CharInfo> {
|
||
self.id_to_charinfo.get(&id)
|
||
}
|
||
|
||
/// 返回字典中存储的条目数量
|
||
pub fn len(&self) -> usize {
|
||
self.id_to_charinfo.len()
|
||
}
|
||
} |