# SUInput/test/test_query.py
#
# 151 lines
# 5.1 KiB
# Python
#
# test_query_engine.py
import pytest
import tempfile
import os
import json
from suinput.query import QueryEngine
from suinput.char_info import CharInfo, PinyinCharPairsCounter
# 将测试数据保存为 JSON 文件
@pytest.fixture
def json_file_path():
yield "pinyin_char_statistics.json"
# 测试 QueryEngine 的基本功能
class TestQueryEngine:
def test_load_from_json(self, json_file_path):
"""测试从 JSON 文件加载数据"""
engine = QueryEngine()
metadata = engine.load(json_file_path)
assert engine.is_loaded() is True
assert metadata["format"] == "json"
assert metadata["pair_count"] == 20646
def test_query_by_id(self, json_file_path):
"""测试通过 ID 查询字符信息"""
engine = QueryEngine()
engine.load(json_file_path)
result = engine.query_by_id(8)
assert result is not None
assert result.char == ""
assert result.pinyin == "zhong"
assert result.count == 73927282
result = engine.query_by_id(100000) # 不存在的 ID
assert result is None
def test_query_by_char(self, json_file_path):
"""测试通过字符查询拼音信息"""
engine = QueryEngine()
engine.load(json_file_path)
results = engine.query_by_char("")
assert len(results) == 2
assert results[0] == (159, "zhang", 15424264)
assert results[1] == (414, "chang", 6663465)
results_limited = engine.query_by_char("", limit=1)
assert len(results_limited) == 1
assert results_limited[0] == (159, "zhang", 15424264)
results_empty = engine.query_by_char("X") # 不存在的字符
assert results_empty == []
def test_query_by_pinyin(self, json_file_path):
"""测试通过拼音查询字符信息"""
engine = QueryEngine()
engine.load(json_file_path)
results = engine.query_by_pinyin("zhong")
assert len(results) == 57
assert results[0] == (8, "", 73927282)
results_empty = engine.query_by_pinyin("xxx") # 不存在的拼音
assert results_empty == []
def test_get_char_frequency(self, json_file_path):
"""测试获取字符总频率"""
engine = QueryEngine()
engine.load(json_file_path)
freq = engine.get_char_frequency("")
assert freq == 73927282
freq_zero = engine.get_char_frequency("X") # 不存在的字符
assert freq_zero == 0
def test_get_pinyin_frequency(self, json_file_path):
"""测试获取拼音总频率"""
engine = QueryEngine()
engine.load(json_file_path)
freq = engine.get_pinyin_frequency("zhong")
assert freq == 136246123
freq_zero = engine.get_pinyin_frequency("xxx") # 不存在的拼音
assert freq_zero == 0
def test_get_char_pinyin_count(self, json_file_path):
"""测试获取字符-拼音对的出现次数"""
engine = QueryEngine()
engine.load(json_file_path)
count = engine.get_char_pinyin_count("", "zhong")
assert count == 73927282
count_zero = engine.get_char_pinyin_count("", "xxx") # 不存在的拼音
assert count_zero == 0
def test_batch_query_by_ids(self, json_file_path):
"""测试批量 ID 查询"""
engine = QueryEngine()
engine.load(json_file_path)
results = engine.batch_query_by_ids([8, 9, 10000000])
assert len(results) == 3
assert results[9].char == ""
def test_search_chars_by_prefix(self, json_file_path):
"""测试根据字符前缀搜索"""
engine = QueryEngine()
engine.load(json_file_path)
results = engine.search_chars_by_prefix("")
assert len(results) == 1
assert results[0] == ("", 73927282)
results_empty = engine.search_chars_by_prefix("X") # 不存在的前缀
assert results_empty == []
def test_get_statistics(self, json_file_path):
"""测试获取统计信息"""
engine = QueryEngine()
engine.load(json_file_path)
stats = engine.get_statistics()
assert stats["status"] == "loaded"
assert stats["total_pairs"] == 20646
assert stats["total_characters"] == 18240
assert stats["top_chars"][0] == ("", 439524694)
def test_clear(self, json_file_path):
"""测试清除数据"""
engine = QueryEngine()
engine.load(json_file_path)
assert engine.is_loaded() is True
engine.clear()
assert engine.is_loaded() is False
assert engine.get_statistics()["status"] == "not_loaded"
def test_batch_get_char_pinyin_info(self, json_file_path):
engine = QueryEngine()
engine.load(json_file_path)
assert engine.is_loaded() is True
pairs = engine.batch_get_char_pinyin_info([("", "wo"), ("", "ni"), ("", "ta")])
assert pairs[("", "wo")] == engine.get_char_info_by_char_pinyin("", "wo")
assert pairs[("", "ni")] == engine.get_char_info_by_char_pinyin("", "ni")
assert pairs[("", "ta")] == engine.get_char_info_by_char_pinyin("", "ta")