SUimeModelTraner/resign_stat.py

96 lines
3.0 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
from pathlib import Path
from typing import Dict, Any
import sys
def _shift_pairs(original_pairs: Dict[str, Any]) -> Dict[str, Any]:
    """Return a new pairs mapping with a terminator at key "0" and every record shifted by one.

    Raises:
        ValueError: a key is not an integer string, or two records would
            end up under the same shifted key.
        KeyError: a record has no "id" field.
        TypeError / AttributeError: a record is not a dict with an integer "id".
    """
    zero_count = original_pairs.get("0", {}).get("count", 0)

    # The terminator goes in first so it is the first entry of the new
    # mapping; its count is the count of the former "0" record plus one.
    shifted: Dict[str, Any] = {
        "0": {
            "id": 0,
            "char": "",
            "pinyin": "",
            "count": zero_count + 1,
        }
    }

    for old_key, record in original_pairs.items():
        new_key = str(int(old_key) + 1)
        # Keys such as "-1" or "01" would land on an already used slot and
        # silently drop a record (possibly the terminator itself).
        if new_key in shifted:
            raise ValueError(f"键冲突: {old_key!r} -> {new_key!r}")
        new_record = record.copy()  # shallow copy: leave the loaded record untouched
        new_record["id"] = new_record["id"] + 1
        shifted[new_key] = new_record
    return shifted


def modify_pinyin_statistics(file_path: Path) -> None:
    """Apply a one-off, in-place shift to a pinyin statistics JSON file.

    The transformation:
      1. every key of the ``pairs`` dict is parsed as an integer and
         incremented by one;
      2. the ``id`` field of each record is incremented by one;
      3. a terminator record (empty ``char`` and ``pinyin``) is inserted at
         key ``"0"``; its ``count`` is the count of the former ``"0"`` record
         plus one (or 1 when there was none).

    All other top-level fields of the file are written back unchanged.
    Before the file is overwritten, a copy is saved next to it as
    ``<file name>.bak``.

    Every failure (missing file, invalid JSON, unexpected structure, I/O
    error while writing) is reported on stderr and the function returns
    without raising. Structural problems are detected before the backup is
    created, so in that case the file on disk is not touched at all.

    NOTE: the operation is not idempotent — running it a second time shifts
    the records again and replaces the backup with the already shifted file.

    Args:
        file_path: path of the JSON file to rewrite in place.
    """
    import shutil

    # 1. Load the existing data.
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data: Dict[str, Any] = json.load(f)
    except FileNotFoundError:
        print(f"错误:文件不存在 {file_path}", file=sys.stderr)
        return
    except json.JSONDecodeError:
        print(f"错误:文件格式无效 {file_path}", file=sys.stderr)
        return

    # 2. Validate the structure up front so a malformed file is reported the
    #    same way as a missing or invalid one instead of raising.
    original_pairs = data.get("pairs") if isinstance(data, dict) else None
    if not isinstance(original_pairs, dict):
        print(f"错误:文件缺少有效的pairs字典 {file_path}", file=sys.stderr)
        return

    # 3. Build the shifted mapping entirely in memory.
    try:
        new_pairs = _shift_pairs(original_pairs)
    except (KeyError, ValueError, TypeError, AttributeError) as e:
        print(f"错误:pairs数据无法转换 {file_path}: {e!r}", file=sys.stderr)
        return

    # 4. Swap in the new mapping; the timestamp and any other top-level
    #    fields are deliberately kept as they were (one-off migration).
    data["pairs"] = new_pairs

    # Append ".bak" to the full file name. For "*.json" inputs this is the
    # same path as with_suffix('.json.bak'), but it does not rename the
    # backup of a file that has a different (or no) extension.
    backup_path = file_path.with_name(file_path.name + '.bak')
    try:
        # Back up first: opening the target in 'w' mode truncates it.
        shutil.copy2(file_path, backup_path)
        print(f"已创建备份: {backup_path}")
        # Write the new data in a human-readable form.
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        print("修改完成!")
        print(f"原pairs条目数: {len(original_pairs)}")
        print(f"新pairs条目数: {len(new_pairs)}")
        print(f"新终止符count值: {new_pairs['0']['count']}")
    except Exception as e:
        # Best-effort boundary: report the failure and point at the backup.
        print(f"写入文件时出错: {e}", file=sys.stderr)
        if backup_path.exists():
            print("已保留备份文件", file=sys.stderr)
# Usage example: run the one-off migration, then print the first few records.
if __name__ == "__main__":
    # The path is resolved relative to the current working directory.
    json_file = Path("./src/model/assets/pinyin_char_statistics.json")

    # Apply the modification (it reports its own errors on stderr).
    modify_pinyin_statistics(json_file)

    # Verify the result by re-reading the file and showing the first records.
    print("\n验证前5条记录:")
    try:
        with open(json_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
        pairs = data["pairs"]
    except (OSError, json.JSONDecodeError, KeyError, TypeError) as e:
        # The modification step returns silently after reporting a missing or
        # invalid file; fail here with a clear message and a non-zero exit
        # status instead of an uncaught traceback.
        print(f"验证失败: {e}", file=sys.stderr)
        sys.exit(1)

    for i in range(5):
        key = str(i)
        if key in pairs:
            print(f"key={key}: {pairs[key]}")