From c0489e538c38470adbc53e7c5d0137339803d7a6 Mon Sep 17 00:00:00 2001 From: songsenand Date: Sun, 5 Apr 2026 19:38:30 +0800 Subject: [PATCH] =?UTF-8?q?feat(docs):=20=E6=B7=BB=E5=8A=A0=E5=9F=BA?= =?UTF-8?q?=E4=BA=8EJSON=E6=97=81=E8=B7=AF=E8=AE=B0=E5=BD=95=E6=B3=95?= =?UTF-8?q?=E7=9A=84=E7=A7=BB=E5=8A=A8=E7=AB=AF=E7=9B=91=E6=8E=A7=E6=96=B9?= =?UTF-8?q?=E6=A1=88=E6=96=87=E6=A1=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 185 +++++++++- pyproject.toml | 4 + src/model/dataset.py | 2 + src/model/monitor.py | 344 ++++++++++++++++++ src/model/trainer.py | 251 +++++++++++-- src/model/training_monitor.py | 650 ++++++++++++++++++++++++++++++++++ test.py | 104 +++++- test.py.backup | 31 ++ uv.lock | 299 ++++++++++++++++ 9 files changed, 1821 insertions(+), 49 deletions(-) create mode 100644 src/model/monitor.py create mode 100644 src/model/training_monitor.py create mode 100644 test.py.backup diff --git a/README.md b/README.md index ff70a0a..ec7f7d8 100644 --- a/README.md +++ b/README.md @@ -475,6 +475,34 @@ train-model train \ --learning-rate 1e-5 ``` +#### 检查点恢复训练 + +要从检查点恢复训练(保持原有的训练状态): + +```bash +train-model train \ + --train-data-path "path/to/train/dataset" \ + --eval-data-path "path/to/eval/dataset" \ + --resume-from "./output/checkpoints/latest_checkpoint.pt" +``` + +#### 重置训练状态 + +如果只想加载模型权重,从头开始训练(学习率、epoch等都重新开始): + +```bash +train-model train \ + --train-data-path "path/to/train/dataset" \ + --eval-data-path "path/to/eval/dataset" \ + --resume-from "./output/checkpoints/best_model.pt" \ + --reset-training-state +``` + +这个功能在以下场景非常有用: +- 想要用预训练权重初始化模型,但用新的训练计划重新训练 +- 需要调整学习率策略或训练时长 +- 在现有模型基础上进行迁移学习 + #### 学习率建议 根据模型架构和超参数配置(4层Transformer,512维度),推荐使用以下学习率范围: - **标准范围**: 1e-4 ~ 5e-4 @@ -517,6 +545,7 @@ train-model train \ - `--mixed-precision/--no-mixed-precision`: 是否使用混合精度训练(默认:启用) - `--tensorboard/--no-tensorboard`: 是否使用TensorBoard(默认:启用) - `--resume-from`: 从检查点恢复训练(可选) +- 
`--reset-training-state`: 重置训练状态,只加载模型权重从头开始训练(默认:False) - `--seed`: 随机种子(默认:42) ### 6.5 监控训练进度 @@ -533,7 +562,159 @@ train-model train \ tensorboard --logdir ./output/tensorboard ``` -### 6.6 评估模型(开发中) +### 6.6 基于JSON旁路记录法的移动端监控方案 + +为了提供移动端友好的训练监控体验,我们实现了基于JSON旁路记录法的监控方案。该方案在保持TensorBoard记录的同时,额外写入一份JSON状态文件,并通过Streamlit提供移动端友好的Web界面。 + +#### 方案特点 + +**📱 移动端体验** +- Streamlit自动生成响应式界面,完美适配手机屏幕 +- 图表支持双指缩放和滑动操作 +- 大字体显示核心指标,触控操作便捷 + +**🚀 低耦合架构** +- 训练和监控通过文件系统解耦 +- 监控服务重启不影响训练进程 +- 训练脚本只需几行代码修改即可支持 + +**🔒 安全稳定** +- 纯文本JSON文件,无文件锁冲突问题 +- 读写速度快,稳定性高 +- 不会影响原有的TensorBoard记录流程 + +**📊 实时监控** +- 默认每5秒自动刷新数据 +- 实时显示训练进度和指标趋势 +- 数据新鲜度状态指示(实时/较新/较旧/陈旧) + +#### 使用方法 + +**启动监控服务** +```bash +# 启动监控服务(默认端口8501) +monitor-training monitor + +# 指定状态文件路径和端口 +monitor-training monitor --status-file ./output/training_status.json --port 8080 + +# 启动后自动打开浏览器(默认不打开) +monitor-training monitor --open-browser + +# 指定自定义Streamlit脚本 +monitor-training monitor --streamlit-script ./custom_monitor.py +``` + +**查看训练状态** +```bash +# 查看最近10条训练记录 +monitor-training view + +# 查看最近50条记录(原始JSON格式) +monitor-training view --limit 50 --raw + +# 查看指定状态文件 +monitor-training view /path/to/status.json +``` + +**检查状态文件** +```bash +# 检查状态文件状态 +monitor-training check + +# 检查指定文件 +monitor-training check ./output/training_status.json +``` + +#### 监控界面功能 + +**📊 核心指标看板** +- 当前步数、轮次、训练损失、准确率 +- 评估损失和准确率 +- 当前学习率、最后更新时间 + +**📈 趋势图表** +- 损失曲线(训练损失 + 评估损失) +- 准确率曲线(训练准确率 + 评估准确率) +- 学习率变化图(对数坐标) + +**📋 数据详情** +- 完整的训练记录表格 +- 数据统计信息(总数据点、训练时长、总步数) +- 训练进度条 + +**⚙️ 配置选项** +- 状态文件路径(支持环境变量 `TRAINING_STATUS_FILE`) +- 自动刷新间隔(1-30秒可调) +- 显示数据点数量(10-1000条可调) + +#### 技术实现 + +**训练端改造** +- 在 `Trainer.__init__` 中添加 `status_file` 参数 +- 实现 `_write_training_status()` 方法,在每次评估时写入JSON文件 +- 支持从现有状态文件恢复,避免数据丢失 + +**监控端搭建** +- 使用Streamlit构建移动端友好的Web界面 +- 采用Plotly图表库,支持触控交互 +- 自动刷新机制,实时更新训练状态 + +**命令行工具** +- 提供 `monitor`、`view`、`check` 三个子命令 +- 自动检测Streamlit可用性 +- 支持环境变量传递 + +#### 访问方式 + +**本地访问** +```bash +# 启动监控服务后,通过浏览器访问 +http://localhost:8501 +```
+ +**局域网访问** +```bash +# 启动服务时指定主机地址 +monitor-training monitor --host 0.0.0.0 --port 8080 + +# 手机浏览器访问(同一局域网) +http://192.168.1.100:8080 +``` + +**公网访问**(需端口转发) +```bash +# 确保服务器防火墙开放对应端口 +# 通过域名或公网IP访问 +http://your-server.com:8501 +``` + +#### 状态文件格式 + +状态文件 `training_status.json` 位于训练输出目录,格式如下: +```json +[ + { + "step": 100, + "epoch": 1, + "timestamp": "2024-01-01T12:00:00", + "train/loss": 2.345, + "train/accuracy": 0.456, + "eval/loss": 2.123, + "eval/accuracy": 0.512, + "train/learning_rate": 0.0001 + }, + ... +] +``` + +#### 注意事项 +1. 首次监控时如果状态文件不存在,会自动创建空文件 +2. 需要安装 `plotly` 依赖用于图表绘制:`pip install "plotly>=5.0.0"` +3. 从检查点恢复训练时会自动加载已有的状态数据 +4. 建议将监控服务与训练服务部署在同一服务器,避免网络延迟 + +### 6.7 评估模型(开发中) 当前评估功能尚在开发中: @@ -549,7 +730,7 @@ train-model evaluate \ - 在评估数据集上计算准确率、困惑度等指标 - 生成详细的性能报告 -### 6.7 导出模型(开发中) +### 6.8 导出模型(开发中) 当前导出功能尚在开发中: diff --git a/pyproject.toml b/pyproject.toml index ac9c52b..f5d8fa2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,17 +13,21 @@ dependencies = [ "numpy>=2.4.2", "onnxruntime>=1.24.2", "pandas>=3.0.0", + "plotly>=5.0.0", "pypinyin>=0.55.0", "requests>=2.32.5", "rich>=14.3.1", + "streamlit>=1.56.0", "tensorboard>=2.20.0", "torch>=2.10.0", + "torchdata>=0.11.0", "transformers==5.1.0", "typer>=0.21.1", ] [project.scripts] train-model = "model.trainer:app" +monitor-training = "model.monitor:app" [tool.uv] # 设置当前项目的默认索引源 diff --git a/src/model/dataset.py b/src/model/dataset.py index 0b8ddb5..db17f61 100644 --- a/src/model/dataset.py +++ b/src/model/dataset.py @@ -311,6 +311,8 @@ class PinyinInputDataset(IterableDataset): pinyin_list[i : i + pinyin_len], ) ] + if random.random() <= 0.1: + labels.append(0) encoded = self.tokenizer( f"{part4}|{part1}", diff --git a/src/model/monitor.py b/src/model/monitor.py new file mode 100644 index 0000000..409cf6f --- /dev/null +++ b/src/model/monitor.py @@ -0,0 +1,344 @@ +import json +import os +import subprocess +import sys +import time +import webbrowser +from datetime import datetime +from
# NOTE: json, os, subprocess, sys, time, webbrowser and datetime.datetime are
# imported by the module's leading import lines, which sit just before this
# block.
from pathlib import Path
from typing import Optional, Union

import typer

app = typer.Typer(help="AI模型训练监控工具 - 基于JSON旁路记录法的移动端友好监控方案")

# Bind-all addresses: fine for a listening socket, but not a destination a
# browser can reliably navigate to.
_WILDCARD_HOSTS = ("", "0.0.0.0", "::")

# (record key, label shown to the user, format spec) used by the `view` command.
_METRIC_ROWS = (
    ("train/loss", "训练损失", ".6f"),
    ("train/accuracy", "训练准确率", ".6f"),
    ("eval/loss", "评估损失", ".6f"),
    ("eval/accuracy", "评估准确率", ".6f"),
    ("train/learning_rate", "学习率", ".2e"),
)


def get_package_dir() -> Path:
    """Return the directory that contains this module."""
    return Path(__file__).parent


def check_streamlit_available() -> bool:
    """Return True when the ``streamlit`` package is installed.

    The package is only located, not imported: the CLI process never uses
    Streamlit itself (it is run in a child process), so importing it here
    would only add start-up time.
    """
    import importlib.util

    return importlib.util.find_spec("streamlit") is not None


def _browser_url(host: str, port: int) -> str:
    """Return the URL a local browser should open for a server bound to *host*."""
    display_host = "localhost" if host in _WILDCARD_HOSTS else host
    return f"http://{display_host}:{port}"


def _format_metric(value: object, spec: str) -> str:
    """Format *value* with *spec*, falling back to ``str`` for non-numeric data."""
    try:
        return format(value, spec)
    except (TypeError, ValueError):
        return str(value)


def _describe_freshness(timestamp: object) -> str:
    """Return the status line describing how old *timestamp* is.

    *timestamp* is expected to be an ISO-8601 string; a trailing ``Z`` is
    accepted. Anything else yields the "malformed timestamp" line.
    """
    if not isinstance(timestamp, str):
        return " 数据状态: ⚠️ 时间戳格式异常"
    try:
        last_update = datetime.fromisoformat(timestamp.replace("Z", "+00:00"))
    except ValueError:
        return " 数据状态: ⚠️ 时间戳格式异常"

    # Use a "now" with the same awareness as the parsed value: subtracting a
    # naive datetime from an aware one raises TypeError.
    now = datetime.now(last_update.tzinfo)
    # Clamp so a slightly-ahead clock does not print a negative age.
    diff = max((now - last_update).total_seconds(), 0.0)

    if diff < 60:
        return f" 数据状态: 🟢 实时 (最近 {int(diff)} 秒)"
    if diff < 300:
        return f" 数据状态: 🟡 较新 (最近 {int(diff / 60)} 分钟)"
    if diff < 3600:
        return f" 数据状态: 🟠 较旧 (最近 {int(diff / 60)} 分钟)"
    return f" 数据状态: 🔴 陈旧 (最近 {int(diff / 3600)} 小时)"


def start_streamlit_server(
    status_file: str,
    port: int,
    host: str,
    open_browser: bool,
    streamlit_script: Optional[Union[str, Path]] = None,
) -> int:
    """
    Run the Streamlit dashboard as a child process and wait for it to exit.

    Args:
        status_file: Path of the training-status JSON file; its absolute path
            is handed to the dashboard through ``TRAINING_STATUS_FILE``.
        port: TCP port the dashboard listens on.
        host: Address the dashboard binds to.
        open_browser: Open a local browser tab once the server was started.
        streamlit_script: Dashboard script to run; ``None`` selects the
            ``training_monitor.py`` that sits next to this module.

    Returns:
        The child's exit code, 0 after Ctrl+C, or 1 when the script is
        missing or the process could not be started.
    """
    using_default_script = streamlit_script is None
    if using_default_script:
        script_path = get_package_dir() / "training_monitor.py"
    else:
        script_path = Path(streamlit_script)

    # Validate custom scripts too, so a typo fails here with a clear message.
    if not script_path.is_file():
        typer.echo(f"❌ 错误: 找不到Streamlit脚本: {script_path}")
        if using_default_script:
            typer.echo("请确保training_monitor.py文件存在。")
        return 1

    # The dashboard reads the status-file location from the environment.
    env = os.environ.copy()
    env["TRAINING_STATUS_FILE"] = os.path.abspath(status_file)

    cmd = [
        sys.executable,
        "-m",
        "streamlit",
        "run",
        str(script_path),
        "--server.port",
        str(port),
        "--server.address",
        host,
        # Always headless: with headless=false Streamlit opens a browser by
        # itself, which would duplicate the tab opened below.
        "--server.headless",
        "true",
        "--theme.base",
        "light",
        "--browser.serverAddress",
        host,
        "--browser.gatherUsageStats",
        "false",
    ]

    typer.echo("🚀 启动训练监控服务...")
    typer.echo(f"📁 状态文件: {os.path.abspath(status_file)}")
    typer.echo(f"🌐 监控地址: http://{host}:{port}")
    typer.echo(f"📊 Streamlit脚本: {script_path}")
    typer.echo("\n按 Ctrl+C 停止监控服务\n")

    process = None
    try:
        process = subprocess.Popen(cmd, env=env)

        if open_browser:
            # Give the server a moment to bind before pointing a browser at
            # it, and skip the browser if the process already died.
            time.sleep(2)
            if process.poll() is None:
                typer.echo("🌐 正在打开浏览器...")
                webbrowser.open(_browser_url(host, port))

        process.wait()
        return process.returncode
    except KeyboardInterrupt:
        typer.echo("\n🛑 监控服务已停止")
        return 0
    except Exception as e:
        typer.echo(f"❌ 启动监控服务时出错: {e}")
        return 1
    finally:
        # Never leave the dashboard running (or unreaped) behind the CLI.
        if process is not None and process.poll() is None:
            process.terminate()
            try:
                process.wait(timeout=5)
            except subprocess.TimeoutExpired:
                process.kill()


# NOTE: Typer shows a command's docstring as its --help text, so the command
# docstrings below stay in the CLI's user-facing language.
@app.command(name="monitor")
def monitor_training(
    status_file: str = typer.Option(
        "./output/training_status.json",
        "--status-file",
        "-s",
        help="训练状态JSON文件路径",
    ),
    port: int = typer.Option(8501, "--port", "-p", help="监控服务端口号"),
    host: str = typer.Option("0.0.0.0", "--host", help="监控服务主机地址"),
    open_browser: bool = typer.Option(False, "--open-browser", help="自动打开浏览器"),
    streamlit_script: Optional[str] = typer.Option(
        None, "--streamlit-script", help="自定义Streamlit脚本路径"
    ),
):
    """
    启动AI模型训练监控服务

    基于JSON旁路记录法,提供移动端友好的训练监控界面。
    服务启动后,可通过浏览器访问 http://<host>:<port> 查看实时训练指标。
    """
    # Create an empty status file so the dashboard can be started before the
    # first training run has written anything.
    if not os.path.exists(status_file):
        typer.echo(f"⚠️ 警告: 状态文件不存在: {status_file}")
        typer.echo("开始训练后,训练脚本会自动创建此文件。")
        typer.echo("您可以先启动监控服务,然后开始训练。")

        # dirname is "" for a bare file name, and os.makedirs("") raises.
        parent_dir = os.path.dirname(status_file)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        with open(status_file, "w", encoding="utf-8") as f:
            json.dump([], f)
        typer.echo(f"✅ 已创建空状态文件: {status_file}")

    if not check_streamlit_available():
        typer.echo("❌ 错误: Streamlit未安装")
        typer.echo("请安装Streamlit: pip install streamlit")
        typer.echo("或在pyproject.toml中添加streamlit依赖")
        raise typer.Exit(code=1)

    return_code = start_streamlit_server(
        status_file=status_file,
        port=port,
        host=host,
        open_browser=open_browser,
        streamlit_script=streamlit_script,
    )

    raise typer.Exit(code=return_code)


@app.command(name="view")
def view_status(
    status_file: str = typer.Argument(
        "./output/training_status.json", help="训练状态JSON文件路径"
    ),
    limit: int = typer.Option(10, "--limit", "-l", help="显示最近的数据条数"),
    raw: bool = typer.Option(False, "--raw", help="显示原始JSON数据"),
):
    """
    查看训练状态文件内容

    快速查看训练状态JSON文件中的最新数据。
    """
    if not os.path.exists(status_file):
        typer.echo(f"❌ 错误: 状态文件不存在: {status_file}")
        raise typer.Exit(code=1)

    try:
        with open(status_file, "r", encoding="utf-8") as f:
            data = json.load(f)
    except json.JSONDecodeError:
        typer.echo("❌ 错误: 状态文件格式不正确,不是有效的JSON")
        typer.echo("可能是文件正在写入中,请稍后再试。")
        raise typer.Exit(code=1)
    except (OSError, UnicodeDecodeError) as e:
        typer.echo(f"❌ 错误: 读取状态文件时出错: {e}")
        raise typer.Exit(code=1)

    # Same root-shape rule (and message) as the `check` command.
    if not isinstance(data, list):
        typer.echo("❌ JSON格式不正确: 根元素应为列表")
        raise typer.Exit(code=1)

    # Only JSON objects are training records; anything else is ignored
    # instead of crashing on `.get`.
    records = [record for record in data if isinstance(record, dict)]
    if not records:
        typer.echo("ℹ️ 状态文件为空,暂无训练数据。")
        return

    typer.echo(f"📊 训练状态文件: {status_file}")
    typer.echo(f"📈 数据总数: {len(records)} 条")
    typer.echo("")

    # One slicing rule for both output modes; a non-positive limit means "all".
    display_data = records[-limit:] if limit > 0 else records

    if raw:
        typer.echo(json.dumps(display_data, indent=2, ensure_ascii=False))
        return

    # Newest record first; numbering follows the chronological position.
    for i, record in enumerate(reversed(display_data)):
        idx = len(display_data) - i
        typer.echo(f"📌 记录 #{idx} (步数: {record.get('step', 'N/A')})")
        typer.echo(f" 轮次: {record.get('epoch', 'N/A')}")

        if "timestamp" in record:
            typer.echo(f" 时间: {record['timestamp']}")

        for key, label, spec in _METRIC_ROWS:
            if key in record:
                typer.echo(f" {label}: {_format_metric(record[key], spec)}")

        if i < len(display_data) - 1:
            typer.echo(" ---")
    typer.echo("")

    # NOTE(review): indentation was lost in the collapsed patch; the summary
    # is assumed to belong to the formatted (non-raw) view only - confirm.
    latest = records[-1]
    typer.echo("🎯 最新数据摘要:")
    typer.echo(f" 当前步数: {latest.get('step', 'N/A')}")
    typer.echo(f" 当前轮次: {latest.get('epoch', 'N/A')}")
    if "train/loss" in latest:
        typer.echo(f" 训练损失: {_format_metric(latest['train/loss'], '.6f')}")
    if "eval/loss" in latest:
        typer.echo(f" 评估损失: {_format_metric(latest['eval/loss'], '.6f')}")


@app.command(name="check")
def check_status(
    status_file: str = typer.Argument(
        "./output/training_status.json", help="训练状态JSON文件路径"
    ),
):
    """
    检查训练状态文件

    检查状态文件是否存在、格式是否正确,并显示基本信息。
    """
    if not os.path.exists(status_file):
        typer.echo(f"❌ 状态文件不存在: {status_file}")
        typer.echo("💡 提示: 开始训练后会自动创建此文件")
        raise typer.Exit(code=1)

    # Only file access and JSON parsing live inside the try: typer.Exit is an
    # Exception subclass, so raising it under a broad handler would be caught
    # and reported a second time as a generic failure.
    try:
        file_size = os.path.getsize(status_file)
        file_mtime = time.ctime(os.path.getmtime(status_file))

        typer.echo(f"✅ 状态文件: {status_file}")
        typer.echo(f" 文件大小: {file_size:,} 字节")
        typer.echo(f" 修改时间: {file_mtime}")

        with open(status_file, "r", encoding="utf-8") as f:
            data = json.load(f)
    except json.JSONDecodeError:
        typer.echo("❌ JSON格式错误: 文件内容不是有效的JSON")
        typer.echo("可能是文件正在写入中,请稍后再试。")
        raise typer.Exit(code=1)
    except (OSError, UnicodeDecodeError) as e:
        typer.echo(f"❌ 检查状态文件时出错: {e}")
        raise typer.Exit(code=1)

    if not isinstance(data, list):
        typer.echo("❌ JSON格式不正确: 根元素应为列表")
        raise typer.Exit(code=1)

    typer.echo(f"✅ JSON格式正确,包含 {len(data)} 条记录")

    if not data:
        typer.echo(" 数据状态: 文件为空,等待训练数据...")
        return

    latest = data[-1]
    if not isinstance(latest, dict):
        typer.echo("❌ 检查状态文件时出错: 最新记录不是JSON对象")
        raise typer.Exit(code=1)

    typer.echo(f" 最新步数: {latest.get('step', 'N/A')}")
    typer.echo(f" 最新轮次: {latest.get('epoch', 'N/A')}")

    metrics = ["train/loss", "eval/loss", "train/accuracy", "eval/accuracy"]
    available_metrics = [m for m in metrics if m in latest]
    typer.echo(
        f" 可用指标: {', '.join(available_metrics) if available_metrics else '无'}"
    )

    if "timestamp" in latest:
        typer.echo(f" 最后更新时间: {latest['timestamp']}")
        typer.echo(_describe_freshness(latest["timestamp"]))


def main():
    """Run the Typer application (console-script / ``python -m`` entry point)."""
    app()


if __name__ == "__main__":
    main()
a/src/model/trainer.py b/src/model/trainer.py index 61e77f8..b952ef4 100644 --- a/src/model/trainer.py +++ b/src/model/trainer.py @@ -28,6 +28,16 @@ from torch.amp.grad_scaler import GradScaler from torch.utils.data import DataLoader from torch.utils.tensorboard import SummaryWriter +# Try to import DataLoader2 for better streaming dataset support +try: + from torchdata.dataloader2 import DataLoader2, MultiProcessingReadingService + + DATA_LOADER2_AVAILABLE = True +except ImportError: + DATA_LOADER2_AVAILABLE = False + DataLoader2 = None + MultiProcessingReadingService = None + from .dataset import PinyinInputDataset # 导入模型和数据 @@ -67,6 +77,7 @@ class Trainer: save_frequency: int = 10000, mixed_precision: bool = True, device: Optional[torch.device] = None, + status_file: str = "training_status.json", use_tensorboard: bool = True, ): """ @@ -158,6 +169,12 @@ class Trainer: else: self.writer = None + # 设置状态文件 + self.use_tensorboard = use_tensorboard + self.status_file = self.output_dir / status_file + # 如果状态文件已存在,则加载已有数据 + self.training_status_data = self._load_existing_status_data() + # 初始化Rich控制台 self.console = Console() @@ -352,35 +369,131 @@ class Trainer: torch.save(checkpoint, best_path) logger.info(f"Best model saved to {best_path}") - def load_checkpoint(self, checkpoint_path: Union[str, Path]): + def load_checkpoint( + self, checkpoint_path: Union[str, Path], reset_training_state: bool = False + ): """ 加载检查点 Args: checkpoint_path: 检查点文件路径 + reset_training_state: 是否重置训练状态(只加载模型权重,从头开始训练) """ checkpoint = torch.load(checkpoint_path, map_location=self.device) self.model.load_state_dict(checkpoint["model_state_dict"]) - self.optimizer.load_state_dict(checkpoint["optimizer_state_dict"]) - self.scaler.load_state_dict(checkpoint["scaler_state_dict"]) - self.current_step = checkpoint["step"] - self.current_epoch = checkpoint["epoch"] - self.best_eval_loss = checkpoint["best_eval_loss"] + if not reset_training_state: + 
self.optimizer.load_state_dict(checkpoint["optimizer_state_dict"]) + self.scaler.load_state_dict(checkpoint["scaler_state_dict"]) - logger.info(f"Checkpoint loaded from {checkpoint_path}") - logger.info( - f"Resuming from step {self.current_step}, epoch {self.current_epoch}" - ) + self.current_step = checkpoint["step"] + self.current_epoch = checkpoint["epoch"] + self.best_eval_loss = checkpoint["best_eval_loss"] + + logger.info(f"Checkpoint loaded from {checkpoint_path}") + logger.info( + f"Resuming from step {self.current_step}, epoch {self.current_epoch}" + ) + else: + # 重置训练状态 + self.current_step = 0 + self.current_epoch = 0 + self.best_eval_loss = float("inf") + + logger.info( + f"Checkpoint loaded from {checkpoint_path} (training state reset)" + ) + logger.info("Training state reset: starting from step 0, epoch 0") def _log_to_tensorboard(self, metrics: Dict[str, float], step: int): - """将指标记录到TensorBoard""" - if self.writer is None: - return + """将指标记录到TensorBoard和JSON状态文件""" + if self.writer is not None: + for key, value in metrics.items(): + self.writer.add_scalar(key, value, step) - for key, value in metrics.items(): - self.writer.add_scalar(key, value, step) + # 同时记录到JSON状态文件 + self._write_training_status(metrics, step) + + def _load_existing_status_data(self) -> List[Dict]: + """从文件加载已有的训练状态数据""" + try: + if self.status_file.exists(): + with open(self.status_file, "r", encoding="utf-8") as f: + data = json.load(f) + if isinstance(data, list): + logger.info( + f"Loaded {len(data)} existing training status records from {self.status_file}" + ) + return data + else: + logger.warning( + f"Status file {self.status_file} does not contain a list, starting fresh" + ) + return [] + else: + logger.info( + f"Status file {self.status_file} does not exist, starting fresh" + ) + return [] + except json.JSONDecodeError: + logger.warning( + f"Status file {self.status_file} has invalid JSON format, starting fresh" + ) + return [] + except Exception as e: + logger.error( 
+ f"Failed to load existing status data from {self.status_file}: {e}" + ) + return [] + + def _write_training_status(self, metrics: Dict[str, float], step: int): + """将训练状态写入JSON文件""" + try: + # 创建状态记录 + status_record = { + "step": step, + "epoch": self.current_epoch + 1, + "timestamp": datetime.now().isoformat(), + } + + # 添加所有指标 + for key, value in metrics.items(): + status_record[key] = float(value) + + # 检查是否已存在相同步数的记录(避免重复) + existing_indices = [ + i + for i, record in enumerate(self.training_status_data) + if record.get("step") == step + ] + if existing_indices: + # 替换现有记录 + for idx in existing_indices: + self.training_status_data[idx] = status_record + else: + # 添加到内存缓存 + self.training_status_data.append(status_record) + + # 限制内存中的数据量,只保留最近1000条记录 + if len(self.training_status_data) > 1000: + self.training_status_data = self.training_status_data[-1000:] + + # 确保数据是列表格式 + if not isinstance(self.training_status_data, list): + logger.warning( + f"training_status_data is not a list (type: {type(self.training_status_data).__name__}), converting to list" + ) + self.training_status_data = ( + [self.training_status_data] if self.training_status_data else [] + ) + + # 写入文件 + with open(self.status_file, "w", encoding="utf-8") as f: + json.dump(self.training_status_data, f, indent=2, ensure_ascii=False) + + except Exception as e: + logger.error(f"Failed to write training status: {e}") def _create_progress_bar(self) -> Progress: """创建Rich进度条""" @@ -417,16 +530,19 @@ class Trainer: self.console.print(info_table) - def train(self, resume_from: Optional[str] = None): + def train( + self, resume_from: Optional[str] = None, reset_training_state: bool = False + ): """ 主训练循环 Args: resume_from: 从哪个检查点恢复训练(可选) + reset_training_state: 是否重置训练状态(只加载模型权重,从头开始训练) """ # 如果提供了检查点,则恢复训练 if resume_from is not None: - self.load_checkpoint(resume_from) + self.load_checkpoint(resume_from, reset_training_state=reset_training_state) # 打印训练信息 self._print_training_info() @@ -624,6 +740,72 @@ 
def collate_fn(batch: List[Dict[str, Any]]) -> Dict[str, Any]: # Typer CLI应用 +def create_dataloader( + dataset: PinyinInputDataset, + batch_size: int, + num_workers: int = 2, + pin_memory: bool = True, + shuffle: bool = False, + max_iter_length: Optional[int] = None, +) -> Any: + """ + 创建数据加载器,优先使用DataLoader2,如果不可用则回退到DataLoader。 + 专门针对流式数据集优化。 + + Args: + dataset: PinyinInputDataset实例 + batch_size: 批次大小 + num_workers: worker数量(对于流式数据集建议为2) + pin_memory: 是否固定内存 + shuffle: 是否打乱(流式数据集内部处理打乱) + max_iter_length: 最大迭代长度,用于计算总步数 + + Returns: + 数据加载器实例 + """ + if ( + DATA_LOADER2_AVAILABLE + and DataLoader2 is not None + and MultiProcessingReadingService is not None + ): + try: + # DataLoader2配置,针对流式数据集优化 + reading_service = MultiProcessingReadingService( + num_workers=num_workers, + prefetch_factor=2, # 减少预取以避免内存问题 + persistent_workers=True, + pin_memory=pin_memory, + worker_init_fn=worker_init_fn, + ) + + dataloader = DataLoader2( + dataset, + reading_service=reading_service, + batch_size=batch_size, + collate_fn=collate_fn, + shuffle=shuffle, + ) + logger.info(f"✅ 使用DataLoader2创建数据加载器,worker数量: {num_workers}") + return dataloader + except Exception as e: + logger.warning(f"⚠️ DataLoader2创建失败: {e},回退到标准DataLoader") + + # 回退到标准DataLoader + logger.info(f"📊 使用标准DataLoader,worker数量: {num_workers}") + dataloader = DataLoader( + dataset, + batch_size=batch_size, + num_workers=num_workers, + pin_memory=pin_memory, + worker_init_fn=worker_init_fn, + collate_fn=collate_fn, + prefetch_factor=2, # 减少预取以避免内存问题 + persistent_workers=True, + shuffle=shuffle, + ) + return dataloader + + app = typer.Typer(help="输入法模型训练命令行工具", add_completion=False) @@ -672,12 +854,18 @@ def train( mixed_precision: bool = typer.Option( True, "--mixed-precision/--no-mixed-precision", help="是否使用混合精度训练" ), + num_workers: int = typer.Option( + 2, "--num-workers", help="数据加载worker数量(流式数据集建议为2)" + ), use_tensorboard: bool = typer.Option( True, "--tensorboard/--no-tensorboard", help="是否使用TensorBoard" ), 
resume_from: Optional[str] = typer.Option( None, "--resume-from", help="从检查点恢复训练" ), + reset_training_state: bool = typer.Option( + False, "--reset-training-state", help="重置训练状态,只加载模型权重从头开始训练" + ), seed: int = typer.Option(42, "--seed", help="随机种子"), ): """ @@ -707,6 +895,7 @@ def train( config_table.add_row("数据", "评估数据路径", eval_data_path) config_table.add_row("数据", "输出目录", output_dir) config_table.add_row("数据", "批次大小", str(batch_size)) + config_table.add_row("数据", "Worker数量", str(num_workers)) config_table.add_row("模型", "词汇表大小", str(vocab_size)) config_table.add_row("模型", "拼音词汇表", str(pinyin_vocab_size)) @@ -748,6 +937,7 @@ def train( "max_seq_len": max_seq_len, "use_pinyin": use_pinyin, "batch_size": batch_size, + "num_workers": num_workers, "num_epochs": num_epochs, "learning_rate": learning_rate, "min_learning_rate": min_learning_rate, @@ -788,15 +978,12 @@ def train( # 训练数据加载器 # 注意:PinyinInputDataset是IterableDataset,所以不能使用shuffle参数 # 多worker配置:每个worker处理数据集的一个分片,由dataset.__iter__中的shard处理 - train_dataloader = DataLoader( - train_dataset, + train_dataloader = create_dataloader( + dataset=train_dataset, batch_size=batch_size, - num_workers=2, + num_workers=num_workers, pin_memory=torch.cuda.is_available(), - worker_init_fn=worker_init_fn, - collate_fn=collate_fn, - prefetch_factor=64, # 每个worker预取64个batch,适合大内存场景 - persistent_workers=True, # 保持worker进程存活,避免重建开销 + max_iter_length=max_iter_length, ) # 评估数据集(使用相同的设置,但可以调整参数) @@ -811,15 +998,12 @@ def train( length_weights={1: 10, 2: 50, 3: 50, 4: 40, 5: 15, 6: 10, 7: 5, 8: 2}, ) - eval_dataloader = DataLoader( - eval_dataset, + eval_dataloader = create_dataloader( + dataset=eval_dataset, batch_size=batch_size, - num_workers=2, + num_workers=1, # 评估使用较少的worker pin_memory=torch.cuda.is_available(), - worker_init_fn=worker_init_fn, - collate_fn=collate_fn, - prefetch_factor=64, # 每个worker预取64个batch - persistent_workers=True, # 保持worker进程存活 + max_iter_length=batch_size * 64, ) console.print("[bold cyan]正在创建模型...[/bold 
cyan]") @@ -858,6 +1042,7 @@ def train( save_frequency=save_frequency, mixed_precision=mixed_precision, use_tensorboard=use_tensorboard, + status_file="training_status.json", ) console.print("[green]✓ 训练器创建完成[/green]") @@ -866,7 +1051,9 @@ def train( console.print("\n[bold cyan]开始训练...[/bold cyan]") console.print(f"开始时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") try: - trainer.train(resume_from=resume_from) + trainer.train( + resume_from=resume_from, reset_training_state=reset_training_state + ) except KeyboardInterrupt: console.print("[bold green]训练被终止[/bold green]") trainer.save_checkpoint("interrupted_model.pt") diff --git a/src/model/training_monitor.py b/src/model/training_monitor.py new file mode 100644 index 0000000..737921d --- /dev/null +++ b/src/model/training_monitor.py @@ -0,0 +1,650 @@ +import json +import os +import sys +import time +from datetime import datetime +from pathlib import Path + +import pandas as pd +import plotly.express as px +import plotly.graph_objects as go +import streamlit as st +from plotly.subplots import make_subplots + +# 添加项目路径到系统路径,以便导入模型 +sys.path.append(str(Path(__file__).parent.parent)) + +# 设置页面配置 - 移动端友好 +st.set_page_config( + page_title="AI模型训练监控看板", + page_icon="📈", + layout="wide", + initial_sidebar_state="collapsed", # 移动端默认收起侧边栏 +) + +# 自定义CSS样式,优化移动端体验 +st.markdown( + """ + +""", + unsafe_allow_html=True, +) + + +def load_training_data(file_path): + """加载训练状态数据""" + try: + if not os.path.exists(file_path): + st.warning(f"文件不存在: {file_path}") + return pd.DataFrame() + + with open(file_path, "r", encoding="utf-8") as f: + data = json.load(f) + + # 检测是否是配置文件(检查是否有典型的配置键) + config_keys = [ + "train_data_path", + "eval_data_path", + "output_dir", + "vocab_size", + "batch_size", + "num_epochs", + ] + if isinstance(data, dict): + # 检查是否是配置文件 + if any(key in data for key in config_keys): + st.error("❌ 检测到配置文件,请检查文件路径是否正确") + return pd.DataFrame() + + # 如果是单个训练状态字典,包装成列表 + data = [data] + elif isinstance(data, list): 
+ # 检查列表中的第一个元素是否是配置文件 + if data and isinstance(data[0], dict): + if any(key in data[0] for key in config_keys): + st.error("❌ 检测到配置文件,请检查文件路径是否正确") + return pd.DataFrame() + + # 已经是列表,检查是否为空 + if len(data) == 0: + return pd.DataFrame() + else: + return pd.DataFrame() + + # 确保data是列表(经过上面的处理,它应该是列表) + if not isinstance(data, list): + return pd.DataFrame() + + # 清理数据:确保所有元素都是字典 + cleaned_data = [] + for i, item in enumerate(data): + if isinstance(item, dict): + # 检查是否是训练状态数据(包含训练指标) + if "step" in item or "train/loss" in item or "timestamp" in item: + cleaned_data.append(item) + else: + continue + + if len(cleaned_data) == 0: + return pd.DataFrame() + + # 使用清理后的数据创建DataFrame + try: + df = pd.DataFrame(cleaned_data, index=range(len(cleaned_data))) + except Exception: + try: + df = pd.DataFrame.from_records(cleaned_data) + except Exception: + return pd.DataFrame() + + # 确保时间戳为datetime类型 + if "timestamp" in df.columns: + try: + df["timestamp"] = pd.to_datetime(df["timestamp"]) + except Exception: + pass + + return df + + except json.JSONDecodeError: + return pd.DataFrame() + except Exception: + return pd.DataFrame() + + +def create_metric_card(label, value, delta=None, help_text=None): + """创建指标卡片""" + col1, col2 = st.columns([3, 1]) + + with col1: + if delta is not None: + st.metric(label=label, value=value, delta=f"{delta:+.4f}") + else: + st.metric(label=label, value=value) + + if help_text: + st.caption(help_text) + + return col1, col2 + + +def create_loss_chart(df): + """创建损失图表""" + fig = go.Figure() + + if df.empty: + return fig + + # 训练损失 + if "train/loss" in df.columns: + fig.add_trace( + go.Scatter( + x=df["step"], + y=df["train/loss"], + mode="lines+markers", + name="训练损失", + line=dict(color="#1f77b4", width=2), + marker=dict(size=4), + ) + ) + + # 评估损失 + if "eval/loss" in df.columns: + fig.add_trace( + go.Scatter( + x=df["step"], + y=df["eval/loss"], + mode="lines+markers", + name="评估损失", + line=dict(color="#ff7f0e", width=2, dash="dash"), + 
marker=dict(size=4), + ) + ) + + fig.update_layout( + title="损失曲线", + xaxis_title="训练步数", + yaxis_title="损失值", + hovermode="x unified", + template="plotly_white", + height=400, + margin=dict(l=40, r=40, t=60, b=40), + legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1), + ) + + # 添加网格 + fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor="LightGray") + fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor="LightGray") + + return fig + + +def create_accuracy_chart(df): + """创建准确率图表""" + fig = go.Figure() + + if df.empty: + return fig + + # 训练准确率 + if "train/accuracy" in df.columns: + fig.add_trace( + go.Scatter( + x=df["step"], + y=df["train/accuracy"], + mode="lines+markers", + name="训练准确率", + line=dict(color="#2ca02c", width=2), + marker=dict(size=4), + ) + ) + + # 评估准确率 + if "eval/accuracy" in df.columns: + fig.add_trace( + go.Scatter( + x=df["step"], + y=df["eval/accuracy"], + mode="lines+markers", + name="评估准确率", + line=dict(color="#d62728", width=2, dash="dash"), + marker=dict(size=4), + ) + ) + + fig.update_layout( + title="准确率曲线", + xaxis_title="训练步数", + yaxis_title="准确率", + hovermode="x unified", + template="plotly_white", + height=400, + margin=dict(l=40, r=40, t=60, b=40), + legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1), + ) + + # 添加网格 + fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor="LightGray") + fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor="LightGray") + + return fig + + +def create_learning_rate_chart(df): + """创建学习率图表""" + if "train/learning_rate" not in df.columns: + return None + + fig = go.Figure() + + fig.add_trace( + go.Scatter( + x=df["step"], + y=df["train/learning_rate"], + mode="lines+markers", + name="学习率", + line=dict(color="#9467bd", width=2), + marker=dict(size=4), + ) + ) + + fig.update_layout( + title="学习率变化", + xaxis_title="训练步数", + yaxis_title="学习率", + hovermode="x unified", + template="plotly_white", + height=300, + margin=dict(l=40, r=40, t=60, b=40), + 
yaxis_type="log", # 对数坐标,适合学习率 + ) + + # 添加网格 + fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor="LightGray") + fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor="LightGray") + + return fig + + +def create_training_summary(df): + """创建训练摘要""" + if df.empty: + return None + + latest = df.iloc[-1] + summary = {} + + # 基础信息 + summary["当前步数"] = int(latest["step"]) if "step" in latest else 0 + summary["当前轮次"] = int(latest["epoch"]) if "epoch" in latest else 0 + + # 训练指标 + if "train/loss" in latest: + summary["训练损失"] = float(latest["train/loss"]) + if "train/accuracy" in latest: + summary["训练准确率"] = float(latest["train/accuracy"]) + + # 评估指标 + if "eval/loss" in latest: + summary["评估损失"] = float(latest["eval/loss"]) + if "eval/accuracy" in latest: + summary["评估准确率"] = float(latest["eval/accuracy"]) + + # 学习率 + if "train/learning_rate" in latest: + summary["当前学习率"] = float(latest["train/learning_rate"]) + + # 时间信息 + if "timestamp" in latest: + summary["最后更新时间"] = pd.to_datetime(latest["timestamp"]).strftime( + "%Y-%m-%d %H:%M:%S" + ) + + return summary + + +def main(): + """主函数""" + # 标题 + st.title("📈 AI模型训练实时监控看板") + st.markdown("基于JSON旁路记录法的移动端友好监控方案") + + # 侧边栏配置 + with st.sidebar: + st.header("⚙️ 监控设置") + + # 文件路径选择 + default_status_file = os.environ.get( + "TRAINING_STATUS_FILE", "./output/training_status.json" + ) + status_file = st.text_input( + "状态文件路径", + value=default_status_file, + help="训练过程中生成的JSON状态文件路径", + ) + + # 刷新间隔 + refresh_interval = st.slider( + "自动刷新间隔(秒)", + min_value=1, + max_value=30, + value=5, + help="数据自动刷新间隔时间", + ) + + # 数据限制 + max_data_points = st.slider( + "显示数据点数量", + min_value=10, + max_value=1000, + value=500, + help="图表中显示的最大数据点数量", + ) + + # 手动刷新按钮 + if st.button("🔄 手动刷新数据"): + st.rerun() + + st.divider() + + # 状态信息 + st.subheader("📊 系统状态") + current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + st.text(f"当前时间: {current_time}") + + if os.path.exists(status_file): + file_size = os.path.getsize(status_file) + 
file_mtime = datetime.fromtimestamp(os.path.getmtime(status_file)) + st.text(f"文件大小: {file_size:,} 字节") + st.text(f"最后修改: {file_mtime.strftime('%Y-%m-%d %H:%M:%S')}") + st.success("✅ 状态文件正常") + else: + st.warning("⚠️ 状态文件不存在") + + # 主内容区域 + # 使用列布局适应移动端 + col1, col2 = st.columns([1, 1]) + + # 加载数据 + df = load_training_data(status_file) + + if df.empty: + st.warning("暂无训练数据,请检查状态文件路径是否正确。") + st.info("开始训练后,数据将自动显示在这里。") + + # 显示示例数据格式 + with st.expander("📝 数据格式示例"): + st.code( + """[ + { + "step": 100, + "epoch": 1, + "timestamp": "2024-01-01T12:00:00", + "train/loss": 2.345, + "train/accuracy": 0.456, + "eval/loss": 2.123, + "eval/accuracy": 0.512, + "train/learning_rate": 0.0001 + }, + ... +]""", + language="json", + ) + return + + # 数据处理 + df_display = df.tail(max_data_points).copy() + + # 计算变化量(用于指标卡片) + if len(df_display) >= 2: + prev = df_display.iloc[-2] + latest = df_display.iloc[-1] + + loss_delta = None + acc_delta = None + lr_delta = None + + if "train/loss" in latest and "train/loss" in prev: + loss_delta = float(latest["train/loss"]) - float(prev["train/loss"]) + + if "train/accuracy" in latest and "train/accuracy" in prev: + acc_delta = float(latest["train/accuracy"]) - float(prev["train/accuracy"]) + + if "train/learning_rate" in latest and "train/learning_rate" in prev: + lr_delta = float(latest["train/learning_rate"]) - float( + prev["train/learning_rate"] + ) + else: + loss_delta = None + acc_delta = None + lr_delta = None + + latest = df_display.iloc[-1] + + # 关键指标卡片 - 第一行 + st.subheader("📊 关键指标") + + metric_cols = st.columns(4) + with metric_cols[0]: + if "step" in latest: + st.metric("当前步数", f"{int(latest['step']):,}") + + with metric_cols[1]: + if "epoch" in latest: + st.metric("当前轮次", f"{int(latest['epoch'])}") + + with metric_cols[2]: + if "train/loss" in latest: + st.metric( + "训练损失", + f"{float(latest['train/loss']):.4f}", + delta=f"{loss_delta:+.4f}" if loss_delta is not None else None, + ) + + with metric_cols[3]: + if "train/accuracy" 
in latest: + st.metric( + "训练准确率", + f"{float(latest['train/accuracy']):.4f}", + delta=f"{acc_delta:+.4f}" if acc_delta is not None else None, + ) + + # 评估指标 - 第二行 + eval_cols = st.columns(4) + with eval_cols[0]: + if "eval/loss" in latest: + eval_loss = float(latest["eval/loss"]) + prev_eval_loss = ( + float(df_display.iloc[-2]["eval/loss"]) + if len(df_display) >= 2 and "eval/loss" in df_display.iloc[-2] + else None + ) + delta = eval_loss - prev_eval_loss if prev_eval_loss is not None else None + st.metric( + "评估损失", + f"{eval_loss:.4f}", + delta=f"{delta:+.4f}" if delta is not None else None, + ) + + with eval_cols[1]: + if "eval/accuracy" in latest: + eval_acc = float(latest["eval/accuracy"]) + prev_eval_acc = ( + float(df_display.iloc[-2]["eval/accuracy"]) + if len(df_display) >= 2 and "eval/accuracy" in df_display.iloc[-2] + else None + ) + delta = eval_acc - prev_eval_acc if prev_eval_acc is not None else None + st.metric( + "评估准确率", + f"{eval_acc:.4f}", + delta=f"{delta:+.4f}" if delta is not None else None, + ) + + with eval_cols[2]: + if "train/learning_rate" in latest: + st.metric( + "学习率", + f"{float(latest['train/learning_rate']):.2e}", + delta=f"{lr_delta:+.2e}" if lr_delta is not None else None, + ) + + with eval_cols[3]: + if "timestamp" in latest: + timestamp = pd.to_datetime(latest["timestamp"]) + st.metric("最后更新", timestamp.strftime("%H:%M:%S")) + + st.divider() + + # 图表区域 + st.subheader("📈 训练曲线") + + # 损失图表 + loss_fig = create_loss_chart(df_display) + st.plotly_chart(loss_fig, width="stretch", config={"responsive": True}) + + # 准确率图表 + acc_fig = create_accuracy_chart(df_display) + st.plotly_chart(acc_fig, width="stretch", config={"responsive": True}) + + # 学习率图表 + lr_fig = create_learning_rate_chart(df_display) + if lr_fig: + st.plotly_chart(lr_fig, width="stretch", config={"responsive": True}) + + st.divider() + + # 数据详情 + with st.expander("📋 数据详情", expanded=False): + st.dataframe( + df_display, + width="stretch", + hide_index=True, + 
column_config={ + "step": st.column_config.NumberColumn("步数", format="%d"), + "epoch": st.column_config.NumberColumn("轮次", format="%d"), + "timestamp": st.column_config.DatetimeColumn("时间"), + "train/loss": st.column_config.NumberColumn("训练损失", format="%.4f"), + "train/accuracy": st.column_config.NumberColumn( + "训练准确率", format="%.4f" + ), + "eval/loss": st.column_config.NumberColumn("评估损失", format="%.4f"), + "eval/accuracy": st.column_config.NumberColumn( + "评估准确率", format="%.4f" + ), + "train/learning_rate": st.column_config.NumberColumn( + "学习率", format="%.2e" + ), + }, + ) + + # 统计数据 + st.subheader("📊 统计信息") + stats_cols = st.columns(3) + + with stats_cols[0]: + st.metric("总数据点", f"{len(df):,}") + + with stats_cols[1]: + if not df.empty and "timestamp" in df.columns: + start_time = df["timestamp"].min() + end_time = df["timestamp"].max() + duration = (end_time - start_time).total_seconds() / 3600 # 小时 + st.metric("训练时长", f"{duration:.2f} 小时") + + with stats_cols[2]: + if not df.empty and "step" in df.columns: + total_steps = df["step"].max() - df["step"].min() + st.metric("总步数", f"{total_steps:,}") + + # 训练进度信息 + if "step" in df.columns and "epoch" in df.columns: + current_step = df["step"].max() + current_epoch = df["epoch"].max() + + progress_text = f"训练进度: 第 {current_epoch} 轮,第 {current_step:,} 步" + st.progress(min(current_step / (current_step + 1000), 1.0), text=progress_text) + + # 底部状态栏 + st.divider() + + footer_cols = st.columns([3, 1]) + with footer_cols[0]: + st.caption( + f"监控服务运行中 | 数据文件: {status_file} | 最后刷新: {datetime.now().strftime('%H:%M:%S')}" + ) + + with footer_cols[1]: + if st.button("🔄 立即刷新"): + st.rerun() + + # 自动刷新 + time.sleep(refresh_interval) + st.rerun() + + +if __name__ == "__main__": + main() diff --git a/test.py b/test.py index ebe7663..f74f6c1 100644 --- a/test.py +++ b/test.py @@ -7,23 +7,97 @@ from tqdm import tqdm from model.dataset import PinyinInputDataset from model.trainer import collate_fn, worker_init_fn +# Try to import 
DataLoader2 from torchdata, fallback to standard DataLoader +try: + from torchdata.dataloader2 import DataLoader2, MultiProcessingReadingService + + DATA_LOADER2_AVAILABLE = True + print("✅ Using DataLoader2 from torchdata") +except ImportError: + DATA_LOADER2_AVAILABLE = False + print("⚠️ torchdata not installed, falling back to standard DataLoader") + +max_iter_length = 128 * 128 +batch_size = 1024 + if sys.platform == "win32": dataset_path = "data" else: dataset_path = "/home/songsenand/Data/corpus/CCI-Data/" -dataset = PinyinInputDataset(dataset_path, max_iter_length=128 * 128) -dataloader = DataLoader( - dataset, - batch_size=128, - num_workers=2, - pin_memory=torch.cuda.is_available(), - worker_init_fn=worker_init_fn, - collate_fn=collate_fn, - prefetch_factor=64, # 每个worker预取64个batch,适合大内存场景 - persistent_workers=True, # 保持worker进程存活,避免重建开销 -) -dataloader = list([i for i in dataloader]) -print(len(dataloader[0]["labels"])) -for i, line in tqdm(enumerate(dataloader), total=128): - print(line["pinyin_ids"].squeeze(-1).shape) +dataset = PinyinInputDataset(dataset_path, max_iter_length=max_iter_length) + + +def create_dataloader(): + """ + Create dataloader with DataLoader2 if available, otherwise fallback to DataLoader. + This function tries to handle streaming datasets better with DataLoader2. 
+ """ + if DATA_LOADER2_AVAILABLE: + try: + # DataLoader2 configuration for streaming datasets + # Use MultiProcessingReadingService with careful worker settings + reading_service = MultiProcessingReadingService( + num_workers=2, # Start with 2 workers for streaming dataset + prefetch_factor=2, # Reduced prefetch for better memory management + persistent_workers=True, + pin_memory=torch.cuda.is_available(), + worker_init_fn=worker_init_fn, + ) + + dataloader = DataLoader2( + dataset, + reading_service=reading_service, + batch_size=batch_size, + collate_fn=collate_fn, + shuffle=False, # Dataset handles shuffling internally + ) + print(f"✅ Created DataLoader2 with {2} workers") + return dataloader + except Exception as e: + print(f"⚠️ DataLoader2 creation failed: {e}, falling back to DataLoader") + + # Fallback to standard DataLoader + print("📊 Using standard DataLoader") + dataloader = DataLoader( + dataset, + batch_size=batch_size, + num_workers=2, # Limited to 2 for streaming dataset compatibility + pin_memory=torch.cuda.is_available(), + worker_init_fn=worker_init_fn, + collate_fn=collate_fn, + prefetch_factor=2, # Reduced from 64 to avoid memory issues + persistent_workers=True, + ) + return dataloader + + +# Create the dataloader +dataloader = create_dataloader() + +# Test the dataloader +print(f"🔍 Testing dataloader with batch_size={batch_size}") +print(f" Dataset max_iter_length: {max_iter_length}") +print(f" Expected batches: {max_iter_length / batch_size:.0f}") + +try: + # Convert to list to test loading (as in original code) + dataloader_list = list([i for i in dataloader]) + print(f"✅ Successfully loaded {len(dataloader_list)} batches") + + # Process batches + for i, line in tqdm(enumerate(dataloader_list), total=len(dataloader_list)): + zero_labels = (line["labels"] == 0).sum() + print(f"Batch {i}: labels==0 count = {zero_labels.item()}") + # Early exit for testing + if i >= 5: # Limit to 5 batches for quick testing + print("⚠️ Limited to 5 batches for 
testing") + break + +except Exception as e: + print(f"❌ Error during dataloader iteration: {e}") + import traceback + + traceback.print_exc() + +print("🏁 Test completed") diff --git a/test.py.backup b/test.py.backup new file mode 100644 index 0000000..67d0f69 --- /dev/null +++ b/test.py.backup @@ -0,0 +1,31 @@ +import sys + +import torch +from torch.utils.data import DataLoader +from tqdm import tqdm + +from model.dataset import PinyinInputDataset +from model.trainer import collate_fn, worker_init_fn + +max_iter_length = 128 * 128 +batch_size = 1024 + +if sys.platform == "win32": + dataset_path = "data" +else: + dataset_path = "/home/songsenand/Data/corpus/CCI-Data/" + +dataset = PinyinInputDataset(dataset_path, max_iter_length=max_iter_length) +dataloader = DataLoader( + dataset, + batch_size=batch_size, + num_workers=2, + pin_memory=torch.cuda.is_available(), + worker_init_fn=worker_init_fn, + collate_fn=collate_fn, + prefetch_factor=64, # 每个worker预取64个batch,适合大内存场景 + persistent_workers=True, # 保持worker进程存活,避免重建开销 +) +dataloader = list([i for i in dataloader]) +for i, line in tqdm(enumerate(dataloader), total=max_iter_length / batch_size): + print((line["labels"] == 0).sum()) diff --git a/uv.lock b/uv.lock index 421de57..9611c2b 100644 --- a/uv.lock +++ b/uv.lock @@ -133,6 +133,22 @@ wheels = [ { url = "https://mirrors.aliyun.com/pypi/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e" }, ] +[[package]] +name = "altair" +version = "6.0.0" +source = { registry = "https://mirrors.aliyun.com/pypi/simple/" } +dependencies = [ + { name = "jinja2" }, + { name = "jsonschema" }, + { name = "narwhals" }, + { name = "packaging" }, + { name = "typing-extensions", marker = "python_full_version < '3.15'" }, +] +sdist = { url = 
"https://mirrors.aliyun.com/pypi/packages/f7/c0/184a89bd5feba14ff3c41cfaf1dd8a82c05f5ceedbc92145e17042eb08a4/altair-6.0.0.tar.gz", hash = "sha256:614bf5ecbe2337347b590afb111929aa9c16c9527c4887d96c9bc7f6640756b4" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/db/33/ef2f2409450ef6daa61459d5de5c08128e7d3edb773fefd0a324d1310238/altair-6.0.0-py3-none-any.whl", hash = "sha256:09ae95b53d5fe5b16987dccc785a7af8588f2dca50de1e7a156efa8a461515f8" }, +] + [[package]] name = "annotated-doc" version = "0.0.4" @@ -187,6 +203,15 @@ dependencies = [ { name = "typer" }, ] +[[package]] +name = "blinker" +version = "1.9.0" +source = { registry = "https://mirrors.aliyun.com/pypi/simple/" } +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/21/28/9b3f50ce0e048515135495f198351908d99540d69bfdc8c1d15b73dc55ce/blinker-1.9.0.tar.gz", hash = "sha256:b4ce2265a7abece45e7cc896e98dbebe6cead56bcf805a3d23136d145f5445bf" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/10/cb/f2ad4230dc2eb1a74edf38f1a38b9b52277f75bef262d8908e60d957e13c/blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc" }, +] + [[package]] name = "bokeh" version = "3.9.0" @@ -207,6 +232,15 @@ wheels = [ { url = "https://mirrors.aliyun.com/pypi/packages/47/0b/bdf449df87be3f07b23091ceafee8c3ef569cf6d2fb7edec6e3b12b3faa4/bokeh-3.9.0-py3-none-any.whl", hash = "sha256:b252bfb16a505f0e0c57d532d0df308ae1667235bafc622aa9441fe9e7c5ce4a" }, ] +[[package]] +name = "cachetools" +version = "7.0.5" +source = { registry = "https://mirrors.aliyun.com/pypi/simple/" } +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/af/dd/57fe3fdb6e65b25a5987fd2cdc7e22db0aef508b91634d2e57d22928d41b/cachetools-7.0.5.tar.gz", hash = "sha256:0cd042c24377200c1dcd225f8b7b12b0ca53cc2c961b43757e774ebe190fd990" } +wheels = [ + { url = 
"https://mirrors.aliyun.com/pypi/packages/06/f3/39cf3367b8107baa44f861dc802cbf16263c945b62d8265d36034fc07bea/cachetools-7.0.5-py3-none-any.whl", hash = "sha256:46bc8ebefbe485407621d0a4264b23c080cedd913921bad7ac3ed2f26c183114" }, +] + [[package]] name = "certifi" version = "2026.2.25" @@ -608,6 +642,30 @@ http = [ { name = "aiohttp" }, ] +[[package]] +name = "gitdb" +version = "4.0.12" +source = { registry = "https://mirrors.aliyun.com/pypi/simple/" } +dependencies = [ + { name = "smmap" }, +] +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/72/94/63b0fc47eb32792c7ba1fe1b694daec9a63620db1e313033d18140c2320a/gitdb-4.0.12.tar.gz", hash = "sha256:5ef71f855d191a3326fcfbc0d5da835f26b13fbcba60c32c21091c349ffdb571" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/a0/61/5c78b91c3143ed5c14207f463aecfc8f9dbb5092fb2869baf37c273b2705/gitdb-4.0.12-py3-none-any.whl", hash = "sha256:67073e15955400952c6565cc3e707c554a4eea2e428946f7a4c162fab9bd9bcf" }, +] + +[[package]] +name = "gitpython" +version = "3.1.46" +source = { registry = "https://mirrors.aliyun.com/pypi/simple/" } +dependencies = [ + { name = "gitdb" }, +] +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/df/b5/59d16470a1f0dfe8c793f9ef56fd3826093fc52b3bd96d6b9d6c26c7e27b/gitpython-3.1.46.tar.gz", hash = "sha256:400124c7d0ef4ea03f7310ac2fbf7151e09ff97f2a3288d64a440c584a29c37f" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/6a/09/e21df6aef1e1ffc0c816f0522ddc3f6dcded766c3261813131c78a704470/gitpython-3.1.46-py3-none-any.whl", hash = "sha256:79812ed143d9d25b6d176a10bb511de0f9c67b1fa641d82097b0ab90398a2058" }, +] + [[package]] name = "grpcio" version = "1.78.0" @@ -836,6 +894,33 @@ wheels = [ { url = "https://mirrors.aliyun.com/pypi/packages/67/8a/a342b2f0251f3dac4ca17618265d93bf244a2a4d089126e81e4c1056ac50/jiter-0.13.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:7bb00b6d26db67a05fe3e12c76edc75f32077fb51deed13822dc648fa373bc19" }, ] +[[package]] +name = "jsonschema" +version = "4.26.0" +source = { registry = "https://mirrors.aliyun.com/pypi/simple/" } +dependencies = [ + { name = "attrs" }, + { name = "jsonschema-specifications" }, + { name = "referencing" }, + { name = "rpds-py" }, +] +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/b3/fc/e067678238fa451312d4c62bf6e6cf5ec56375422aee02f9cb5f909b3047/jsonschema-4.26.0.tar.gz", hash = "sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/69/90/f63fb5873511e014207a475e2bb4e8b2e570d655b00ac19a9a0ca0a385ee/jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce" }, +] + +[[package]] +name = "jsonschema-specifications" +version = "2025.9.1" +source = { registry = "https://mirrors.aliyun.com/pypi/simple/" } +dependencies = [ + { name = "referencing" }, +] +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/19/74/a633ee74eb36c44aa6d1095e7cc5569bebf04342ee146178e2d36600708b/jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe" }, +] + [[package]] name = "loguru" version = "0.7.3" @@ -1538,6 +1623,19 @@ wheels = [ { url = "https://mirrors.aliyun.com/pypi/packages/ec/d2/de599c95ba0a973b94410477f8bf0b6f0b5e67360eb89bcb1ad365258beb/pillow-12.1.1-cp314-cp314t-win_arm64.whl", hash = "sha256:7b03048319bfc6170e93bd60728a1af51d3dd7704935feb228c4d4faab35d334" }, ] +[[package]] +name = "plotly" +version = "6.6.0" +source = { registry = "https://mirrors.aliyun.com/pypi/simple/" } +dependencies 
= [ + { name = "narwhals" }, + { name = "packaging" }, +] +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/24/fb/41efe84970cfddefd4ccf025e2cbfafe780004555f583e93dba3dac2cdef/plotly-6.6.0.tar.gz", hash = "sha256:b897f15f3b02028d69f755f236be890ba950d0a42d7dfc619b44e2d8cea8748c" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/52/d2/c6e44dba74f17c6216ce1b56044a9b93a929f1c2d5bdaff892512b260f5e/plotly-6.6.0-py3-none-any.whl", hash = "sha256:8d6daf0f87412e0c0bfe72e809d615217ab57cc715899a1e5145135a7800d1d0" }, +] + [[package]] name = "pluggy" version = "1.6.0" @@ -1775,6 +1873,19 @@ wheels = [ { url = "https://mirrors.aliyun.com/pypi/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b" }, ] +[[package]] +name = "pydeck" +version = "0.9.1" +source = { registry = "https://mirrors.aliyun.com/pypi/simple/" } +dependencies = [ + { name = "jinja2" }, + { name = "numpy" }, +] +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/a1/ca/40e14e196864a0f61a92abb14d09b3d3da98f94ccb03b49cf51688140dab/pydeck-0.9.1.tar.gz", hash = "sha256:f74475ae637951d63f2ee58326757f8d4f9cd9f2a457cf42950715003e2cb605" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/ab/4c/b888e6cf58bd9db9c93f40d1c6be8283ff49d88919231afe93a6bcf61626/pydeck-0.9.1-py2.py3-none-any.whl", hash = "sha256:b3f75ba0d273fc917094fa61224f3f6076ca8752b93d46faf3bcfd9f9d59b038" }, +] + [[package]] name = "pygments" version = "2.19.2" @@ -1867,6 +1978,20 @@ wheels = [ { url = "https://mirrors.aliyun.com/pypi/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b" }, ] +[[package]] +name = "referencing" +version = "0.37.0" +source = { registry 
= "https://mirrors.aliyun.com/pypi/simple/" } +dependencies = [ + { name = "attrs" }, + { name = "rpds-py" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231" }, +] + [[package]] name = "regex" version = "2026.3.32" @@ -1983,6 +2108,87 @@ wheels = [ { url = "https://mirrors.aliyun.com/pypi/packages/14/25/b208c5683343959b670dc001595f2f3737e051da617f66c31f7c4fa93abc/rich-14.3.3-py3-none-any.whl", hash = "sha256:793431c1f8619afa7d3b52b2cdec859562b950ea0d4b6b505397612db8d5362d" }, ] +[[package]] +name = "rpds-py" +version = "0.30.0" +source = { registry = "https://mirrors.aliyun.com/pypi/simple/" } +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/20/af/3f2f423103f1113b36230496629986e0ef7e199d2aa8392452b484b38ced/rpds_py-0.30.0.tar.gz", hash = "sha256:dd8ff7cf90014af0c0f787eea34794ebf6415242ee1d6fa91eaba725cc441e84" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/03/e7/98a2f4ac921d82f33e03f3835f5bf3a4a40aa1bfdc57975e74a97b2b4bdd/rpds_py-0.30.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a161f20d9a43006833cd7068375a94d035714d73a172b681d8881820600abfad" }, + { url = "https://mirrors.aliyun.com/pypi/packages/4d/a1/bca7fd3d452b272e13335db8d6b0b3ecde0f90ad6f16f3328c6fb150c889/rpds_py-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6abc8880d9d036ecaafe709079969f56e876fcf107f7a8e9920ba6d5a3878d05" }, + { url = 
"https://mirrors.aliyun.com/pypi/packages/65/1c/ae157e83a6357eceff62ba7e52113e3ec4834a84cfe07fa4b0757a7d105f/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca28829ae5f5d569bb62a79512c842a03a12576375d5ece7d2cadf8abe96ec28" }, + { url = "https://mirrors.aliyun.com/pypi/packages/d4/36/eb2eb8515e2ad24c0bd43c3ee9cd74c33f7ca6430755ccdb240fd3144c44/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a1010ed9524c73b94d15919ca4d41d8780980e1765babf85f9a2f90d247153dd" }, + { url = "https://mirrors.aliyun.com/pypi/packages/d6/65/ad8dc1784a331fabbd740ef6f71ce2198c7ed0890dab595adb9ea2d775a1/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8d1736cfb49381ba528cd5baa46f82fdc65c06e843dab24dd70b63d09121b3f" }, + { url = "https://mirrors.aliyun.com/pypi/packages/63/8e/0cfa7ae158e15e143fe03993b5bcd743a59f541f5952e1546b1ac1b5fd45/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d948b135c4693daff7bc2dcfc4ec57237a29bd37e60c2fabf5aff2bbacf3e2f1" }, + { url = "https://mirrors.aliyun.com/pypi/packages/60/1b/6f8f29f3f995c7ffdde46a626ddccd7c63aefc0efae881dc13b6e5d5bb16/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47f236970bccb2233267d89173d3ad2703cd36a0e2a6e92d0560d333871a3d23" }, + { url = "https://mirrors.aliyun.com/pypi/packages/6d/d5/a266341051a7a3ca2f4b750a3aa4abc986378431fc2da508c5034d081b70/rpds_py-0.30.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:2e6ecb5a5bcacf59c3f912155044479af1d0b6681280048b338b28e364aca1f6" }, + { url = "https://mirrors.aliyun.com/pypi/packages/10/3b/71b725851df9ab7a7a4e33cf36d241933da66040d195a84781f49c50490c/rpds_py-0.30.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a8fa71a2e078c527c3e9dc9fc5a98c9db40bcc8a92b4e8858e36d329f8684b51" }, + { url = 
"https://mirrors.aliyun.com/pypi/packages/00/2b/e59e58c544dc9bd8bd8384ecdb8ea91f6727f0e37a7131baeff8d6f51661/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73c67f2db7bc334e518d097c6d1e6fed021bbc9b7d678d6cc433478365d1d5f5" }, + { url = "https://mirrors.aliyun.com/pypi/packages/da/3e/a18e6f5b460893172a7d6a680e86d3b6bc87a54c1f0b03446a3c8c7b588f/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5ba103fb455be00f3b1c2076c9d4264bfcb037c976167a6047ed82f23153f02e" }, + { url = "https://mirrors.aliyun.com/pypi/packages/5c/e2/714694e4b87b85a18e2c243614974413c60aa107fd815b8cbc42b873d1d7/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7cee9c752c0364588353e627da8a7e808a66873672bcb5f52890c33fd965b394" }, + { url = "https://mirrors.aliyun.com/pypi/packages/6f/ab/d5d5e3bcedb0a77f4f613706b750e50a5a3ba1c15ccd3665ecc636c968fd/rpds_py-0.30.0-cp312-cp312-win32.whl", hash = "sha256:1ab5b83dbcf55acc8b08fc62b796ef672c457b17dbd7820a11d6c52c06839bdf" }, + { url = "https://mirrors.aliyun.com/pypi/packages/39/3b/f786af9957306fdc38a74cef405b7b93180f481fb48453a114bb6465744a/rpds_py-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:a090322ca841abd453d43456ac34db46e8b05fd9b3b4ac0c78bcde8b089f959b" }, + { url = "https://mirrors.aliyun.com/pypi/packages/f3/d2/b91dc748126c1559042cfe41990deb92c4ee3e2b415f6b5234969ffaf0cc/rpds_py-0.30.0-cp312-cp312-win_arm64.whl", hash = "sha256:669b1805bd639dd2989b281be2cfd951c6121b65e729d9b843e9639ef1fd555e" }, + { url = "https://mirrors.aliyun.com/pypi/packages/ed/dc/d61221eb88ff410de3c49143407f6f3147acf2538c86f2ab7ce65ae7d5f9/rpds_py-0.30.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:f83424d738204d9770830d35290ff3273fbb02b41f919870479fab14b9d303b2" }, + { url = "https://mirrors.aliyun.com/pypi/packages/fd/32/55fb50ae104061dbc564ef15cc43c013dc4a9f4527a1f4d99baddf56fe5f/rpds_py-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:e7536cd91353c5273434b4e003cbda89034d67e7710eab8761fd918ec6c69cf8" }, + { url = "https://mirrors.aliyun.com/pypi/packages/58/70/faed8186300e3b9bdd138d0273109784eea2396c68458ed580f885dfe7ad/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2771c6c15973347f50fece41fc447c054b7ac2ae0502388ce3b6738cd366e3d4" }, + { url = "https://mirrors.aliyun.com/pypi/packages/bd/a8/073cac3ed2c6387df38f71296d002ab43496a96b92c823e76f46b8af0543/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0a59119fc6e3f460315fe9d08149f8102aa322299deaa5cab5b40092345c2136" }, + { url = "https://mirrors.aliyun.com/pypi/packages/77/57/5999eb8c58671f1c11eba084115e77a8899d6e694d2a18f69f0ba471ec8b/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:76fec018282b4ead0364022e3c54b60bf368b9d926877957a8624b58419169b7" }, + { url = "https://mirrors.aliyun.com/pypi/packages/e0/af/5ab4833eadc36c0a8ed2bc5c0de0493c04f6c06de223170bd0798ff98ced/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:692bef75a5525db97318e8cd061542b5a79812d711ea03dbc1f6f8dbb0c5f0d2" }, + { url = "https://mirrors.aliyun.com/pypi/packages/b7/de/f7192e12b21b9e9a68a6d0f249b4af3fdcdff8418be0767a627564afa1f1/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9027da1ce107104c50c81383cae773ef5c24d296dd11c99e2629dbd7967a20c6" }, + { url = "https://mirrors.aliyun.com/pypi/packages/91/c4/fc70cd0249496493500e7cc2de87504f5aa6509de1e88623431fec76d4b6/rpds_py-0.30.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:9cf69cdda1f5968a30a359aba2f7f9aa648a9ce4b580d6826437f2b291cfc86e" }, + { url = "https://mirrors.aliyun.com/pypi/packages/58/95/d9275b05ab96556fefff73a385813eb66032e4c99f411d0795372d9abcea/rpds_py-0.30.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:a4796a717bf12b9da9d3ad002519a86063dcac8988b030e405704ef7d74d2d9d" }, + { url = "https://mirrors.aliyun.com/pypi/packages/06/c1/3088fc04b6624eb12a57eb814f0d4997a44b0d208d6cace713033ff1a6ba/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5d4c2aa7c50ad4728a094ebd5eb46c452e9cb7edbfdb18f9e1221f597a73e1e7" }, + { url = "https://mirrors.aliyun.com/pypi/packages/d8/42/c612a833183b39774e8ac8fecae81263a68b9583ee343db33ab571a7ce55/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ba81a9203d07805435eb06f536d95a266c21e5b2dfbf6517748ca40c98d19e31" }, + { url = "https://mirrors.aliyun.com/pypi/packages/5f/60/525a50f45b01d70005403ae0e25f43c0384369ad24ffe46e8d9068b50086/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:945dccface01af02675628334f7cf49c2af4c1c904748efc5cf7bbdf0b579f95" }, + { url = "https://mirrors.aliyun.com/pypi/packages/0b/5d/47c4655e9bcd5ca907148535c10e7d489044243cc9941c16ed7cd53be91d/rpds_py-0.30.0-cp313-cp313-win32.whl", hash = "sha256:b40fb160a2db369a194cb27943582b38f79fc4887291417685f3ad693c5a1d5d" }, + { url = "https://mirrors.aliyun.com/pypi/packages/f2/e1/485132437d20aa4d3e1d8b3fb5a5e65aa8139f1e097080c2a8443201742c/rpds_py-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:806f36b1b605e2d6a72716f321f20036b9489d29c51c91f4dd29a3e3afb73b15" }, + { url = "https://mirrors.aliyun.com/pypi/packages/24/95/ffd128ed1146a153d928617b0ef673960130be0009c77d8fbf0abe306713/rpds_py-0.30.0-cp313-cp313-win_arm64.whl", hash = "sha256:d96c2086587c7c30d44f31f42eae4eac89b60dabbac18c7669be3700f13c3ce1" }, + { url = "https://mirrors.aliyun.com/pypi/packages/ff/1b/b10de890a0def2a319a2626334a7f0ae388215eb60914dbac8a3bae54435/rpds_py-0.30.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:eb0b93f2e5c2189ee831ee43f156ed34e2a89a78a66b98cadad955972548be5a" }, + { url = 
"https://mirrors.aliyun.com/pypi/packages/0d/bf/27e39f5971dc4f305a4fb9c672ca06f290f7c4e261c568f3dea16a410d47/rpds_py-0.30.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:922e10f31f303c7c920da8981051ff6d8c1a56207dbdf330d9047f6d30b70e5e" }, + { url = "https://mirrors.aliyun.com/pypi/packages/40/58/442ada3bba6e8e6615fc00483135c14a7538d2ffac30e2d933ccf6852232/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdc62c8286ba9bf7f47befdcea13ea0e26bf294bda99758fd90535cbaf408000" }, + { url = "https://mirrors.aliyun.com/pypi/packages/14/14/f59b0127409a33c6ef6f5c1ebd5ad8e32d7861c9c7adfa9a624fc3889f6c/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:47f9a91efc418b54fb8190a6b4aa7813a23fb79c51f4bb84e418f5476c38b8db" }, + { url = "https://mirrors.aliyun.com/pypi/packages/b3/66/e0be3e162ac299b3a22527e8913767d869e6cc75c46bd844aa43fb81ab62/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1f3587eb9b17f3789ad50824084fa6f81921bbf9a795826570bda82cb3ed91f2" }, + { url = "https://mirrors.aliyun.com/pypi/packages/3d/55/fa3b9cf31d0c963ecf1ba777f7cf4b2a2c976795ac430d24a1f43d25a6ba/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:39c02563fc592411c2c61d26b6c5fe1e51eaa44a75aa2c8735ca88b0d9599daa" }, + { url = "https://mirrors.aliyun.com/pypi/packages/60/ca/780cf3b1a32b18c0f05c441958d3758f02544f1d613abf9488cd78876378/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51a1234d8febafdfd33a42d97da7a43f5dcb120c1060e352a3fbc0c6d36e2083" }, + { url = "https://mirrors.aliyun.com/pypi/packages/82/86/d5f2e04f2aa6247c613da0c1dd87fcd08fa17107e858193566048a1e2f0a/rpds_py-0.30.0-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:eb2c4071ab598733724c08221091e8d80e89064cd472819285a9ab0f24bcedb9" }, + { url = 
"https://mirrors.aliyun.com/pypi/packages/4b/9a/453255d2f769fe44e07ea9785c8347edaf867f7026872e76c1ad9f7bed92/rpds_py-0.30.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6bdfdb946967d816e6adf9a3d8201bfad269c67efe6cefd7093ef959683c8de0" }, + { url = "https://mirrors.aliyun.com/pypi/packages/a3/31/622a86cdc0c45d6df0e9ccb6becdba5074735e7033c20e401a6d9d0e2ca0/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c77afbd5f5250bf27bf516c7c4a016813eb2d3e116139aed0096940c5982da94" }, + { url = "https://mirrors.aliyun.com/pypi/packages/1c/5d/15bbf0fb4a3f58a3b1c67855ec1efcc4ceaef4e86644665fff03e1b66d8d/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:61046904275472a76c8c90c9ccee9013d70a6d0f73eecefd38c1ae7c39045a08" }, + { url = "https://mirrors.aliyun.com/pypi/packages/6d/61/21b8c41f68e60c8cc3b2e25644f0e3681926020f11d06ab0b78e3c6bbff1/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c5f36a861bc4b7da6516dbdf302c55313afa09b81931e8280361a4f6c9a2d27" }, + { url = "https://mirrors.aliyun.com/pypi/packages/f9/39/7e067bb06c31de48de3eb200f9fc7c58982a4d3db44b07e73963e10d3be9/rpds_py-0.30.0-cp313-cp313t-win32.whl", hash = "sha256:3d4a69de7a3e50ffc214ae16d79d8fbb0922972da0356dcf4d0fdca2878559c6" }, + { url = "https://mirrors.aliyun.com/pypi/packages/0a/4d/222ef0b46443cf4cf46764d9c630f3fe4abaa7245be9417e56e9f52b8f65/rpds_py-0.30.0-cp313-cp313t-win_amd64.whl", hash = "sha256:f14fc5df50a716f7ece6a80b6c78bb35ea2ca47c499e422aa4463455dd96d56d" }, + { url = "https://mirrors.aliyun.com/pypi/packages/86/81/dad16382ebbd3d0e0328776d8fd7ca94220e4fa0798d1dc5e7da48cb3201/rpds_py-0.30.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:68f19c879420aa08f61203801423f6cd5ac5f0ac4ac82a2368a9fcd6a9a075e0" }, + { url = "https://mirrors.aliyun.com/pypi/packages/2b/60/19f7884db5d5603edf3c6bce35408f45ad3e97e10007df0e17dd57af18f8/rpds_py-0.30.0-cp314-cp314-macosx_11_0_arm64.whl", hash = 
"sha256:ec7c4490c672c1a0389d319b3a9cfcd098dcdc4783991553c332a15acf7249be" }, + { url = "https://mirrors.aliyun.com/pypi/packages/bf/c4/76eb0e1e72d1a9c4703c69607cec123c29028bff28ce41588792417098ac/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f251c812357a3fed308d684a5079ddfb9d933860fc6de89f2b7ab00da481e65f" }, + { url = "https://mirrors.aliyun.com/pypi/packages/72/87/87ea665e92f3298d1b26d78814721dc39ed8d2c74b86e83348d6b48a6f31/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ac98b175585ecf4c0348fd7b29c3864bda53b805c773cbf7bfdaffc8070c976f" }, + { url = "https://mirrors.aliyun.com/pypi/packages/77/ad/7783a89ca0587c15dcbf139b4a8364a872a25f861bdb88ed99f9b0dec985/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3e62880792319dbeb7eb866547f2e35973289e7d5696c6e295476448f5b63c87" }, + { url = "https://mirrors.aliyun.com/pypi/packages/5b/3c/2882bdac942bd2172f3da574eab16f309ae10a3925644e969536553cb4ee/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4e7fc54e0900ab35d041b0601431b0a0eb495f0851a0639b6ef90f7741b39a18" }, + { url = "https://mirrors.aliyun.com/pypi/packages/ce/81/9a91c0111ce1758c92516a3e44776920b579d9a7c09b2b06b642d4de3f0f/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47e77dc9822d3ad616c3d5759ea5631a75e5809d5a28707744ef79d7a1bcfcad" }, + { url = "https://mirrors.aliyun.com/pypi/packages/cf/8e/1da49d4a107027e5fbc64daeab96a0706361a2918da10cb41769244b805d/rpds_py-0.30.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:b4dc1a6ff022ff85ecafef7979a2c6eb423430e05f1165d6688234e62ba99a07" }, + { url = "https://mirrors.aliyun.com/pypi/packages/df/5a/7ee239b1aa48a127570ec03becbb29c9d5a9eb092febbd1699d567cae859/rpds_py-0.30.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:4559c972db3a360808309e06a74628b95eaccbf961c335c8fe0d590cf587456f" }, + { url = "https://mirrors.aliyun.com/pypi/packages/70/ea/caa143cf6b772f823bc7929a45da1fa83569ee49b11d18d0ada7f5ee6fd6/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:0ed177ed9bded28f8deb6ab40c183cd1192aa0de40c12f38be4d59cd33cb5c65" }, + { url = "https://mirrors.aliyun.com/pypi/packages/64/91/ac20ba2d69303f961ad8cf55bf7dbdb4763f627291ba3d0d7d67333cced9/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:ad1fa8db769b76ea911cb4e10f049d80bf518c104f15b3edb2371cc65375c46f" }, + { url = "https://mirrors.aliyun.com/pypi/packages/21/20/7ff5f3c8b00c8a95f75985128c26ba44503fb35b8e0259d812766ea966c7/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:46e83c697b1f1c72b50e5ee5adb4353eef7406fb3f2043d64c33f20ad1c2fc53" }, + { url = "https://mirrors.aliyun.com/pypi/packages/72/c7/81dadd7b27c8ee391c132a6b192111ca58d866577ce2d9b0ca157552cce0/rpds_py-0.30.0-cp314-cp314-win32.whl", hash = "sha256:ee454b2a007d57363c2dfd5b6ca4a5d7e2c518938f8ed3b706e37e5d470801ed" }, + { url = "https://mirrors.aliyun.com/pypi/packages/3e/d2/1aaac33287e8cfb07aab2e6b8ac1deca62f6f65411344f1433c55e6f3eb8/rpds_py-0.30.0-cp314-cp314-win_amd64.whl", hash = "sha256:95f0802447ac2d10bcc69f6dc28fe95fdf17940367b21d34e34c737870758950" }, + { url = "https://mirrors.aliyun.com/pypi/packages/e8/95/ab005315818cc519ad074cb7784dae60d939163108bd2b394e60dc7b5461/rpds_py-0.30.0-cp314-cp314-win_arm64.whl", hash = "sha256:613aa4771c99f03346e54c3f038e4cc574ac09a3ddfb0e8878487335e96dead6" }, + { url = "https://mirrors.aliyun.com/pypi/packages/9e/68/154fe0194d83b973cdedcdcc88947a2752411165930182ae41d983dcefa6/rpds_py-0.30.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:7e6ecfcb62edfd632e56983964e6884851786443739dbfe3582947e87274f7cb" }, + { url = 
"https://mirrors.aliyun.com/pypi/packages/83/69/8bbc8b07ec854d92a8b75668c24d2abcb1719ebf890f5604c61c9369a16f/rpds_py-0.30.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a1d0bc22a7cdc173fedebb73ef81e07faef93692b8c1ad3733b67e31e1b6e1b8" }, + { url = "https://mirrors.aliyun.com/pypi/packages/ab/00/ba2e50183dbd9abcce9497fa5149c62b4ff3e22d338a30d690f9af970561/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d08f00679177226c4cb8c5265012eea897c8ca3b93f429e546600c971bcbae7" }, + { url = "https://mirrors.aliyun.com/pypi/packages/05/6f/86f0272b84926bcb0e4c972262f54223e8ecc556b3224d281e6598fc9268/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5965af57d5848192c13534f90f9dd16464f3c37aaf166cc1da1cae1fd5a34898" }, + { url = "https://mirrors.aliyun.com/pypi/packages/cb/e9/0e02bb2e6dc63d212641da45df2b0bf29699d01715913e0d0f017ee29438/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a4e86e34e9ab6b667c27f3211ca48f73dba7cd3d90f8d5b11be56e5dbc3fb4e" }, + { url = "https://mirrors.aliyun.com/pypi/packages/ee/ca/be7bca14cf21513bdf9c0606aba17d1f389ea2b6987035eb4f62bd923f25/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5d3e6b26f2c785d65cc25ef1e5267ccbe1b069c5c21b8cc724efee290554419" }, + { url = "https://mirrors.aliyun.com/pypi/packages/c2/c7/736e00ebf39ed81d75544c0da6ef7b0998f8201b369acf842f9a90dc8fce/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:626a7433c34566535b6e56a1b39a7b17ba961e97ce3b80ec62e6f1312c025551" }, + { url = "https://mirrors.aliyun.com/pypi/packages/4a/3f/da50dfde9956aaf365c4adc9533b100008ed31aea635f2b8d7b627e25b49/rpds_py-0.30.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:acd7eb3f4471577b9b5a41baf02a978e8bdeb08b4b355273994f8b87032000a8" }, + { url = 
"https://mirrors.aliyun.com/pypi/packages/4e/00/34bcc2565b6020eab2623349efbdec810676ad571995911f1abdae62a3a0/rpds_py-0.30.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fe5fa731a1fa8a0a56b0977413f8cacac1768dad38d16b3a296712709476fbd5" }, + { url = "https://mirrors.aliyun.com/pypi/packages/8c/28/882e72b5b3e6f718d5453bd4d0d9cf8df36fddeb4ddbbab17869d5868616/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:74a3243a411126362712ee1524dfc90c650a503502f135d54d1b352bd01f2404" }, + { url = "https://mirrors.aliyun.com/pypi/packages/3b/97/04a65539c17692de5b85c6e293520fd01317fd878ea1995f0367d4532fb1/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:3e8eeb0544f2eb0d2581774be4c3410356eba189529a6b3e36bbbf9696175856" }, + { url = "https://mirrors.aliyun.com/pypi/packages/85/70/92482ccffb96f5441aab93e26c4d66489eb599efdcf96fad90c14bbfb976/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:dbd936cde57abfee19ab3213cf9c26be06d60750e60a8e4dd85d1ab12c8b1f40" }, + { url = "https://mirrors.aliyun.com/pypi/packages/20/53/7c7e784abfa500a2b6b583b147ee4bb5a2b3747a9166bab52fec4b5b5e7d/rpds_py-0.30.0-cp314-cp314t-win32.whl", hash = "sha256:dc824125c72246d924f7f796b4f63c1e9dc810c7d9e2355864b3c3a73d59ade0" }, + { url = "https://mirrors.aliyun.com/pypi/packages/d0/02/fa464cdfbe6b26e0600b62c528b72d8608f5cc49f96b8d6e38c95d60c676/rpds_py-0.30.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27f4b0e92de5bfbc6f86e43959e6edd1425c33b5e69aab0984a72047f2bcf1e3" }, +] + [[package]] name = "safetensors" version = "0.7.0" @@ -2032,6 +2238,15 @@ wheels = [ { url = "https://mirrors.aliyun.com/pypi/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274" }, ] +[[package]] +name = "smmap" +version = "5.0.3" +source = { registry = "https://mirrors.aliyun.com/pypi/simple/" } +sdist = { url = 
"https://mirrors.aliyun.com/pypi/packages/1f/ea/49c993d6dfdd7338c9b1000a0f36817ed7ec84577ae2e52f890d1a4ff909/smmap-5.0.3.tar.gz", hash = "sha256:4d9debb8b99007ae47165abc08670bd74cb74b5227dda7f643eccc4e9eb5642c" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/c1/d4/59e74daffcb57a07668852eeeb6035af9f32cbfd7a1d2511f17d2fe6a738/smmap-5.0.3-py3-none-any.whl", hash = "sha256:c106e05d5a61449cf6ba9a1e650227ecfb141590d2a98412103ff35d89fc7b2f" }, +] + [[package]] name = "sniffio" version = "1.3.1" @@ -2041,6 +2256,35 @@ wheels = [ { url = "https://mirrors.aliyun.com/pypi/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2" }, ] +[[package]] +name = "streamlit" +version = "1.56.0" +source = { registry = "https://mirrors.aliyun.com/pypi/simple/" } +dependencies = [ + { name = "altair" }, + { name = "blinker" }, + { name = "cachetools" }, + { name = "click" }, + { name = "gitpython" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "pandas" }, + { name = "pillow" }, + { name = "protobuf" }, + { name = "pyarrow" }, + { name = "pydeck" }, + { name = "requests" }, + { name = "tenacity" }, + { name = "toml" }, + { name = "tornado" }, + { name = "typing-extensions" }, + { name = "watchdog", marker = "sys_platform != 'darwin'" }, +] +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/03/85/7c669b3a1336d34ef39fa9760fbd343185f3b15db2ad0838fd78423d1c7f/streamlit-1.56.0.tar.gz", hash = "sha256:1176acfa89ae1318b79078e8efe689a9d02e8d58e325c00fc0e55fa2f3fe8d6a" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/e4/91/cb6f13a89e376ef179309d74f37a70ea0041d5e4b5ba5c4836dbf6e020ad/streamlit-1.56.0-py3-none-any.whl", hash = "sha256:8677a335734a30a51bc57ad0ec910e365d95f7c456fc02c60032927cd0729dc5" }, +] + [[package]] name = "suimemodeltraner" version = "0.1.0" @@ -2054,11 +2298,14 @@ dependencies = [ { name = 
"numpy" }, { name = "onnxruntime" }, { name = "pandas" }, + { name = "plotly" }, { name = "pypinyin" }, { name = "requests" }, { name = "rich" }, + { name = "streamlit" }, { name = "tensorboard" }, { name = "torch" }, + { name = "torchdata" }, { name = "transformers" }, { name = "typer" }, ] @@ -2079,11 +2326,14 @@ requires-dist = [ { name = "numpy", specifier = ">=2.4.2" }, { name = "onnxruntime", specifier = ">=1.24.2" }, { name = "pandas", specifier = ">=3.0.0" }, + { name = "plotly", specifier = ">=5.0.0" }, { name = "pypinyin", specifier = ">=0.55.0" }, { name = "requests", specifier = ">=2.32.5" }, { name = "rich", specifier = ">=14.3.1" }, + { name = "streamlit", specifier = ">=1.56.0" }, { name = "tensorboard", specifier = ">=2.20.0" }, { name = "torch", specifier = ">=2.10.0" }, + { name = "torchdata", specifier = ">=0.11.0" }, { name = "transformers", specifier = "==5.1.0" }, { name = "typer", specifier = ">=0.21.1" }, ] @@ -2106,6 +2356,15 @@ wheels = [ { url = "https://mirrors.aliyun.com/pypi/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5" }, ] +[[package]] +name = "tenacity" +version = "9.1.4" +source = { registry = "https://mirrors.aliyun.com/pypi/simple/" } +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/47/c6/ee486fd809e357697ee8a44d3d69222b344920433d3b6666ccd9b374630c/tenacity-9.1.4.tar.gz", hash = "sha256:adb31d4c263f2bd041081ab33b498309a57c77f9acf2db65aadf0898179cf93a" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/d7/c1/eb8f9debc45d3b7918a32ab756658a0904732f75e555402972246b0b8e71/tenacity-9.1.4-py3-none-any.whl", hash = "sha256:6095a360c919085f28c6527de529e76a06ad89b23659fa881ae0649b867a9d55" }, +] + [[package]] name = "tensorboard" version = "2.20.0" @@ -2162,6 +2421,15 @@ wheels = [ { url = 
"https://mirrors.aliyun.com/pypi/packages/72/f4/0de46cfa12cdcbcd464cc59fde36912af405696f687e53a091fb432f694c/tokenizers-0.22.2-cp39-abi3-win_arm64.whl", hash = "sha256:9ce725d22864a1e965217204946f830c37876eee3b2ba6fc6255e8e903d5fcbc" }, ] +[[package]] +name = "toml" +version = "0.10.2" +source = { registry = "https://mirrors.aliyun.com/pypi/simple/" } +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/be/ba/1f744cdc819428fc6b5084ec34d9b30660f6f9daaf70eead706e3203ec3c/toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b" }, +] + [[package]] name = "torch" version = "2.11.0" @@ -2205,6 +2473,19 @@ wheels = [ { url = "https://mirrors.aliyun.com/pypi/packages/cf/bf/c8d12a2c86dbfd7f40fb2f56fbf5a505ccf2d9ce131eb559dfc7c51e1a04/torch-2.11.0-cp314-cp314t-win_amd64.whl", hash = "sha256:b2a43985ff5ef6ddd923bbcf99943e5f58059805787c5c9a2622bf05ca2965b0" }, ] +[[package]] +name = "torchdata" +version = "0.11.0" +source = { registry = "https://mirrors.aliyun.com/pypi/simple/" } +dependencies = [ + { name = "requests" }, + { name = "torch" }, + { name = "urllib3" }, +] +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/95/d4/af694ef718aedbe95a72760ab9ff7a6a7a44ace2d7f70c27bfeb67c5c503/torchdata-0.11.0-py3-none-any.whl", hash = "sha256:52b940fbbe0e00fb21cabddf528449d1bec5bfb0d0823b7487b15f951658ee33" }, +] + [[package]] name = "tornado" version = "6.5.5" @@ -2337,6 +2618,24 @@ wheels = [ { url = "https://mirrors.aliyun.com/pypi/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4" }, ] +[[package]] +name = "watchdog" +version = "6.0.0" 
+source = { registry = "https://mirrors.aliyun.com/pypi/simple/" } +sdist = { url = "https://mirrors.aliyun.com/pypi/packages/db/7d/7f3d619e951c88ed75c6037b246ddcf2d322812ee8ea189be89511721d54/watchdog-6.0.0.tar.gz", hash = "sha256:9ddf7c82fda3ae8e24decda1338ede66e1c99883db93711d8fb941eaa2d8c282" } +wheels = [ + { url = "https://mirrors.aliyun.com/pypi/packages/a9/c7/ca4bf3e518cb57a686b2feb4f55a1892fd9a3dd13f470fca14e00f80ea36/watchdog-6.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7607498efa04a3542ae3e05e64da8202e58159aa1fa4acddf7678d34a35d4f13" }, + { url = "https://mirrors.aliyun.com/pypi/packages/5c/51/d46dc9332f9a647593c947b4b88e2381c8dfc0942d15b8edc0310fa4abb1/watchdog-6.0.0-py3-none-manylinux2014_armv7l.whl", hash = "sha256:9041567ee8953024c83343288ccc458fd0a2d811d6a0fd68c4c22609e3490379" }, + { url = "https://mirrors.aliyun.com/pypi/packages/d4/57/04edbf5e169cd318d5f07b4766fee38e825d64b6913ca157ca32d1a42267/watchdog-6.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:82dc3e3143c7e38ec49d61af98d6558288c415eac98486a5c581726e0737c00e" }, + { url = "https://mirrors.aliyun.com/pypi/packages/ab/cc/da8422b300e13cb187d2203f20b9253e91058aaf7db65b74142013478e66/watchdog-6.0.0-py3-none-manylinux2014_ppc64.whl", hash = "sha256:212ac9b8bf1161dc91bd09c048048a95ca3a4c4f5e5d4a7d1b1a7d5752a7f96f" }, + { url = "https://mirrors.aliyun.com/pypi/packages/2c/3b/b8964e04ae1a025c44ba8e4291f86e97fac443bca31de8bd98d3263d2fcf/watchdog-6.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:e3df4cbb9a450c6d49318f6d14f4bbc80d763fa587ba46ec86f99f9e6876bb26" }, + { url = "https://mirrors.aliyun.com/pypi/packages/62/ae/a696eb424bedff7407801c257d4b1afda455fe40821a2be430e173660e81/watchdog-6.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:2cce7cfc2008eb51feb6aab51251fd79b85d9894e98ba847408f662b3395ca3c" }, + { url = 
"https://mirrors.aliyun.com/pypi/packages/b5/e8/dbf020b4d98251a9860752a094d09a65e1b436ad181faf929983f697048f/watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:20ffe5b202af80ab4266dcd3e91aae72bf2da48c0d33bdb15c66658e685e94e2" }, + { url = "https://mirrors.aliyun.com/pypi/packages/07/f6/d0e5b343768e8bcb4cda79f0f2f55051bf26177ecd5651f84c07567461cf/watchdog-6.0.0-py3-none-win32.whl", hash = "sha256:07df1fdd701c5d4c8e55ef6cf55b8f0120fe1aef7ef39a1c6fc6bc2e606d517a" }, + { url = "https://mirrors.aliyun.com/pypi/packages/db/d9/c495884c6e548fce18a8f40568ff120bc3a4b7b99813081c8ac0c936fa64/watchdog-6.0.0-py3-none-win_amd64.whl", hash = "sha256:cbafb470cf848d93b5d013e2ecb245d4aa1c8fd0504e863ccefa32445359d680" }, + { url = "https://mirrors.aliyun.com/pypi/packages/33/e8/e40370e6d74ddba47f002a32919d91310d6074130fe4e17dabcafc15cbf1/watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f" }, +] + [[package]] name = "werkzeug" version = "3.1.7"