Files
video-flow/scripts/migrate_projects.py
2026-01-09 14:09:16 +08:00

129 lines
4.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
迁移脚本:将旧版 JSON 项目文件导入到 SQLite 数据库(用于 8503 调试)
关键点:
- 不假设 legacy JSON 与当前 Streamlit UI schema 一致
- 使用 `modules.legacy_normalizer.normalize_legacy_project()` 做纯规则规范化
- 保留 `_legacy`,确保信息不丢失
"""
import json
import sys
from pathlib import Path
# 添加项目根目录到路径
sys.path.insert(0, str(Path(__file__).parent.parent))
import config
from modules.db_manager import db
from modules.legacy_normalizer import normalize_legacy_project
def migrate_json_projects(temp_dir: str = None, force: bool = False) -> None:
    """Import legacy ``project_*.json`` files from a directory into the database.

    Every file matching ``project_*.json`` directly inside ``temp_dir`` is
    loaded, and the part of the file name after the leading ``project_``
    prefix is used as the project id. For each file the function:

    * skips it when ``db.get_project(project_id)`` returns a truthy value and
      ``force`` is false;
    * otherwise calls ``db.create_project`` (new projects only), then
      ``db.update_project_script`` with the result of
      ``normalize_legacy_project(data)`` when that result is truthy, then
      ``db.update_project_status`` with the file's ``status`` (default
      ``"draft"``).

    Progress and a final summary are printed to stdout. A failure while
    handling one file is printed together with its traceback, counted, and
    does not stop the remaining files from being processed.

    NOTE(review): on the ``force`` path only the script and the status are
    rewritten; the freshly built ``product_info`` is not persisted for
    projects that already exist. Confirm whether that is intended.

    Args:
        temp_dir: Directory to scan. Falls back to ``config.TEMP_DIR`` when
            ``None``.
        force: When true, projects that already exist are updated instead of
            being skipped.

    Returns:
        None. Results are reported on stdout only.
    """
    import traceback

    if temp_dir is None:
        temp_dir = config.TEMP_DIR

    temp_path = Path(temp_dir)
    if not temp_path.exists():
        print(f"❌ temp 目录不存在: {temp_path}")
        return

    # glob() yields entries in arbitrary order; sort so repeated runs process
    # (and log) the files in the same sequence.
    json_files = sorted(temp_path.glob("project_*.json"))
    if not json_files:
        print(f"⚠️ 未找到项目文件: {temp_path}/project_*.json")
        return

    print(f"📂 找到 {len(json_files)} 个项目文件")

    prefix = "project_"
    imported = 0
    updated = 0
    skipped = 0
    errors = 0

    for json_file in json_files:
        try:
            # Strip only the leading prefix. str.replace() would also delete
            # later occurrences, turning "project_my_project_1" into "my_1".
            # The glob pattern guarantees the stem starts with the prefix.
            project_id = json_file.stem[len(prefix):]

            with open(json_file, "r", encoding="utf-8") as f:
                data = json.load(f)

            # Everything below reads keys with .get(); fail with a clear
            # message instead of an incidental AttributeError.
            if not isinstance(data, dict):
                raise ValueError(f"JSON 顶层不是对象: {type(data).__name__}")

            existing = db.get_project(project_id)
            if existing and not force:
                print(f" ⏭️ 跳过已存在: {project_id}")
                skipped += 1
                continue

            # Product info kept for later display and retention.
            # Legacy files carry image_urls, while uploaded_images has no
            # legacy counterpart and is therefore left empty.
            product_info = {
                "prompt": data.get("prompt", ""),
                "image_urls": data.get("image_urls", []),
                "analysis": data.get("analysis", ""),
                "questions": data.get("questions", []),
                "answers": data.get("answers", {}),
                "uploaded_images": [],
                "_legacy": data,  # keep the raw payload so nothing is lost
            }

            # Display name: first 50 characters of the prompt, or a
            # placeholder built from the id when the prompt is missing/empty.
            prompt = data.get("prompt")
            name = str(prompt)[:50] if prompt else f"项目 {project_id}"

            # Normalize the legacy payload into the script structure.
            script_data = normalize_legacy_project(data)

            # Only brand-new projects need a row created; the script and
            # status writes below are shared by both paths.
            if not existing:
                db.create_project(project_id, name, product_info)

            if script_data:
                db.update_project_script(project_id, script_data)
            db.update_project_status(project_id, data.get("status", "draft"))

            if existing:
                print(f" 🔄 更新成功: {project_id} ({name[:30]}...)")
                updated += 1
            else:
                print(f" ✅ 导入成功: {project_id} ({name[:30]}...)")
                imported += 1
        except Exception as e:
            # Per-file boundary: report, count, and carry on with the rest.
            print(f" ❌ 导入失败 {json_file.name}: {e}")
            traceback.print_exc()
            errors += 1

    print("\n📊 迁移完成:")
    print(f" ✅ 新导入: {imported}")
    print(f" 🔄 已更新: {updated}")
    print(f" ⏭️ 已跳过: {skipped}")
    print(f" ❌ 失败: {errors}")
if __name__ == "__main__":
    # Command-line entry point: parse the two optional flags and run the
    # migration with them.
    import argparse

    cli = argparse.ArgumentParser(description="迁移旧版项目到数据库")
    cli.add_argument(
        "--temp-dir",
        default=None,
        type=str,
        help="temp 目录路径 (默认使用 config.TEMP_DIR)",
    )
    cli.add_argument(
        "--force",
        action="store_true",
        help="强制更新已存在的项目",
    )
    options = cli.parse_args()

    print("🚀 开始迁移项目数据...")
    migrate_json_projects(temp_dir=options.temp_dir, force=options.force)