chore: sync code and project files

This commit is contained in:
Tony Zhang
2026-01-09 14:09:16 +08:00
parent 3d1fb37769
commit 30d7eb4b35
94 changed files with 12706 additions and 255 deletions

View File

@@ -0,0 +1,95 @@
"""
Batch-import stickers into assets/stickers_builtin from a manifest and
generate index.json.

Usage:
    python3 scripts/import_stickers_manifest.py --manifest stickers_manifest.json

Manifest example (item names/tags are free-form display text):
{
  "pack": {
    "id": "fluent-emoji-subset",
    "name": "Fluent Emoji subset (Douyin favorites)",
    "license": "MIT (CHECK BEFORE PROD)",
    "attribution": "Microsoft Fluent UI Emoji"
  },
  "categories": [
    {
      "id": "douyin-basic",
      "name": "Douyin favorites",
      "items": [
        {"id": "fire", "name": "fire", "url": "https://.../fire.png", "tags": ["hot", "viral"]},
        {"id": "heart", "name": "heart", "url": "https://.../heart.png", "tags": ["like", "engagement"]}
      ]
    }
  ]
}
"""
from __future__ import annotations
import argparse
import json
import os
from pathlib import Path
from urllib.request import urlopen, Request
def _safe_name(s: str) -> str:
    """Reduce *s* to a filesystem-safe name.

    Alphanumerics, "-" and "_" are kept; every other character becomes "_".
    The result is capped at 80 characters; an empty/None input yields "item".
    """
    kept = []
    for ch in (s or ""):
        kept.append(ch if ch.isalnum() or ch in "-_" else "_")
    cleaned = "".join(kept)[:80]
    return cleaned or "item"
def download(url: str, out: Path) -> None:
    """Fetch *url* over HTTP(S) and write the response body to *out*.

    Parent directories of *out* are created as needed. A custom User-Agent
    is sent; the request times out after 60 seconds.
    """
    out.parent.mkdir(parents=True, exist_ok=True)
    request = Request(url, headers={"User-Agent": "video-flow-stickers/1.0"})
    with urlopen(request, timeout=60) as response:
        payload = response.read()
    out.write_bytes(payload)
def main() -> int:
    """Import stickers listed in a manifest into a local asset directory.

    Reads the manifest JSON, downloads every item's "url" into --out-dir,
    rewrites each item to reference the local "file" name instead, and
    writes the rewritten manifest as index.json.

    Returns:
        0 on success (suitable for SystemExit).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--manifest", required=True, help="manifest json path")
    ap.add_argument("--out-dir", default="assets/stickers_builtin", help="output directory")
    args = ap.parse_args()

    manifest_path = Path(args.manifest)
    data = json.loads(manifest_path.read_text(encoding="utf-8"))
    out_dir = Path(args.out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    pack = data.get("pack") or {}
    categories = data.get("categories") or []

    # Download each item and rewrite its "url" field to a local "file" name.
    for cat in categories:
        for it in (cat.get("items") or []):
            url = str(it.get("url") or "")
            if not url:
                continue
            # Derive the extension from the URL path (query string stripped).
            # BUG FIX: .jpg/.jpeg/.gif URLs used to be coerced to ".png",
            # mislabeling the downloaded bytes; accept the common raster and
            # vector formats, falling back to ".png" only for unknown ones.
            ext = Path(url.split("?")[0]).suffix.lower()
            if ext not in (".png", ".svg", ".webp", ".jpg", ".jpeg", ".gif"):
                ext = ".png"
            fid = _safe_name(str(it.get("id") or it.get("name") or "item"))
            fname = f"{fid}{ext}"
            target = out_dir / fname
            if not target.exists():  # resume-friendly: skip existing files
                print(f"download: {url} -> {target}")
                download(url, target)
            it["file"] = fname
            it.pop("url", None)

    # Emit the rewritten manifest next to the downloaded assets.
    out_index = out_dir / "index.json"
    out_index.write_text(
        json.dumps({"pack": pack, "categories": categories}, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
    print(f"written: {out_index}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

128
scripts/migrate_projects.py Normal file
View File

@@ -0,0 +1,128 @@
#!/usr/bin/env python3
"""
Migration script: import legacy JSON project files into the SQLite database
(used for debugging on port 8503).

Key points:
- Does not assume the legacy JSON matches the current Streamlit UI schema
- Uses `modules.legacy_normalizer.normalize_legacy_project()` for pure
  rule-based normalization
- Preserves `_legacy` so no information is lost
"""
import json
import sys
from pathlib import Path

# Make the repository root importable when running from scripts/.
sys.path.insert(0, str(Path(__file__).parent.parent))

import config
from modules.db_manager import db
from modules.legacy_normalizer import normalize_legacy_project
def migrate_json_projects(temp_dir: str = None, force: bool = False):
    """Read project_*.json files from the temp dir and import them into the DB.

    Args:
        temp_dir: Directory holding the legacy JSON files; defaults to
            config.TEMP_DIR when None.
        force: When True, re-import (update) projects that already exist
            in the database instead of skipping them.
    """
    if temp_dir is None:
        temp_dir = config.TEMP_DIR
    temp_path = Path(temp_dir)
    if not temp_path.exists():
        print(f"❌ temp 目录不存在: {temp_path}")
        return
    # Collect all legacy project JSON files.
    json_files = list(temp_path.glob("project_*.json"))
    if not json_files:
        print(f"⚠️ 未找到项目文件: {temp_path}/project_*.json")
        return
    print(f"📂 找到 {len(json_files)} 个项目文件")
    imported = 0
    updated = 0
    skipped = 0
    errors = 0
    for json_file in json_files:
        try:
            # Project id is the filename stem minus the "project_" prefix.
            project_id = json_file.stem.replace("project_", "")
            # Load the legacy JSON document.
            with open(json_file, "r", encoding="utf-8") as f:
                data = json.load(f)
            # Skip projects already present unless force was requested.
            existing = db.get_project(project_id)
            if existing and not force:
                print(f" ⏭️ 跳过已存在: {project_id}")
                skipped += 1
                continue
            # Product info: used for Step1 display and data retention.
            # NOTE: legacy image_urls are mostly remote URLs; the current
            # Streamlit Step1 uses uploaded_images (local paths).
            product_info = {
                "prompt": data.get("prompt", ""),
                "image_urls": data.get("image_urls", []),
                "analysis": data.get("analysis", ""),
                "questions": data.get("questions", []),
                "answers": data.get("answers", {}),
                "uploaded_images": [],  # legacy has no local upload paths
                "_legacy": data,
            }
            # Derive a display name from the prompt (fallback: project id).
            name = data.get("prompt", "")[:50] if data.get("prompt") else f"项目 {project_id}"
            # Normalize script data to the current UI schema; legacy is kept.
            script_data = normalize_legacy_project(data)
            if existing and force:
                # Update the existing project in place.
                if script_data:
                    db.update_project_script(project_id, script_data)
                status = data.get("status", "draft")
                db.update_project_status(project_id, status)
                print(f" 🔄 更新成功: {project_id} ({name[:30]}...)")
                updated += 1
            else:
                # Create a new project record.
                db.create_project(project_id, name, product_info)
                # Attach the normalized script, if any.
                if script_data:
                    db.update_project_script(project_id, script_data)
                # Carry over the legacy status (default: draft).
                status = data.get("status", "draft")
                db.update_project_status(project_id, status)
                print(f" ✅ 导入成功: {project_id} ({name[:30]}...)")
                imported += 1
        except Exception as e:
            # Best-effort migration: report the failure and continue.
            print(f" ❌ 导入失败 {json_file.name}: {e}")
            import traceback
            traceback.print_exc()
            errors += 1
    print(f"\n📊 迁移完成:")
    print(f" ✅ 新导入: {imported}")
    print(f" 🔄 已更新: {updated}")
    print(f" ⏭️ 已跳过: {skipped}")
    print(f" ❌ 失败: {errors}")
if __name__ == "__main__":
    import argparse

    # CLI entry point: parse options and run the migration.
    parser = argparse.ArgumentParser(description="迁移旧版项目到数据库")
    parser.add_argument("--temp-dir", type=str, default=None,
                        help="temp 目录路径 (默认使用 config.TEMP_DIR)")
    parser.add_argument("--force", action="store_true",
                        help="强制更新已存在的项目")
    args = parser.parse_args()
    print("🚀 开始迁移项目数据...")
    migrate_json_projects(args.temp_dir, args.force)

View File

@@ -0,0 +1,30 @@
"""
One-off migration:
- Ensure admin exists
- Backfill projects.owner_user_id to admin for legacy projects

Usage:
    python3 scripts/migrate_users_and_owner.py
"""
import os
import sys

# Ensure repo root is on sys.path when executed from scripts/ directory.
REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
if REPO_ROOT not in sys.path:
    sys.path.insert(0, REPO_ROOT)

from modules.db_manager import db
def main():
    """Create/verify the admin account and claim ownership of legacy projects."""
    admin_id = db.ensure_admin_user("admin", "admin1234")
    backfilled = db.migrate_projects_owner_to(admin_id)
    print(f"admin_id={admin_id} backfilled_projects={backfilled}")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,232 @@
#!/usr/bin/env python3
"""
Scan legacy project JSON schemas under temp dir.
Purpose:
- Identify schema variants for /opt/gloda-factory/temp/project_*.json
- Produce a machine-readable summary + a markdown report
This script is READ-ONLY.
"""
from __future__ import annotations
import argparse
import json
from collections import Counter, defaultdict
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Tuple
def _safe_load_json(path: Path) -> Dict[str, Any] | None:
    """Parse *path* as UTF-8 JSON; return None on any read or parse failure."""
    try:
        raw = path.read_text(encoding="utf-8")
    except Exception:
        return None
    try:
        return json.loads(raw)
    except Exception:
        return None
def _type_name(v: Any) -> str:
    """Return a short, stable label for the JSON-level type of *v*."""
    if v is None:
        return "null"
    # bool must be tested before int: bool is an int subclass in Python.
    for typ, label in (
        (bool, "bool"),
        (int, "int"),
        (float, "float"),
        (str, "str"),
        (list, "list"),
        (dict, "dict"),
    ):
        if isinstance(v, typ):
            return label
    return type(v).__name__
def _detect_schema_variant(doc: Dict[str, Any]) -> str:
    """Classify a legacy project document by its scene-level keys.

    Heuristic:
    - Schema_A: at least one scene carries a prompt-like field
      (image_prompt / visual_prompt / video_prompt)
    - Schema_B: no prompt fields, but typical B keys appear
      (keyframe / story_beat / camera_movement / image_url)
    - Unknown: scenes missing, not a list, or neither key set matches
    """
    scenes = doc.get("scenes") or []
    if not isinstance(scenes, list):
        return "Unknown"
    prompt_keys = {"image_prompt", "visual_prompt", "video_prompt"}
    if any(isinstance(s, dict) and (prompt_keys & set(s.keys())) for s in scenes):
        return "Schema_A"
    typical_b = {"keyframe", "story_beat", "camera_movement", "image_url"}
    if any(isinstance(s, dict) and (typical_b & set(s.keys())) for s in scenes):
        return "Schema_B"
    return "Unknown"
@dataclass
class ScanResult:
    """Aggregated statistics from scanning a temp dir of project JSON files."""
    # Total project_*.json files found (including unparseable ones).
    total_files: int
    # Files successfully parsed into a JSON object.
    parsed_files: int
    # Files that failed to parse or were not a JSON object.
    failed_files: int
    # Schema variant name -> number of files.
    schema_counts: Counter
    # Top-level key -> number of files containing it.
    top_level_key_counts: Counter
    # Scene-dict key -> total occurrences across all scenes.
    scene_key_counts: Counter
    # JSON type name of the "cta" field -> number of files.
    cta_type_counts: Counter
    # Schema variant -> up to 5 sample project ids.
    sample_by_schema: Dict[str, List[str]]
def scan_dir(temp_dir: Path) -> ScanResult:
    """Scan project_*.json files under *temp_dir* and aggregate schema stats."""
    files = sorted(temp_dir.glob("project_*.json"))
    schema_counts: Counter = Counter()
    top_level_key_counts: Counter = Counter()
    scene_key_counts: Counter = Counter()
    cta_type_counts: Counter = Counter()
    samples: Dict[str, List[str]] = defaultdict(list)
    parsed_count = 0
    failed_count = 0
    for path in files:
        doc = _safe_load_json(path)
        if not isinstance(doc, dict):
            failed_count += 1
            continue
        parsed_count += 1
        variant = _detect_schema_variant(doc)
        schema_counts[variant] += 1
        # Keep at most five sample project ids per schema variant.
        if len(samples[variant]) < 5:
            pid = str(doc.get("id") or path.stem.replace("project_", ""))
            samples[variant].append(pid)
        # Tally top-level keys (once per file).
        top_level_key_counts.update(doc.keys())
        # Tally scene keys (every occurrence across all scenes).
        scenes = doc.get("scenes") or []
        if isinstance(scenes, list):
            for scene in scenes:
                if isinstance(scene, dict):
                    scene_key_counts.update(scene.keys())
        # Record the JSON type of the "cta" field.
        cta_type_counts[_type_name(doc.get("cta"))] += 1
    return ScanResult(
        total_files=len(files),
        parsed_files=parsed_count,
        failed_files=failed_count,
        schema_counts=schema_counts,
        top_level_key_counts=top_level_key_counts,
        scene_key_counts=scene_key_counts,
        cta_type_counts=cta_type_counts,
        sample_by_schema=dict(samples),
    )
def _to_jsonable(sr: ScanResult) -> Dict[str, Any]:
    """Convert a ScanResult into plain JSON-serializable builtins."""
    payload: Dict[str, Any] = {
        "total_files": sr.total_files,
        "parsed_files": sr.parsed_files,
        "failed_files": sr.failed_files,
    }
    # Counters become plain dicts so json.dumps accepts them directly.
    payload["schema_counts"] = dict(sr.schema_counts)
    payload["cta_type_counts"] = dict(sr.cta_type_counts)
    payload["top_level_key_counts"] = dict(sr.top_level_key_counts)
    payload["scene_key_counts"] = dict(sr.scene_key_counts)
    payload["sample_by_schema"] = sr.sample_by_schema
    return payload
def _render_markdown(sr: ScanResult, temp_dir: Path) -> str:
    """Render a human-readable markdown report for *sr*."""
    out: List[str] = ["# Legacy Project JSON Schema Scan Report\n"]
    out.append(f"- temp_dir: `{temp_dir}`")
    out.append(f"- total_files: {sr.total_files}")
    out.append(f"- parsed_files: {sr.parsed_files}")
    out.append(f"- failed_files: {sr.failed_files}\n")

    out.append("## Schema variants\n")
    for variant, count in sr.schema_counts.most_common():
        sample_ids = ", ".join(sr.sample_by_schema.get(variant, [])[:5])
        out.append(f"- {variant}: {count} (samples: {sample_ids})")
    out.append("")

    out.append("## CTA type distribution\n")
    for label, count in sr.cta_type_counts.most_common():
        out.append(f"- {label}: {count}")
    out.append("")

    out.append("## Top-level keys (top 30)\n")
    for key, count in sr.top_level_key_counts.most_common(30):
        # Denominator shows how many files were parsed at all.
        out.append(f"- {key}: {count}/{sr.parsed_files}")
    out.append("")

    out.append("## Scene keys (top 40)\n")
    for key, count in sr.scene_key_counts.most_common(40):
        out.append(f"- {key}: {count}")
    out.append("")

    return "\n".join(out) + "\n"
def main() -> int:
    """CLI entry: scan --temp-dir and emit JSON/markdown summaries.

    Prints the JSON summary to stdout; optionally writes it to --out-json
    and a markdown report to --out-md. This tool is read-only with respect
    to the scanned directory.
    """
    cli = argparse.ArgumentParser(description="Scan legacy project JSON schemas")
    cli.add_argument("--temp-dir", required=True, help="Directory containing project_*.json")
    cli.add_argument("--out-json", required=False, help="Write summary json to path")
    cli.add_argument("--out-md", required=False, help="Write markdown report to path")
    opts = cli.parse_args()

    scan_root = Path(opts.temp_dir)
    if not scan_root.exists():
        raise SystemExit(f"temp dir not found: {scan_root}")

    result = scan_dir(scan_root)
    summary = _to_jsonable(result)
    print(json.dumps(summary, ensure_ascii=False, indent=2))

    if opts.out_json:
        dest = Path(opts.out_json)
        dest.parent.mkdir(parents=True, exist_ok=True)
        dest.write_text(json.dumps(summary, ensure_ascii=False, indent=2), encoding="utf-8")
    if opts.out_md:
        dest = Path(opts.out_md)
        dest.parent.mkdir(parents=True, exist_ok=True)
        dest.write_text(_render_markdown(result, scan_root), encoding="utf-8")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())