chore: sync code and project files

This commit is contained in:
Tony Zhang
2026-01-09 14:09:16 +08:00
parent 3d1fb37769
commit 30d7eb4b35
94 changed files with 12706 additions and 255 deletions

View File

@@ -0,0 +1,95 @@
"""
Batch-import stickers into assets/stickers_builtin from a manifest and
generate index.json.

Usage:
    python3 scripts/import_stickers_manifest.py --manifest stickers_manifest.json

Manifest example (item names/tags are free-form display text):
{
  "pack": {
    "id": "fluent-emoji-subset",
    "name": "Fluent Emoji subset (Douyin favorites)",
    "license": "MIT (CHECK BEFORE PROD)",
    "attribution": "Microsoft Fluent UI Emoji"
  },
  "categories": [
    {
      "id": "douyin-basic",
      "name": "Douyin favorites",
      "items": [
        {"id": "fire", "name": "fire", "url": "https://.../fire.png", "tags": ["hot", "viral"]},
        {"id": "heart", "name": "heart", "url": "https://.../heart.png", "tags": ["like", "engagement"]}
      ]
    }
  ]
}
"""
from __future__ import annotations
import argparse
import json
import os
from pathlib import Path
from urllib.request import urlopen, Request
def _safe_name(s: str) -> str:
    """Reduce *s* to a filesystem-safe name.

    Alphanumerics, "-" and "_" are kept; every other character becomes "_".
    The result is capped at 80 characters; an empty/None input yields "item".
    """
    kept = []
    for ch in (s or ""):
        kept.append(ch if ch.isalnum() or ch in "-_" else "_")
    cleaned = "".join(kept)[:80]
    return cleaned or "item"
def download(url: str, out: Path) -> None:
    """Fetch *url* over HTTP(S) and write the response body to *out*.

    Parent directories of *out* are created as needed. A custom User-Agent
    is sent; the request times out after 60 seconds.
    """
    out.parent.mkdir(parents=True, exist_ok=True)
    request = Request(url, headers={"User-Agent": "video-flow-stickers/1.0"})
    with urlopen(request, timeout=60) as response:
        payload = response.read()
    out.write_bytes(payload)
def main() -> int:
    """Import stickers listed in a manifest into a local asset directory.

    Reads the manifest JSON, downloads every item's "url" into --out-dir,
    rewrites each item to reference the local "file" name instead, and
    writes the rewritten manifest as index.json.

    Returns:
        0 on success (suitable for SystemExit).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--manifest", required=True, help="manifest json path")
    ap.add_argument("--out-dir", default="assets/stickers_builtin", help="output directory")
    args = ap.parse_args()

    manifest_path = Path(args.manifest)
    data = json.loads(manifest_path.read_text(encoding="utf-8"))
    out_dir = Path(args.out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    pack = data.get("pack") or {}
    categories = data.get("categories") or []

    # Download each item and rewrite its "url" field to a local "file" name.
    for cat in categories:
        for it in (cat.get("items") or []):
            url = str(it.get("url") or "")
            if not url:
                continue
            # Derive the extension from the URL path (query string stripped).
            # BUG FIX: .jpg/.jpeg/.gif URLs used to be coerced to ".png",
            # mislabeling the downloaded bytes; accept the common raster and
            # vector formats, falling back to ".png" only for unknown ones.
            ext = Path(url.split("?")[0]).suffix.lower()
            if ext not in (".png", ".svg", ".webp", ".jpg", ".jpeg", ".gif"):
                ext = ".png"
            fid = _safe_name(str(it.get("id") or it.get("name") or "item"))
            fname = f"{fid}{ext}"
            target = out_dir / fname
            if not target.exists():  # resume-friendly: skip existing files
                print(f"download: {url} -> {target}")
                download(url, target)
            it["file"] = fname
            it.pop("url", None)

    # Emit the rewritten manifest next to the downloaded assets.
    out_index = out_dir / "index.json"
    out_index.write_text(
        json.dumps({"pack": pack, "categories": categories}, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
    print(f"written: {out_index}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

128
scripts/migrate_projects.py Normal file
View File

@@ -0,0 +1,128 @@
#!/usr/bin/env python3
"""
Migration script: import legacy JSON project files into the SQLite database
(used for debugging on port 8503).

Key points:
- Does not assume the legacy JSON matches the current Streamlit UI schema
- Uses `modules.legacy_normalizer.normalize_legacy_project()` for pure
  rule-based normalization
- Preserves `_legacy` so no information is lost
"""
import json
import sys
from pathlib import Path

# Make the repository root importable when running from scripts/.
sys.path.insert(0, str(Path(__file__).parent.parent))

import config
from modules.db_manager import db
from modules.legacy_normalizer import normalize_legacy_project
def migrate_json_projects(temp_dir: str = None, force: bool = False):
    """Read project_*.json files from the temp dir and import them into the DB.

    Args:
        temp_dir: Directory holding the legacy JSON files; defaults to
            config.TEMP_DIR when None.
        force: When True, re-import (update) projects that already exist
            in the database instead of skipping them.
    """
    if temp_dir is None:
        temp_dir = config.TEMP_DIR
    temp_path = Path(temp_dir)
    if not temp_path.exists():
        print(f"❌ temp 目录不存在: {temp_path}")
        return
    # Collect all legacy project JSON files.
    json_files = list(temp_path.glob("project_*.json"))
    if not json_files:
        print(f"⚠️ 未找到项目文件: {temp_path}/project_*.json")
        return
    print(f"📂 找到 {len(json_files)} 个项目文件")
    imported = 0
    updated = 0
    skipped = 0
    errors = 0
    for json_file in json_files:
        try:
            # Project id is the filename stem minus the "project_" prefix.
            project_id = json_file.stem.replace("project_", "")
            # Load the legacy JSON document.
            with open(json_file, "r", encoding="utf-8") as f:
                data = json.load(f)
            # Skip projects already present unless force was requested.
            existing = db.get_project(project_id)
            if existing and not force:
                print(f" ⏭️ 跳过已存在: {project_id}")
                skipped += 1
                continue
            # Product info: used for Step1 display and data retention.
            # NOTE: legacy image_urls are mostly remote URLs; the current
            # Streamlit Step1 uses uploaded_images (local paths).
            product_info = {
                "prompt": data.get("prompt", ""),
                "image_urls": data.get("image_urls", []),
                "analysis": data.get("analysis", ""),
                "questions": data.get("questions", []),
                "answers": data.get("answers", {}),
                "uploaded_images": [],  # legacy has no local upload paths
                "_legacy": data,
            }
            # Derive a display name from the prompt (fallback: project id).
            name = data.get("prompt", "")[:50] if data.get("prompt") else f"项目 {project_id}"
            # Normalize script data to the current UI schema; legacy is kept.
            script_data = normalize_legacy_project(data)
            if existing and force:
                # Update the existing project in place.
                if script_data:
                    db.update_project_script(project_id, script_data)
                status = data.get("status", "draft")
                db.update_project_status(project_id, status)
                print(f" 🔄 更新成功: {project_id} ({name[:30]}...)")
                updated += 1
            else:
                # Create a new project record.
                db.create_project(project_id, name, product_info)
                # Attach the normalized script, if any.
                if script_data:
                    db.update_project_script(project_id, script_data)
                # Carry over the legacy status (default: draft).
                status = data.get("status", "draft")
                db.update_project_status(project_id, status)
                print(f" ✅ 导入成功: {project_id} ({name[:30]}...)")
                imported += 1
        except Exception as e:
            # Best-effort migration: report the failure and continue.
            print(f" ❌ 导入失败 {json_file.name}: {e}")
            import traceback
            traceback.print_exc()
            errors += 1
    print(f"\n📊 迁移完成:")
    print(f" ✅ 新导入: {imported}")
    print(f" 🔄 已更新: {updated}")
    print(f" ⏭️ 已跳过: {skipped}")
    print(f" ❌ 失败: {errors}")
if __name__ == "__main__":
    import argparse

    # CLI entry point: parse options and run the migration.
    parser = argparse.ArgumentParser(description="迁移旧版项目到数据库")
    parser.add_argument("--temp-dir", type=str, default=None,
                        help="temp 目录路径 (默认使用 config.TEMP_DIR)")
    parser.add_argument("--force", action="store_true",
                        help="强制更新已存在的项目")
    args = parser.parse_args()
    print("🚀 开始迁移项目数据...")
    migrate_json_projects(args.temp_dir, args.force)

View File

@@ -0,0 +1,30 @@
"""
One-off migration:
- Ensure admin exists
- Backfill projects.owner_user_id to admin for legacy projects

Usage:
    python3 scripts/migrate_users_and_owner.py
"""
import os
import sys

# Ensure repo root is on sys.path when executed from scripts/ directory.
REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
if REPO_ROOT not in sys.path:
    sys.path.insert(0, REPO_ROOT)

from modules.db_manager import db
def main():
    """Create/verify the admin account and claim ownership of legacy projects."""
    admin_id = db.ensure_admin_user("admin", "admin1234")
    backfilled = db.migrate_projects_owner_to(admin_id)
    print(f"admin_id={admin_id} backfilled_projects={backfilled}")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,232 @@
#!/usr/bin/env python3
"""
Scan legacy project JSON schemas under temp dir.
Purpose:
- Identify schema variants for /opt/gloda-factory/temp/project_*.json
- Produce a machine-readable summary + a markdown report
This script is READ-ONLY.
"""
from __future__ import annotations
import argparse
import json
from collections import Counter, defaultdict
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Tuple
def _safe_load_json(path: Path) -> Dict[str, Any] | None:
    """Parse *path* as UTF-8 JSON; return None on any read or parse failure."""
    try:
        raw = path.read_text(encoding="utf-8")
    except Exception:
        return None
    try:
        return json.loads(raw)
    except Exception:
        return None
def _type_name(v: Any) -> str:
    """Return a short, stable label for the JSON-level type of *v*."""
    if v is None:
        return "null"
    # bool must be tested before int: bool is an int subclass in Python.
    for typ, label in (
        (bool, "bool"),
        (int, "int"),
        (float, "float"),
        (str, "str"),
        (list, "list"),
        (dict, "dict"),
    ):
        if isinstance(v, typ):
            return label
    return type(v).__name__
def _detect_schema_variant(doc: Dict[str, Any]) -> str:
    """Classify a legacy project document by its scene-level keys.

    Heuristic:
    - Schema_A: at least one scene carries a prompt-like field
      (image_prompt / visual_prompt / video_prompt)
    - Schema_B: no prompt fields, but typical B keys appear
      (keyframe / story_beat / camera_movement / image_url)
    - Unknown: scenes missing, not a list, or neither key set matches
    """
    scenes = doc.get("scenes") or []
    if not isinstance(scenes, list):
        return "Unknown"
    prompt_keys = {"image_prompt", "visual_prompt", "video_prompt"}
    if any(isinstance(s, dict) and (prompt_keys & set(s.keys())) for s in scenes):
        return "Schema_A"
    typical_b = {"keyframe", "story_beat", "camera_movement", "image_url"}
    if any(isinstance(s, dict) and (typical_b & set(s.keys())) for s in scenes):
        return "Schema_B"
    return "Unknown"
@dataclass
class ScanResult:
    """Aggregated statistics from scanning a temp dir of project JSON files."""
    # Total project_*.json files found (including unparseable ones).
    total_files: int
    # Files successfully parsed into a JSON object.
    parsed_files: int
    # Files that failed to parse or were not a JSON object.
    failed_files: int
    # Schema variant name -> number of files.
    schema_counts: Counter
    # Top-level key -> number of files containing it.
    top_level_key_counts: Counter
    # Scene-dict key -> total occurrences across all scenes.
    scene_key_counts: Counter
    # JSON type name of the "cta" field -> number of files.
    cta_type_counts: Counter
    # Schema variant -> up to 5 sample project ids.
    sample_by_schema: Dict[str, List[str]]
def scan_dir(temp_dir: Path) -> ScanResult:
    """Scan project_*.json files under *temp_dir* and aggregate schema stats."""
    files = sorted(temp_dir.glob("project_*.json"))
    schema_counts: Counter = Counter()
    top_level_key_counts: Counter = Counter()
    scene_key_counts: Counter = Counter()
    cta_type_counts: Counter = Counter()
    samples: Dict[str, List[str]] = defaultdict(list)
    parsed_count = 0
    failed_count = 0
    for path in files:
        doc = _safe_load_json(path)
        if not isinstance(doc, dict):
            failed_count += 1
            continue
        parsed_count += 1
        variant = _detect_schema_variant(doc)
        schema_counts[variant] += 1
        # Keep at most five sample project ids per schema variant.
        if len(samples[variant]) < 5:
            pid = str(doc.get("id") or path.stem.replace("project_", ""))
            samples[variant].append(pid)
        # Tally top-level keys (once per file).
        top_level_key_counts.update(doc.keys())
        # Tally scene keys (every occurrence across all scenes).
        scenes = doc.get("scenes") or []
        if isinstance(scenes, list):
            for scene in scenes:
                if isinstance(scene, dict):
                    scene_key_counts.update(scene.keys())
        # Record the JSON type of the "cta" field.
        cta_type_counts[_type_name(doc.get("cta"))] += 1
    return ScanResult(
        total_files=len(files),
        parsed_files=parsed_count,
        failed_files=failed_count,
        schema_counts=schema_counts,
        top_level_key_counts=top_level_key_counts,
        scene_key_counts=scene_key_counts,
        cta_type_counts=cta_type_counts,
        sample_by_schema=dict(samples),
    )
def _to_jsonable(sr: ScanResult) -> Dict[str, Any]:
    """Convert a ScanResult into plain JSON-serializable builtins."""
    payload: Dict[str, Any] = {
        "total_files": sr.total_files,
        "parsed_files": sr.parsed_files,
        "failed_files": sr.failed_files,
    }
    # Counters become plain dicts so json.dumps accepts them directly.
    payload["schema_counts"] = dict(sr.schema_counts)
    payload["cta_type_counts"] = dict(sr.cta_type_counts)
    payload["top_level_key_counts"] = dict(sr.top_level_key_counts)
    payload["scene_key_counts"] = dict(sr.scene_key_counts)
    payload["sample_by_schema"] = sr.sample_by_schema
    return payload
def _render_markdown(sr: ScanResult, temp_dir: Path) -> str:
    """Render a human-readable markdown report for *sr*."""
    out: List[str] = ["# Legacy Project JSON Schema Scan Report\n"]
    out.append(f"- temp_dir: `{temp_dir}`")
    out.append(f"- total_files: {sr.total_files}")
    out.append(f"- parsed_files: {sr.parsed_files}")
    out.append(f"- failed_files: {sr.failed_files}\n")

    out.append("## Schema variants\n")
    for variant, count in sr.schema_counts.most_common():
        sample_ids = ", ".join(sr.sample_by_schema.get(variant, [])[:5])
        out.append(f"- {variant}: {count} (samples: {sample_ids})")
    out.append("")

    out.append("## CTA type distribution\n")
    for label, count in sr.cta_type_counts.most_common():
        out.append(f"- {label}: {count}")
    out.append("")

    out.append("## Top-level keys (top 30)\n")
    for key, count in sr.top_level_key_counts.most_common(30):
        # Denominator shows how many files were parsed at all.
        out.append(f"- {key}: {count}/{sr.parsed_files}")
    out.append("")

    out.append("## Scene keys (top 40)\n")
    for key, count in sr.scene_key_counts.most_common(40):
        out.append(f"- {key}: {count}")
    out.append("")

    return "\n".join(out) + "\n"
def main() -> int:
    """CLI entry: scan --temp-dir and emit JSON/markdown summaries.

    Prints the JSON summary to stdout; optionally writes it to --out-json
    and a markdown report to --out-md. This tool is read-only with respect
    to the scanned directory.
    """
    cli = argparse.ArgumentParser(description="Scan legacy project JSON schemas")
    cli.add_argument("--temp-dir", required=True, help="Directory containing project_*.json")
    cli.add_argument("--out-json", required=False, help="Write summary json to path")
    cli.add_argument("--out-md", required=False, help="Write markdown report to path")
    opts = cli.parse_args()

    scan_root = Path(opts.temp_dir)
    if not scan_root.exists():
        raise SystemExit(f"temp dir not found: {scan_root}")

    result = scan_dir(scan_root)
    summary = _to_jsonable(result)
    print(json.dumps(summary, ensure_ascii=False, indent=2))

    if opts.out_json:
        dest = Path(opts.out_json)
        dest.parent.mkdir(parents=True, exist_ok=True)
        dest.write_text(json.dumps(summary, ensure_ascii=False, indent=2), encoding="utf-8")
    if opts.out_md:
        dest = Path(opts.out_md)
        dest.parent.mkdir(parents=True, exist_ok=True)
        dest.write_text(_render_markdown(result, scan_root), encoding="utf-8")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())