chore: sync code and project files
This commit is contained in:
95
scripts/import_stickers_manifest.py
Normal file
95
scripts/import_stickers_manifest.py
Normal file
@@ -0,0 +1,95 @@
|
||||
"""
|
||||
根据 manifest 批量导入贴纸到 assets/stickers_builtin,并生成 index.json。
|
||||
|
||||
用法:
|
||||
python3 scripts/import_stickers_manifest.py --manifest stickers_manifest.json
|
||||
|
||||
manifest 示例:
|
||||
{
|
||||
"pack": {
|
||||
"id": "fluent-emoji-subset",
|
||||
"name": "Fluent Emoji 子集(抖音常用)",
|
||||
"license": "MIT (CHECK BEFORE PROD)",
|
||||
"attribution": "Microsoft Fluent UI Emoji"
|
||||
},
|
||||
"categories": [
|
||||
{
|
||||
"id": "douyin-basic",
|
||||
"name": "抖音常用",
|
||||
"items": [
|
||||
{"id": "fire", "name": "火", "url": "https://.../fire.png", "tags": ["火","爆款"]},
|
||||
{"id": "heart", "name": "爱心", "url": "https://.../heart.png", "tags": ["点赞","互动"]}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from urllib.request import urlopen, Request
|
||||
|
||||
|
||||
def _safe_name(s: str) -> str:
|
||||
return "".join(ch if ch.isalnum() or ch in ("-", "_") else "_" for ch in (s or ""))[:80] or "item"
|
||||
|
||||
|
||||
def download(url: str, out: Path) -> None:
    """Fetch *url* and write the raw response bytes to *out*.

    Parent directories of *out* are created as needed. A custom User-Agent
    is sent; the request times out after 60 seconds.
    """
    out.parent.mkdir(parents=True, exist_ok=True)
    request = Request(url, headers={"User-Agent": "video-flow-stickers/1.0"})
    with urlopen(request, timeout=60) as response:
        payload = response.read()
    out.write_bytes(payload)
|
||||
|
||||
|
||||
def main() -> int:
    """CLI entry: download stickers listed in a manifest and emit index.json.

    Each item's remote ``url`` is downloaded into the output directory and
    the item is rewritten to carry a local ``file`` name instead.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--manifest", required=True, help="manifest json path")
    parser.add_argument("--out-dir", default="assets/stickers_builtin", help="output directory")
    args = parser.parse_args()

    data = json.loads(Path(args.manifest).read_text(encoding="utf-8"))

    out_dir = Path(args.out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    pack = data.get("pack") or {}
    categories = data.get("categories") or []

    # Download each item and replace its remote "url" with a local "file" name.
    for category in categories:
        for item in (category.get("items") or []):
            url = str(item.get("url") or "")
            if not url:
                continue
            # Derive the extension from the URL path (query string stripped).
            suffix = Path(url.split("?")[0]).suffix.lower()
            if suffix not in (".png", ".svg", ".webp"):
                suffix = ".png"
            stem = _safe_name(str(item.get("id") or item.get("name") or "item"))
            fname = f"{stem}{suffix}"
            target = out_dir / fname
            # Skip downloads whose target file already exists (idempotent re-runs).
            if not target.exists():
                print(f"download: {url} -> {target}")
                download(url, target)
            item["file"] = fname
            item.pop("url", None)

    # Write the rewritten manifest as index.json next to the downloaded assets.
    index_path = out_dir / "index.json"
    index_path.write_text(
        json.dumps({"pack": pack, "categories": categories}, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
    print(f"written: {index_path}")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate main()'s integer return code as the process exit status.
    raise SystemExit(main())
|
||||
|
||||
|
||||
|
||||
128
scripts/migrate_projects.py
Normal file
128
scripts/migrate_projects.py
Normal file
@@ -0,0 +1,128 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
迁移脚本:将旧版 JSON 项目文件导入到 SQLite 数据库(用于 8503 调试)
|
||||
|
||||
关键点:
|
||||
- 不假设 legacy JSON 与当前 Streamlit UI schema 一致
|
||||
- 使用 `modules.legacy_normalizer.normalize_legacy_project()` 做纯规则规范化
|
||||
- 保留 `_legacy`,确保信息不丢失
|
||||
"""
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# 添加项目根目录到路径
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
import config
|
||||
from modules.db_manager import db
|
||||
from modules.legacy_normalizer import normalize_legacy_project
|
||||
|
||||
def migrate_json_projects(temp_dir: "str | None" = None, force: bool = False):
    """Import legacy ``project_*.json`` files from a temp dir into the SQLite DB.

    Legacy JSON is not assumed to match the current Streamlit UI schema: each
    document is normalized via ``normalize_legacy_project`` and the raw payload
    is preserved under ``_legacy`` so no information is lost.

    Args:
        temp_dir: Directory holding the legacy files; ``config.TEMP_DIR``
            is used when ``None``.
        force: When True, re-import (update) projects that already exist.
    """
    if temp_dir is None:
        temp_dir = config.TEMP_DIR

    temp_path = Path(temp_dir)

    if not temp_path.exists():
        print(f"❌ temp 目录不存在: {temp_path}")
        return

    # Collect every legacy project JSON file.
    json_files = list(temp_path.glob("project_*.json"))

    if not json_files:
        print(f"⚠️ 未找到项目文件: {temp_path}/project_*.json")
        return

    print(f"📂 找到 {len(json_files)} 个项目文件")

    imported = 0
    updated = 0
    skipped = 0
    errors = 0

    for json_file in json_files:
        try:
            project_id = json_file.stem.replace("project_", "")

            # Load the legacy document.
            with open(json_file, "r", encoding="utf-8") as f:
                data = json.load(f)

            # Skip projects already present unless --force was given.
            existing = db.get_project(project_id)
            if existing and not force:
                print(f" ⏭️ 跳过已存在: {project_id}")
                skipped += 1
                continue

            # Product info: retained for Step1 display and round-tripping.
            # NOTE: legacy image_urls are mostly remote URLs; the current
            # Streamlit Step1 uses uploaded_images (local paths) instead.
            product_info = {
                "prompt": data.get("prompt", ""),
                "image_urls": data.get("image_urls", []),
                "analysis": data.get("analysis", ""),
                "questions": data.get("questions", []),
                "answers": data.get("answers", {}),
                "uploaded_images": [],  # legacy has no locally uploaded image paths
                "_legacy": data,
            }

            # Derive a display name from the prompt (fallback: the project id).
            name = data.get("prompt", "")[:50] if data.get("prompt") else f"项目 {project_id}"

            # Normalize script data to the current UI schema (legacy preserved).
            script_data = normalize_legacy_project(data)

            if existing and force:
                # Update the existing project in place.
                if script_data:
                    db.update_project_script(project_id, script_data)
                status = data.get("status", "draft")
                db.update_project_status(project_id, status)
                print(f" 🔄 更新成功: {project_id} ({name[:30]}...)")
                updated += 1
            else:
                # Create a brand-new project record.
                db.create_project(project_id, name, product_info)

                # Attach the normalized script, if any.
                if script_data:
                    db.update_project_script(project_id, script_data)

                # Carry over the legacy status (defaulting to draft).
                status = data.get("status", "draft")
                db.update_project_status(project_id, status)

                print(f" ✅ 导入成功: {project_id} ({name[:30]}...)")
                imported += 1

        except Exception as e:
            # Best-effort migration: report the failure and continue with the rest.
            print(f" ❌ 导入失败 {json_file.name}: {e}")
            import traceback
            traceback.print_exc()
            errors += 1

    print("\n📊 迁移完成:")
    print(f" ✅ 新导入: {imported}")
    print(f" 🔄 已更新: {updated}")
    print(f" ⏭️ 已跳过: {skipped}")
    print(f" ❌ 失败: {errors}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import argparse

    # Thin CLI wrapper around migrate_json_projects().
    arg_parser = argparse.ArgumentParser(description="迁移旧版项目到数据库")
    arg_parser.add_argument(
        "--temp-dir",
        type=str,
        default=None,
        help="temp 目录路径 (默认使用 config.TEMP_DIR)",
    )
    arg_parser.add_argument(
        "--force",
        action="store_true",
        help="强制更新已存在的项目",
    )
    cli_args = arg_parser.parse_args()

    print("🚀 开始迁移项目数据...")
    migrate_json_projects(cli_args.temp_dir, cli_args.force)
|
||||
30
scripts/migrate_users_and_owner.py
Normal file
30
scripts/migrate_users_and_owner.py
Normal file
@@ -0,0 +1,30 @@
|
||||
"""
|
||||
One-off migration:
|
||||
- Ensure admin exists
|
||||
- Backfill projects.owner_user_id to admin for legacy projects
|
||||
|
||||
Usage:
|
||||
python3 scripts/migrate_users_and_owner.py
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Ensure repo root is on sys.path when executed from scripts/ directory
|
||||
REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
|
||||
if REPO_ROOT not in sys.path:
|
||||
sys.path.insert(0, REPO_ROOT)
|
||||
|
||||
from modules.db_manager import db
|
||||
|
||||
|
||||
def main():
    """Ensure the admin account exists, then backfill legacy project ownership."""
    admin_id = db.ensure_admin_user("admin", "admin1234")
    # Assign ownership of legacy (ownerless) projects to the admin account.
    migrated = db.migrate_projects_owner_to(admin_id)
    print(f"admin_id={admin_id} backfilled_projects={migrated}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the one-off migration when executed as a script.
    main()
|
||||
|
||||
|
||||
232
scripts/scan_legacy_schema.py
Normal file
232
scripts/scan_legacy_schema.py
Normal file
@@ -0,0 +1,232 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Scan legacy project JSON schemas under temp dir.
|
||||
|
||||
Purpose:
|
||||
- Identify schema variants for /opt/gloda-factory/temp/project_*.json
|
||||
- Produce a machine-readable summary + a markdown report
|
||||
|
||||
This script is READ-ONLY.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from collections import Counter, defaultdict
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Tuple
|
||||
|
||||
|
||||
def _safe_load_json(path: Path) -> Dict[str, Any] | None:
|
||||
try:
|
||||
return json.loads(path.read_text(encoding="utf-8"))
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _type_name(v: Any) -> str:
|
||||
if v is None:
|
||||
return "null"
|
||||
if isinstance(v, bool):
|
||||
return "bool"
|
||||
if isinstance(v, int):
|
||||
return "int"
|
||||
if isinstance(v, float):
|
||||
return "float"
|
||||
if isinstance(v, str):
|
||||
return "str"
|
||||
if isinstance(v, list):
|
||||
return "list"
|
||||
if isinstance(v, dict):
|
||||
return "dict"
|
||||
return type(v).__name__
|
||||
|
||||
|
||||
def _detect_schema_variant(doc: Dict[str, Any]) -> str:
|
||||
"""
|
||||
Heuristic:
|
||||
- Schema_A: scenes contain prompt-like fields (image_prompt/visual_prompt/video_prompt)
|
||||
- Schema_B: scenes do NOT contain these, but contain keyframe/story_beat/camera_movement/image_url
|
||||
"""
|
||||
scenes = doc.get("scenes") or []
|
||||
if not isinstance(scenes, list):
|
||||
return "Unknown"
|
||||
|
||||
prompt_keys = {"image_prompt", "visual_prompt", "video_prompt"}
|
||||
seen_prompt = False
|
||||
for s in scenes:
|
||||
if isinstance(s, dict) and (set(s.keys()) & prompt_keys):
|
||||
seen_prompt = True
|
||||
break
|
||||
|
||||
if seen_prompt:
|
||||
return "Schema_A"
|
||||
|
||||
# If no prompt keys, but has typical B keys, call Schema_B
|
||||
typical_b = {"keyframe", "story_beat", "camera_movement", "image_url"}
|
||||
seen_b = False
|
||||
for s in scenes:
|
||||
if isinstance(s, dict) and (set(s.keys()) & typical_b):
|
||||
seen_b = True
|
||||
break
|
||||
|
||||
return "Schema_B" if seen_b else "Unknown"
|
||||
|
||||
|
||||
@dataclass
class ScanResult:
    """Aggregated statistics from scanning a directory of legacy project JSON files."""

    # Count of project_*.json files discovered (parsed + failed).
    total_files: int
    # Files that parsed successfully into a JSON object (dict).
    parsed_files: int
    # Files that failed to parse or did not decode to a dict.
    failed_files: int
    # Occurrences of each detected schema variant (Schema_A/Schema_B/Unknown).
    schema_counts: Counter
    # How many documents contain each top-level key.
    top_level_key_counts: Counter
    # How many scene dicts contain each key (summed across all documents).
    scene_key_counts: Counter
    # Distribution of the JSON type of each document's "cta" field.
    cta_type_counts: Counter
    # Up to 5 sample project ids per schema variant.
    sample_by_schema: Dict[str, List[str]]
|
||||
|
||||
|
||||
def scan_dir(temp_dir: Path) -> ScanResult:
    """Scan *temp_dir* for ``project_*.json`` files and aggregate schema stats."""
    files = sorted(temp_dir.glob("project_*.json"))
    schema_counts: Counter = Counter()
    top_level_key_counts: Counter = Counter()
    scene_key_counts: Counter = Counter()
    cta_type_counts: Counter = Counter()
    sample_by_schema: Dict[str, List[str]] = defaultdict(list)

    parsed = 0
    failed = 0

    for path in files:
        document = _safe_load_json(path)
        if not isinstance(document, dict):
            failed += 1
            continue
        parsed += 1

        # Tally the schema variant; remember up to 5 sample project ids per variant.
        variant = _detect_schema_variant(document)
        schema_counts[variant] += 1
        samples = sample_by_schema[variant]
        if len(samples) < 5:
            samples.append(str(document.get("id") or path.stem.replace("project_", "")))

        # Tally top-level keys.
        top_level_key_counts.update(document.keys())

        # Tally keys appearing inside scene dicts.
        scenes = document.get("scenes") or []
        if isinstance(scenes, list):
            for scene in scenes:
                if isinstance(scene, dict):
                    scene_key_counts.update(scene.keys())

        # Tally the JSON type of the "cta" field.
        cta_type_counts[_type_name(document.get("cta"))] += 1

    return ScanResult(
        total_files=len(files),
        parsed_files=parsed,
        failed_files=failed,
        schema_counts=schema_counts,
        top_level_key_counts=top_level_key_counts,
        scene_key_counts=scene_key_counts,
        cta_type_counts=cta_type_counts,
        sample_by_schema=dict(sample_by_schema),
    )
|
||||
|
||||
|
||||
def _to_jsonable(sr: ScanResult) -> Dict[str, Any]:
|
||||
return {
|
||||
"total_files": sr.total_files,
|
||||
"parsed_files": sr.parsed_files,
|
||||
"failed_files": sr.failed_files,
|
||||
"schema_counts": dict(sr.schema_counts),
|
||||
"cta_type_counts": dict(sr.cta_type_counts),
|
||||
"top_level_key_counts": dict(sr.top_level_key_counts),
|
||||
"scene_key_counts": dict(sr.scene_key_counts),
|
||||
"sample_by_schema": sr.sample_by_schema,
|
||||
}
|
||||
|
||||
|
||||
def _render_markdown(sr: ScanResult, temp_dir: Path) -> str:
|
||||
lines: List[str] = []
|
||||
lines.append("# Legacy Project JSON Schema Scan Report\n")
|
||||
lines.append(f"- temp_dir: `{temp_dir}`")
|
||||
lines.append(f"- total_files: {sr.total_files}")
|
||||
lines.append(f"- parsed_files: {sr.parsed_files}")
|
||||
lines.append(f"- failed_files: {sr.failed_files}\n")
|
||||
|
||||
lines.append("## Schema variants\n")
|
||||
for k, v in sr.schema_counts.most_common():
|
||||
samples = ", ".join(sr.sample_by_schema.get(k, [])[:5])
|
||||
lines.append(f"- {k}: {v} (samples: {samples})")
|
||||
lines.append("")
|
||||
|
||||
lines.append("## CTA type distribution\n")
|
||||
for k, v in sr.cta_type_counts.most_common():
|
||||
lines.append(f"- {k}: {v}")
|
||||
lines.append("")
|
||||
|
||||
def _topn(counter: Counter, n: int = 30) -> List[Tuple[str, int]]:
|
||||
return counter.most_common(n)
|
||||
|
||||
lines.append("## Top-level keys (top 30)\n")
|
||||
for k, v in _topn(sr.top_level_key_counts, 30):
|
||||
lines.append(f"- {k}: {v}/{sr.parsed_files}")
|
||||
lines.append("")
|
||||
|
||||
lines.append("## Scene keys (top 40)\n")
|
||||
for k, v in _topn(sr.scene_key_counts, 40):
|
||||
lines.append(f"- {k}: {v}")
|
||||
lines.append("")
|
||||
|
||||
return "\n".join(lines) + "\n"
|
||||
|
||||
|
||||
def main() -> int:
    """CLI entry: scan a temp dir of legacy project JSON and report schemas."""
    argp = argparse.ArgumentParser(description="Scan legacy project JSON schemas")
    argp.add_argument("--temp-dir", required=True, help="Directory containing project_*.json")
    argp.add_argument("--out-json", required=False, help="Write summary json to path")
    argp.add_argument("--out-md", required=False, help="Write markdown report to path")
    args = argp.parse_args()

    temp_dir = Path(args.temp_dir)
    if not temp_dir.exists():
        raise SystemExit(f"temp dir not found: {temp_dir}")

    result = scan_dir(temp_dir)
    payload = _to_jsonable(result)

    # Always dump the JSON summary to stdout.
    print(json.dumps(payload, ensure_ascii=False, indent=2))

    # Optionally persist the JSON summary.
    if args.out_json:
        dest = Path(args.out_json)
        dest.parent.mkdir(parents=True, exist_ok=True)
        dest.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")

    # Optionally persist the markdown report.
    if args.out_md:
        dest = Path(args.out_md)
        dest.parent.mkdir(parents=True, exist_ok=True)
        dest.write_text(_render_markdown(result, temp_dir), encoding="utf-8")

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate main()'s integer return code as the process exit status.
    raise SystemExit(main())
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user