perf(8502): 并行生图(6并发)+超时重试；视频URL直连预览/下载；路径隔离

2025-12-17 12:21:22 +08:00
parent ebcf165c3f
commit 1e210ffccf
12 changed files with 1168 additions and 201 deletions
--- a/modules/legacy_normalizer.py
+++ b/modules/legacy_normalizer.py
@@ -0,0 +1,248 @@
+"""
+Legacy project JSON normalizer.
+
+Goal:
+- Convert legacy project JSON (from /opt/gloda-factory/temp/project_*.json)
+  into the script_data schema expected by current Streamlit UI (`app.py`)
+  and composer (`modules/composer.py`).
+
+Principles:
+- Pure rule-based, no AI generation.
+- Never drop legacy information: keep the full raw doc under `script_data["_legacy"]`
+  and a snapshot of selected legacy fields per scene under `scene["_legacy"]`.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional, Tuple
+
+
+def _as_str(v: Any) -> str:
+    """Return ``v`` unchanged when it is a ``str``; otherwise return ``""``."""
+    if isinstance(v, str):
+        return v
+    return ""
+
+
+def _as_dict(v: Any) -> Dict[str, Any]:
+    """Return ``v`` unchanged when it is a ``dict``; otherwise return a new empty dict."""
+    if isinstance(v, dict):
+        return v
+    return {}
+
+
+def _as_list(v: Any) -> List[Any]:
+    """Return ``v`` unchanged when it is a ``list``; otherwise return a new empty list."""
+    if isinstance(v, list):
+        return v
+    return []
+
+
+def _detect_schema_variant(doc: Dict[str, Any]) -> str:
+    """
+    Classify a legacy project document by the keys its scenes carry.
+
+    Returns:
+        ``"Schema_A"`` when any dict scene has a prompt key (``image_prompt``,
+        ``visual_prompt`` or ``video_prompt``); otherwise ``"Schema_B"`` when
+        any dict scene has ``keyframe``, ``story_beat``, ``camera_movement`` or
+        ``image_url``; otherwise ``"Unknown"`` (including when ``scenes`` is
+        missing, empty, or not a list).
+    """
+    scene_dicts = [
+        entry for entry in _as_list(doc.get("scenes")) if isinstance(entry, dict)
+    ]
+    # Order matters: Schema_A markers win over Schema_B markers.
+    variants = (
+        ("Schema_A", ("image_prompt", "visual_prompt", "video_prompt")),
+        ("Schema_B", ("keyframe", "story_beat", "camera_movement", "image_url")),
+    )
+    for label, marker_keys in variants:
+        if any(key in entry for entry in scene_dicts for key in marker_keys):
+            return label
+    return "Unknown"
+
+
+def _derive_visual_prompt_from_keyframe(scene: Dict[str, Any]) -> str:
+    """
+    Build a readable prompt-like summary from keyframe + story_beat.
+
+    This is NOT an AI prompt; it's a structured description to avoid empty fields.
+
+    Args:
+        scene: Legacy scene dict. The keyframe is read from ``"keyframe"``,
+            falling back to ``"keyframes"``; a non-dict keyframe is ignored.
+
+    Returns:
+        Newline-joined lines: a ``[DerivedFromKeyframe]`` marker (only when a
+        non-empty keyframe dict exists), one ``key: value`` line per usable
+        keyframe entry in sorted key order, and a ``story_beat: ...`` line when
+        the scene has a non-empty string story beat. Returns ``""`` when none
+        of these apply.
+    """
+    keyframe = _as_dict(scene.get("keyframe") or scene.get("keyframes"))
+    story_beat = _as_str(scene.get("story_beat"))
+
+    parts: List[str] = []
+    if keyframe:
+        parts.append("[DerivedFromKeyframe]")
+        # deterministic ordering for readability
+        for k in sorted(keyframe):
+            v = keyframe[k]
+            if isinstance(v, (str, int, float)):
+                # Blank strings are dropped; numbers are always kept.
+                if str(v).strip():
+                    parts.append(f"{k}: {v}")
+            elif isinstance(v, dict) and v:
+                # Flatten one level. Null and blank values are skipped; the
+                # explicit None check matters because str(None) is "None",
+                # which would otherwise leak into the text as "key=None".
+                sub = ", ".join(
+                    f"{sk}={sv}"
+                    for sk, sv in sorted(v.items())
+                    if sv is not None and str(sv).strip()
+                )
+                if sub:
+                    parts.append(f"{k}: {sub}")
+    if story_beat:
+        parts.append(f"story_beat: {story_beat}")
+    return "\n".join(parts).strip()
+
+
+def _derive_video_prompt_from_motion(scene: Dict[str, Any]) -> str:
+    """
+    Summarise a scene's motion hints as newline-separated text.
+
+    The first line is always the ``[DerivedFromMotion]`` marker. It is followed
+    by ``camera_movement`` (when a non-empty string), ``rhythm`` (when a
+    non-empty dict, rendered as ``key=value`` pairs in sorted key order) and
+    ``story_beat`` (when a non-empty string).
+    """
+    lines: List[str] = ["[DerivedFromMotion]"]
+
+    movement = _as_str(scene.get("camera_movement"))
+    if movement:
+        lines.append(f"camera_movement: {movement}")
+
+    rhythm = scene.get("rhythm")
+    if isinstance(rhythm, dict) and rhythm:
+        # Sorted keys keep the output stable across runs.
+        pairs = [f"{name}={rhythm.get(name)}" for name in sorted(rhythm.keys())]
+        lines.append("rhythm: " + ", ".join(pairs))
+
+    beat = _as_str(scene.get("story_beat"))
+    if beat:
+        lines.append(f"story_beat: {beat}")
+
+    return "\n".join(lines).strip()
+
+
+def _normalize_fancy_text(scene: Dict[str, Any], default_duration: float) -> Dict[str, Any]:
+    """
+    Return the scene's ``fancy_text`` block with every required key present.
+
+    Args:
+        scene: Scene dict; ``scene["fancy_text"]`` is used when it is a dict.
+        default_duration: Value used for ``"duration"`` when the key is absent.
+
+    Returns:
+        A new dict. Keys already present in the legacy block are kept as-is
+        (``position`` may be a dict or a string); missing keys are filled with
+        the defaults. When the scene has no dict ``fancy_text``, the defaults
+        alone are returned.
+    """
+    defaults: Dict[str, Any] = {
+        "text": "",
+        "style": "highlight",
+        "position": "center",
+        "start_time": 0.0,
+        "duration": default_duration,
+    }
+
+    existing = scene.get("fancy_text")
+    if not isinstance(existing, dict):
+        # legacy doesn't have fancy_text
+        return defaults
+
+    # Copy so the legacy scene is never mutated, then fill only the gaps.
+    merged = dict(existing)
+    for key, fallback in defaults.items():
+        if key not in merged:
+            merged[key] = fallback
+    return merged
+
+
+def _build_voiceover_timeline_from_scenes(normalized_scenes: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """
+    Derive a voiceover timeline by laying scenes end to end.
+
+    Each scene whose voiceover (taken from ``scene["_legacy"]["voiceover"]``,
+    then ``scene["voiceover"]``) is a non-blank string yields one entry that
+    starts at the summed positive durations of all earlier scenes. Entry ids
+    are 1-based scene positions, so scenes without voiceover leave gaps in the
+    id sequence. An entry for a scene without a positive duration is given a
+    duration of 3.0 seconds, but such a scene does not advance the clock.
+    """
+    entries: List[Dict[str, Any]] = []
+    clock = 0.0
+    for position, scene in enumerate(normalized_scenes, start=1):
+        length = float(scene.get("duration") or 0) or 0.0
+        has_length = length > 0
+
+        snapshot = _as_dict(scene.get("_legacy"))
+        spoken = _as_str(snapshot.get("voiceover") or scene.get("voiceover") or "")
+        if spoken.strip():
+            entry: Dict[str, Any] = {
+                "id": position,
+                "text": spoken,
+                "subtitle": spoken,
+                "start_time": clock,
+                "duration": length if has_length else 3.0,
+            }
+            entries.append(entry)
+
+        if has_length:
+            clock += length
+    return entries
+
+
+def _coerce_float(value: Any, default: float) -> float:
+    """Convert ``value`` to ``float``, returning ``default`` when it is not numeric."""
+    try:
+        return float(value)
+    except (TypeError, ValueError, OverflowError):
+        return default
+
+
+def _coerce_int(value: Any, default: int) -> int:
+    """Convert ``value`` to ``int``, returning ``default`` when it is not numeric.
+
+    Numeric strings with a fractional part (e.g. ``"2.0"``) are truncated the
+    same way a float would be.
+    """
+    for convert in (int, lambda raw: int(float(raw))):
+        try:
+            return convert(value)
+        except (TypeError, ValueError, OverflowError):
+            continue
+    return default
+
+
+def normalize_legacy_project(doc: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Return a script_data dict compatible with current UI.
+
+    Args:
+        doc: Parsed legacy project JSON. A non-dict value is treated as an
+            empty project instead of raising.
+
+    Returns:
+        A dict with the keys ``hook``, ``selling_points``, ``target_audience``,
+        ``video_style``, ``bgm_style``, ``voiceover_timeline``, ``scenes``,
+        ``cta``, ``analysis``, ``_legacy`` (the original ``doc`` object, not a
+        copy) and ``_legacy_schema``.
+
+        Each scene has ``id``, ``duration``, ``visual_prompt``,
+        ``video_prompt``, ``fancy_text``, ``timeline`` and a ``_legacy``
+        snapshot of selected legacy fields. Non-dict scenes are skipped.
+
+    Numeric legacy fields (scene ``id``/``duration``; timeline ``id``/
+    ``start_time``/``duration``) that cannot be parsed fall back to their
+    defaults rather than aborting the whole normalization. Durations that are
+    missing, non-positive or NaN become 3.0 seconds.
+    """
+    # Read through a dict view so a malformed payload cannot crash on `.get`;
+    # the untouched original is still stored under "_legacy" below.
+    source = _as_dict(doc)
+    schema = _detect_schema_variant(source)
+
+    normalized_scenes: List[Dict[str, Any]] = []
+
+    for s in _as_list(source.get("scenes")):
+        if not isinstance(s, dict):
+            continue
+
+        fallback_id = len(normalized_scenes) + 1
+        scene_id = _coerce_int(s.get("id") or fallback_id, fallback_id)
+        duration = _coerce_float(s.get("duration") or 0, 0.0)
+        # `not x > 0` (rather than `x <= 0`) also catches NaN.
+        if not duration > 0:
+            duration = 3.0
+
+        # Legacy data uses both "keyframe" and "keyframes"; accept either
+        # everywhere so the fallback below agrees with the deriver.
+        keyframe = s.get("keyframe") or s.get("keyframes")
+
+        # visual prompt
+        if schema == "Schema_B":
+            visual_prompt = _derive_visual_prompt_from_keyframe(s)
+        else:
+            # legacy key is usually image_prompt
+            visual_prompt = _as_str(s.get("visual_prompt") or s.get("image_prompt") or "")
+            if not visual_prompt and keyframe:
+                visual_prompt = _derive_visual_prompt_from_keyframe(s)
+
+        # video prompt
+        video_prompt = _as_str(s.get("video_prompt") or "")
+        if not video_prompt:
+            video_prompt = _derive_video_prompt_from_motion(s)
+
+        # fancy text (default safe)
+        fancy_text = _normalize_fancy_text(s, default_duration=duration)
+
+        normalized_scene: Dict[str, Any] = {
+            "id": scene_id,
+            "duration": duration,
+            "visual_prompt": visual_prompt,
+            "video_prompt": video_prompt,
+            "fancy_text": fancy_text,
+            # keep optional fields if present
+            "timeline": s.get("timeline", ""),
+        }
+
+        # Attach per-scene legacy snapshot (do not mutate the original)
+        normalized_scene["_legacy"] = {
+            "schema": schema,
+            "image_url": s.get("image_url"),
+            "keyframe": keyframe,
+            "camera_movement": s.get("camera_movement"),
+            "story_beat": s.get("story_beat"),
+            "rhythm": s.get("rhythm"),
+            "sound_design": s.get("sound_design"),
+            "voiceover": s.get("voiceover"),
+        }
+
+        normalized_scenes.append(normalized_scene)
+
+    # voiceover timeline: normalize existing if present, else derive from scenes voiceover
+    vtl = source.get("voiceover_timeline")
+    voiceover_timeline: List[Dict[str, Any]] = []
+    if isinstance(vtl, list) and vtl:
+        for idx, it in enumerate(vtl):
+            if not isinstance(it, dict):
+                continue
+            # unify field names
+            text = _as_str(it.get("text") or it.get("voiceover") or "")
+            subtitle = _as_str(it.get("subtitle") or text)
+            start_time = _coerce_float(it.get("start_time") or 0.0, 0.0)
+            item_duration = _coerce_float(it.get("duration") or 3.0, 3.0)
+            # Same rule as scene durations: never emit a non-positive length.
+            if not item_duration > 0:
+                item_duration = 3.0
+            voiceover_timeline.append(
+                {
+                    "id": _coerce_int(it.get("id") or (idx + 1), idx + 1),
+                    "text": text,
+                    "subtitle": subtitle,
+                    "start_time": start_time,
+                    "duration": item_duration,
+                }
+            )
+    else:
+        voiceover_timeline = _build_voiceover_timeline_from_scenes(normalized_scenes)
+
+    # script_data expected by UI
+    script_data: Dict[str, Any] = {
+        "hook": source.get("hook", ""),
+        "selling_points": source.get("selling_points", []) or [],
+        "target_audience": source.get("target_audience", "") or "",
+        "video_style": source.get("video_style", "") or "",
+        "bgm_style": source.get("bgm_style", "") or "",
+        "voiceover_timeline": voiceover_timeline,
+        "scenes": normalized_scenes,
+        "cta": source.get("cta", ""),
+        # Keep analysis for UI fallback display
+        "analysis": source.get("analysis", ""),
+        # Preserve original
+        "_legacy": doc,
+        "_legacy_schema": schema,
+    }
+
+    return script_data
+
+
+
+
+
+
+