perf(8502): 并行生图(6并发)+超时重试；视频URL直连预览/下载；路径隔离

2025-12-17 12:21:22 +08:00
parent ebcf165c3f
commit 1e210ffccf
12 changed files with 1168 additions and 201 deletions
--- a/app.py
+++ b/app.py
@@ -9,6 +9,8 @@ import os
 import random
 from pathlib import Path
 import pandas as pd
+from time import perf_counter
+from concurrent.futures import ThreadPoolExecutor, as_completed

 # Import Backend Modules
 import config
@@ -19,6 +21,10 @@ from modules.composer import VideoComposer, VideoComposer as Composer # alias
 from modules.text_renderer import renderer
 from modules import export_utils
 from modules.db_manager import db
+from modules import path_utils
+from modules import limits
+from modules.legacy_path_mapper import map_legacy_local_path
+from modules.legacy_normalizer import normalize_legacy_project

 # Page Config
 st.set_page_config(
@@ -138,6 +144,32 @@ def load_project(project_id):
    st.session_state.script_data = data.get("script_data")
    st.session_state.view_mode = "workspace"

+    # Fallback: 如果 DB 中的 script_data 是旧结构/缺字段，则从 legacy JSON 重新规范化一次
+    try:
+        script_data = st.session_state.script_data
+        legacy_json = Path(config.TEMP_DIR) / f"project_{project_id}.json"
+
+        def _needs_normalize(sd: Any) -> bool:
+            if not isinstance(sd, dict):
+                return True
+            if "_legacy_schema" not in sd:
+                return True
+            scenes = sd.get("scenes") or []
+            if scenes and isinstance(scenes, list) and isinstance(scenes[0], dict):
+                if "visual_prompt" not in scenes[0] or "video_prompt" not in scenes[0]:
+                    return True
+            return False
+
+        if legacy_json.exists() and _needs_normalize(script_data):
+            raw = json.loads(legacy_json.read_text(encoding="utf-8"))
+            normalized = normalize_legacy_project(raw)
+            st.session_state.script_data = normalized
+            # 写回 DB，避免每次 load 都重新算
+            db.update_project_script(project_id, normalized)
+            st.info("已从 legacy JSON 重新规范化脚本字段（兼容旧版项目）。")
+    except Exception as e:
+        st.warning(f"legacy 规范化失败（将继续使用 DB 数据）: {e}")
+    
    # Restore product info for Step 1 display
    product_info = data.get("product_info", {})
    st.session_state.loaded_product_name = data.get("name", "")
@@ -161,13 +193,17 @@ def load_project(project_id):
    
    for asset in assets:
        sid = asset["scene_id"]
+        source_path, _mapped_url = map_legacy_local_path(asset.get("local_path"))
        # 假设 scene_id 0 或 -1 用于 final video
        if asset["asset_type"] == "image" and asset["status"] == "completed":
-            images[sid] = asset["local_path"]
+            if source_path:
+                images[sid] = source_path
        elif asset["asset_type"] == "video" and asset["status"] == "completed":
-            videos[sid] = asset["local_path"]
+            if source_path:
+                videos[sid] = source_path
        elif asset["asset_type"] == "final_video" and asset["status"] == "completed":
-            final_vid = asset["local_path"]
+            if source_path:
+                final_vid = source_path
            
    st.session_state.scene_images = images
    st.session_state.scene_videos = videos
@@ -233,6 +269,33 @@ with st.sidebar:

        if st.session_state.project_id:
            st.caption(f"Current ID: {st.session_state.project_id}")
+            with st.expander("⏱️ 性能与诊断", expanded=False):
+                m = _get_metrics(st.session_state.project_id)
+                if not m:
+                    st.caption("暂无指标（执行一次脚本/生图/生视频/合成后会出现）。")
+                else:
+                    keys = [
+                        "script_gen_s",
+                        "image_gen_total_s",
+                        "video_submit_s",
+                        "video_recover_s",
+                        "compose_s",
+                        "script_model",
+                        "image_provider",
+                        "image_generated",
+                        "video_submitted",
+                        "video_recovered",
+                        "bgm_used",
+                    ]
+                    for k in keys:
+                        if k in m:
+                            st.caption(f"{k}: {m.get(k)}")
+        # 在线剪辑入口（React Editor）
+        web_base_url = os.getenv("WEB_BASE_URL", "http://localhost:3000").rstrip("/")
+        st.markdown(
+            f"[打开在线剪辑器]({web_base_url}/editor/{st.session_state.project_id})",
+            unsafe_allow_html=False,
+        )
        
        st.markdown("---")
        
@@ -258,14 +321,48 @@ with st.sidebar:
 # ============================================================
 # Helper Functions
 # ============================================================
-def save_uploaded_file(uploaded_file):
-    """Save uploaded file to temp dir."""
-    if uploaded_file is not None:
-        file_path = config.TEMP_DIR / uploaded_file.name
-        with open(file_path, "wb") as f:
-            f.write(uploaded_file.getbuffer())
-        return str(file_path)
-    return None
+def _record_metrics(project_id: str, patch: dict):
+    """Persist lightweight timing/diagnostic metrics into project.product_info['_metrics']."""
+    if not project_id or not isinstance(patch, dict) or not patch:
+        return
+    try:
+        proj = db.get_project(project_id) or {}
+        product_info = proj.get("product_info") or {}
+        metrics = product_info.get("_metrics") if isinstance(product_info.get("_metrics"), dict) else {}
+        metrics.update(patch)
+        metrics["updated_at"] = time.time()
+        product_info["_metrics"] = metrics
+        db.update_project_product_info(project_id, product_info)
+    except Exception:
+        # metrics must never break UX
+        pass
+
+
+def _get_metrics(project_id: str) -> dict:
+    try:
+        proj = db.get_project(project_id) or {}
+        product_info = proj.get("product_info") or {}
+        m = product_info.get("_metrics")
+        return m if isinstance(m, dict) else {}
+    except Exception:
+        return {}
+
+def save_uploaded_file(project_id: str, uploaded_file):
+    """Save uploaded file to per-project upload dir (avoid overwrites across projects)."""
+    if uploaded_file is None:
+        return None
+    if not project_id:
+        raise ValueError("project_id is required to save uploaded files safely")
+    upload_dir = path_utils.project_upload_dir(project_id)
+    original = path_utils.sanitize_filename(getattr(uploaded_file, "name", "upload"))
+    # keep original stem for readability, but ensure uniqueness
+    suffix = Path(original).suffix.lstrip(".") or "bin"
+    stem = Path(original).stem or "upload"
+    unique_name = path_utils.unique_filename(prefix=f"upload_{stem}", ext=suffix, project_id=project_id)
+    file_path = upload_dir / unique_name
+    with open(file_path, "wb") as f:
+        f.write(uploaded_file.getbuffer())
+    return str(file_path)

 # ============================================================
 # Main Content: Workspace
@@ -318,11 +415,16 @@ if st.session_state.view_mode == "workspace":
        # 允许在没有上传新图片但有历史图片的情况下继续
        can_submit = uploaded_files or st.session_state.uploaded_images
        
-        # Model Selection
-        model_options = ["Gemini 3 Pro", "Doubao Pro (Vision)"]
-        selected_model_label = st.radio("选择脚本生成模型", model_options, horizontal=True)
+        # Model Selection (all support images; user explicitly chooses model)
+        model_options = ["GPT-5.2", "Gemini 3 Pro", "Doubao Pro (Vision)"]
+        selected_model_label = st.radio("选择脚本生成模型", model_options, horizontal=True, index=0)
        # Map label to provider key
-        model_provider = "doubao" if "Doubao" in selected_model_label else "shubiaobiao"
+        if selected_model_label == "GPT-5.2":
+            model_provider = "shubiaobiao_gpt"
+        elif "Doubao" in selected_model_label:
+            model_provider = "doubao"
+        else:
+            model_provider = "shubiaobiao"
        
        if st.button("提交任务 & 生成脚本", type="primary", disabled=(not can_submit)):
            # 处理图片路径
@@ -335,7 +437,7 @@ if st.session_state.view_mode == "workspace":
                    st.session_state.project_id = f"PROJ-{int(time.time())}"
                    
                for uf in uploaded_files:
-                    path = save_uploaded_file(uf)
+                    path = save_uploaded_file(st.session_state.project_id, uf)
                    if path: image_paths.append(path)
                    
            st.session_state.uploaded_images = image_paths
@@ -352,7 +454,12 @@ if st.session_state.view_mode == "workspace":
            # Call Script Generator
            with st.spinner(f"正在分析商品信息并生成脚本 ({selected_model_label})..."):
                gen = ScriptGenerator()
+                t0 = perf_counter()
                script = gen.generate_script(product_name, product_info, image_paths, model_provider=model_provider)
+                _record_metrics(st.session_state.project_id, {
+                    "script_gen_s": round(perf_counter() - t0, 3),
+                    "script_model": model_provider,
+                })
                
                if script:
                    st.session_state.script_data = script
@@ -371,10 +478,39 @@ if st.session_state.view_mode == "workspace":
            if st.session_state.script_data:
                script = st.session_state.script_data
                
-                # Display Basic Info
+                # Display Basic Info (兼容 legacy schema)
+                selling_points = script.get("selling_points", []) or []
+                target_audience = script.get("target_audience", "") or ""
+                analysis_text = script.get("analysis", "") or ""
+                legacy_schema = script.get("_legacy_schema", "") or ""
+
                c1, c2 = st.columns(2)
-                c1.write(f"**核心卖点**: {', '.join(script.get('selling_points', []))}")
-                c2.write(f"**目标人群**: {script.get('target_audience', '')}")
+                if selling_points:
+                    c1.write(f"**核心卖点**: {', '.join(selling_points)}")
+                else:
+                    c1.write("**核心卖点**: (legacy 项目可能未生成该字段)")
+                    if analysis_text:
+                        with st.expander("查看 legacy analysis（用于补齐信息）"):
+                            st.write(analysis_text)
+
+                if target_audience:
+                    c2.write(f"**目标人群**: {target_audience}")
+                else:
+                    c2.write("**目标人群**: (legacy 项目可能未生成该字段)")
+
+                # Hook / CTA / Schema
+                hook = script.get("hook", "") or ""
+                if hook:
+                    st.markdown(f"**Hook**: {hook}")
+                cta = script.get("cta", "")
+                if cta:
+                    if isinstance(cta, dict):
+                        st.markdown("**CTA（legacy object）**")
+                        st.json(cta)
+                    else:
+                        st.markdown(f"**CTA**: {cta}")
+                if legacy_schema:
+                    st.caption(f"Legacy Schema: {legacy_schema}")
                
                # Prompt Visualization
                if "_debug" in script:
@@ -397,10 +533,15 @@ if st.session_state.view_mode == "workspace":
                # Global Voiceover Timeline (New)
                st.markdown("### 🎙️ 整体旁白与字幕时间轴")
                with st.expander("编辑旁白时间轴 (Voiceover Timeline)", expanded=True):
-                    timeline = script.get("voiceover_timeline", [])
+                    timeline = script.get("voiceover_timeline", []) or []
                    if not timeline:
-                        # Init with default if empty (使用秒)
-                        timeline = [{"text": "示例旁白", "subtitle": "示例字幕", "start_time": 0.0, "duration": 3.0}]
+                        # 对于历史项目：如果没有 scenes 也没有 timeline，不要强行塞“示例旁白”，避免污染数据
+                        if not scenes and analysis_text:
+                            st.info("该历史项目暂无旁白时间轴（可能停留在分析/提问阶段）。")
+                            timeline = []
+                        else:
+                            # Init with default if empty (使用秒)
+                            timeline = [{"text": "示例旁白", "subtitle": "示例字幕", "start_time": 0.0, "duration": 3.0}]
                    
                    updated_timeline = []
                    for i, item in enumerate(timeline):
@@ -444,12 +585,41 @@ if st.session_state.view_mode == "workspace":
                            # 花字编辑保留
                            ft = scene.get("fancy_text", {})
                            if isinstance(ft, dict):
-                                new_ft_text = st.text_input(f"Fancy Text (Scene {scene['id']})", value=ft.get("text", ""), key=f"ft_{i}")
+                                new_ft_text = st.text_input(
+                                    f"Fancy Text (Scene {scene['id']})",
+                                    value=ft.get("text", ""),
+                                    key=f"ft_{i}",
+                                )
+                                # 兼容：旧数据可能没有 fancy_text 字段
+                                if not isinstance(scene.get("fancy_text"), dict):
+                                    scene["fancy_text"] = {}
                                scene["fancy_text"]["text"] = new_ft_text
                            
                            # 旁白/字幕已移至上方整体时间轴，此处仅作展示或删除
                            st.caption("注：旁白与字幕已移至上方整体时间轴编辑")

+                            # Legacy 信息展示（只读，用于调试/对齐）
+                            legacy_scene = scene.get("_legacy", {}) if isinstance(scene.get("_legacy", {}), dict) else {}
+                            if legacy_scene:
+                                with st.expander(f"Legacy 信息 (Scene {scene['id']})", expanded=False):
+                                    img_url = legacy_scene.get("image_url")
+                                    if img_url:
+                                        st.markdown(f"- image_url: `{img_url}`")
+                                    cam = legacy_scene.get("camera_movement")
+                                    if cam:
+                                        st.markdown(f"- camera_movement: {cam}")
+                                    vo = legacy_scene.get("voiceover")
+                                    if vo:
+                                        st.markdown(f"- voiceover: {vo}")
+                                    keyframe = legacy_scene.get("keyframe")
+                                    if keyframe:
+                                        st.markdown("- keyframe:")
+                                        st.json(keyframe)
+                                    rhythm = legacy_scene.get("rhythm")
+                                    if rhythm:
+                                        st.markdown("- rhythm:")
+                                        st.json(rhythm)
+                        
                        updated_scenes.append(scene)
                        st.divider()
                
@@ -497,9 +667,13 @@ if st.session_state.view_mode == "workspace":
                st.session_state.selected_img_provider = img_provider

                if st.button("🚀 执行 AI 生图", type="primary"):
-                    img_gen = ImageGenerator()
-                    # Pass ALL uploaded images as reference
-                    base_imgs = st.session_state.uploaded_images if st.session_state.uploaded_images else []
+                    with limits.acquire_image(blocking=False) as ok:
+                        if not ok:
+                            st.warning("系统正在生成其他任务（生图并发已达上限），请稍后再试。")
+                            st.stop()
+                        img_gen = ImageGenerator()
+                        # Pass ALL uploaded images as reference
+                        base_imgs = st.session_state.uploaded_images if st.session_state.uploaded_images else []
                    
                    if not base_imgs: 
                        st.error("No base image found (未找到参考底图). Please upload in Step 1.")
@@ -514,11 +688,18 @@ if st.session_state.view_mode == "workspace":
                         # --- Group Generation Logic ---
                         with st.spinner("正在进行 Doubao 组图生成 (Batch Group Generation)..."):
                             try:
+                                 t0 = perf_counter()
                                 results = img_gen.generate_group_images_doubao(
                                     scenes=scenes,
                                     reference_images=base_imgs,
-                                     visual_anchor=visual_anchor
+                                     visual_anchor=visual_anchor,
+                                     project_id=st.session_state.project_id
                                 )
+                                 _record_metrics(st.session_state.project_id, {
+                                     "image_gen_total_s": round(perf_counter() - t0, 3),
+                                     "image_provider": img_provider,
+                                     "image_generated": len(results),
+                                 })
                                 
                                 for s_id, path in results.items():
                                     st.session_state.scene_images[s_id] = path
@@ -536,35 +717,53 @@ if st.session_state.view_mode == "workspace":
                             except Exception as e:
                                 st.error(f"组图生成失败: {e}")
                    else:
-                        # --- Sequential Logic ---
+                        # --- Parallel Logic (default): only merchant uploaded images as references ---
                        total_scenes = len(scenes)
                        progress_bar = st.progress(0)
                        status_text = st.empty()

-                        current_refs = list(base_imgs) # Start with base images
-                        
                        try:
-                            for idx, scene in enumerate(scenes):
-                                scene_id = scene["id"]
-                                status_text.text(f"正在生成 Scene {scene_id} ({idx+1}/{total_scenes}) using {selected_img_model}...")
+                            t0 = perf_counter()
+                            # Parallel workers within a single run; global semaphore already acquired above.
+                            max_workers = 6
+                            futures = {}
+                            with ThreadPoolExecutor(max_workers=max_workers) as ex:
+                                for idx, scene in enumerate(scenes):
+                                    scene_id = scene["id"]
+                                    futures[ex.submit(
+                                        img_gen.generate_single_scene_image,
+                                        scene=scene,
+                                        original_image_path=list(base_imgs),  # ONLY merchant images
+                                        previous_image_path=None,
+                                        model_provider=img_provider,
+                                        visual_anchor=visual_anchor,
+                                        project_id=st.session_state.project_id,
+                                    )] = (idx, scene_id)

-                                img_path = img_gen.generate_single_scene_image(
-                                    scene=scene,
-                                    original_image_path=current_refs, # Pass ALL accumulated images
-                                    previous_image_path=None, 
-                                    model_provider=img_provider,
-                                    visual_anchor=visual_anchor
-                                )
+                                done = 0
+                                for fut in as_completed(futures):
+                                    idx, scene_id = futures[fut]
+                                    done += 1
+                                    status_text.text(f"已完成 {done}/{total_scenes}（Scene {scene_id}）")
+                                    try:
+                                        img_path = fut.result()
+                                    except Exception as e:
+                                        img_path = None
+                                        st.warning(f"Scene {scene_id} 生成失败：{e}")

-                                if img_path:
-                                    st.session_state.scene_images[scene_id] = img_path
-                                    current_refs.append(img_path) # Add newly generated image to references for next scene
-                                    db.save_asset(st.session_state.project_id, scene_id, "image", "completed", local_path=img_path)
+                                    if img_path:
+                                        st.session_state.scene_images[scene_id] = img_path
+                                        db.save_asset(st.session_state.project_id, scene_id, "image", "completed", local_path=img_path)

-                                progress_bar.progress((idx + 1) / total_scenes)
+                                    progress_bar.progress(done / total_scenes)
                            
                            status_text.text("生图完成！")
                            st.success("生图完成！")
+                            _record_metrics(st.session_state.project_id, {
+                                "image_gen_total_s": round(perf_counter() - t0, 3),
+                                "image_provider": img_provider,
+                                "image_generated": len(st.session_state.scene_images),
+                            })
                            # Update Status
                            db.update_project_status(st.session_state.project_id, "images_generated")
                            time.sleep(1)
@@ -603,11 +802,8 @@ if st.session_state.view_mode == "workspace":
                            else:
                                with st.spinner(f"正在重绘 Scene {scene_id}..."):
                                    img_gen = ImageGenerator()
-                                    # Use ALL uploaded images + previously generated images up to this point
+                                    # Only merchant uploaded images as references (no chaining)
                                    current_refs_for_regen = list(st.session_state.uploaded_images)
-                                    for prev_s_id in range(1, scene_id):
-                                        if prev_s_id in st.session_state.scene_images:
-                                            current_refs_for_regen.append(st.session_state.scene_images[prev_s_id])
                                    
                                    # Fallback to single mode for regen if group was used
                                    provider = st.session_state.get("selected_img_provider", "shubiaobiao")
@@ -621,7 +817,8 @@ if st.session_state.view_mode == "workspace":
                                        original_image_path=current_refs_for_regen,
                                        previous_image_path=None, 
                                        model_provider=provider,
-                                        visual_anchor=regen_visual_anchor
+                                        visual_anchor=regen_visual_anchor,
+                                        project_id=st.session_state.project_id
                                    )
                                    if new_path:
                                        st.session_state.scene_images[scene_id] = new_path
@@ -636,33 +833,119 @@ if st.session_state.view_mode == "workspace":
    if st.session_state.current_step >= 3:
        with st.expander("🎥 4. 视频生成 (Volcengine I2V)", expanded=(st.session_state.current_step == 3)):
            
-            if not st.session_state.scene_videos:
-                if st.button("🎬 执行图生视频", type="primary"):
-                    with st.spinner("正在生成视频 (耗时较长)..."):
-                        vid_gen = VideoGenerator()
-                        # Pass project_id
-                        videos = vid_gen.generate_scene_videos(
-                            st.session_state.project_id,
-                            st.session_state.script_data, 
-                            st.session_state.scene_images
+            scenes = st.session_state.script_data.get("scenes", [])
+            vid_gen = VideoGenerator()
+
+            # Submit-only (non-blocking) to avoid freezing Streamlit under concurrency
+            if st.button("🎬 提交图生视频任务（非阻塞）", type="primary"):
+                with limits.acquire_video(blocking=False) as ok:
+                    if not ok:
+                        st.warning("系统正在处理其他视频任务（并发已达上限），请稍后再试。")
+                        st.stop()
+                    t0 = perf_counter()
+                    submitted = 0
+                    for scene in scenes:
+                        scene_id = scene["id"]
+                        image_path = st.session_state.scene_images.get(scene_id)
+                        prompt = scene.get("video_prompt", "High quality video")
+                        task_id = vid_gen.submit_scene_video_task(
+                            st.session_state.project_id, scene_id, image_path, prompt
                        )
+                        if task_id:
+                            submitted += 1
+                    _record_metrics(st.session_state.project_id, {
+                        "video_submit_s": round(perf_counter() - t0, 3),
+                        "video_submitted": submitted,
+                    })
+                    if submitted:
+                        db.update_project_status(st.session_state.project_id, "videos_processing")
+                        st.success(f"已提交 {submitted} 个分镜视频任务。可点击下方“刷新恢复”下载结果。")
+                        time.sleep(0.5)
+                        st.rerun()
+                    else:
+                        st.warning("未提交任何任务（可能缺少图片或接口失败）。")

-                        if videos:
-                            st.session_state.scene_videos = videos
-                            for sid, path in videos.items():
-                                db.save_asset(st.session_state.project_id, sid, "video", "completed", local_path=path)
+            if st.button("🔄 刷新状态并恢复已完成任务", type="secondary"):
+                with limits.acquire_video(blocking=False) as ok:
+                    if not ok:
+                        st.warning("系统正在处理其他视频任务（并发已达上限），请稍后再试。")
+                        st.stop()
+                    t0 = perf_counter()
+                    updated = 0
+                    for scene in scenes:
+                        scene_id = scene["id"]
+                        asset = db.get_asset(st.session_state.project_id, scene_id, "video")
+                        if not asset or not asset.get("task_id"):
+                            continue
+                        # if already have local video, skip
+                        existing = st.session_state.scene_videos.get(scene_id)
+                        if existing and os.path.exists(existing):
+                            continue
+                        task_id = asset.get("task_id")
+                        # Query volc status; store URL for direct preview (no server download)
+                        status = None
+                        url = None
+                        # short retries for "succeeded but url missing"
+                        for attempt in range(3):
+                            status, url = vid_gen.check_task_status(task_id)
+                            if status == "succeeded" and url:
+                                break
+                            time.sleep(0.5 * (2 ** attempt))

-                            # Update Status
-                            db.update_project_status(st.session_state.project_id, "videos_generated")
-                            st.success("视频生成完成！")
-                            st.rerun()
-                        else:
-                            st.warning("部分或全部视频生成失败")
+                        meta_patch = {"checked_at": time.time(), "volc_status": status}
+                        if url:
+                            meta_patch["video_url"] = url
+                        db.update_asset_metadata(st.session_state.project_id, scene_id, "video", meta_patch)
+                        updated += 1

-            # Display Videos
-            if st.session_state.scene_videos:
+                    _record_metrics(st.session_state.project_id, {
+                        "video_recover_s": round(perf_counter() - t0, 3),
+                        "video_recovered": updated,
+                    })
+                    if updated:
+                        st.success(f"已刷新 {updated} 个分镜状态（成功的将以 URL 直连预览）。")
+                    else:
+                        st.info("暂无可恢复的视频（可能仍在排队/生成中）。")
+                    time.sleep(0.5)
+                    st.rerun()
+
+            if st.button("📥 准备合成素材（下载成功的视频到服务器）", type="secondary"):
+                with limits.acquire_video(blocking=False) as ok:
+                    if not ok:
+                        st.warning("系统正在处理其他视频任务（并发已达上限），请稍后再试。")
+                        st.stop()
+                    downloaded = 0
+                    for scene in scenes:
+                        scene_id = scene["id"]
+                        existing = st.session_state.scene_videos.get(scene_id)
+                        if existing and os.path.exists(existing):
+                            continue
+                        asset = db.get_asset(st.session_state.project_id, scene_id, "video")
+                        meta = (asset or {}).get("metadata") or {}
+                        video_url = meta.get("video_url")
+                        if not video_url:
+                            continue
+                        out_name = path_utils.unique_filename(
+                            prefix="scene_video",
+                            ext="mp4",
+                            project_id=st.session_state.project_id,
+                            scene_id=scene_id,
+                        )
+                        target_path = str(path_utils.project_videos_dir(st.session_state.project_id) / out_name)
+                        if vid_gen._download_video_to(video_url, target_path):
+                            st.session_state.scene_videos[scene_id] = target_path
+                            db.save_asset(st.session_state.project_id, scene_id, "video", "completed", local_path=target_path, task_id=(asset or {}).get("task_id"), metadata=meta)
+                            downloaded += 1
+                    if downloaded:
+                        st.success(f"已下载 {downloaded} 段视频，可进入合成。")
+                    else:
+                        st.info("暂无可下载的视频（请先刷新状态获取 video_url）。")
+                    time.sleep(0.5)
+                    st.rerun()
+            
+            # Display Videos (even when partially available)
+            if st.session_state.scene_videos or scenes:
                cols = st.columns(4)
-                scenes = st.session_state.script_data.get("scenes", [])
                
                for i, scene in enumerate(scenes):
                    scene_id = scene["id"]
@@ -677,24 +960,27 @@ if st.session_state.view_mode == "workspace":
                        if vid_path and os.path.exists(vid_path):
                            st.video(vid_path)
                        else:
-                            st.warning("Video missing")
-                            # --- Recovery Logic ---
+                            # Try URL preview from DB metadata
                            asset = db.get_asset(st.session_state.project_id, scene_id, "video")
+                            meta = (asset or {}).get("metadata") or {}
+                            video_url = meta.get("video_url")
+                            if video_url:
+                                st.caption("URL 直连预览（不经服务器落盘）")
+                                st.video(video_url)
+                            else:
+                                st.warning("Video missing")
+                            # --- Recovery Logic ---
                            if asset and asset.get("task_id"):
                                 task_id = asset.get("task_id")
-                                 if st.button(f"🔍 找回视频 (Task {task_id[-6:]})", key=f"recov_{scene_id}"):
+                                 if st.button(f"🔍 刷新URL (Task {task_id[-6:]})", key=f"recov_{scene_id}"):
                                     with st.spinner("查询任务状态中..."):
-                                         vid_gen = VideoGenerator()
-                                         output_filename = f"scene_{scene_id}_video.mp4"
-                                         target_path = str(config.TEMP_DIR / output_filename)
-                                         
-                                         if vid_gen.recover_video_from_task(task_id, target_path):
-                                             st.session_state.scene_videos[scene_id] = target_path
-                                             db.save_asset(st.session_state.project_id, scene_id, "video", "completed", local_path=target_path)
-                                             st.success("找回成功！")
-                                             st.rerun()
-                                         else:
-                                             st.error("找回失败")
+                                         status, url = vid_gen.check_task_status(task_id)
+                                         patch = {"checked_at": time.time(), "volc_status": status}
+                                         if url:
+                                             patch["video_url"] = url
+                                         db.update_asset_metadata(st.session_state.project_id, scene_id, "video", patch)
+                                         st.success("已刷新任务状态。")
+                                         st.rerun()
                            
                        # Per-scene regenerate button
                        if st.button(f"🔄 重生 S{scene_id}", key=f"regen_vid_{scene_id}"):
@@ -769,6 +1055,13 @@ if st.session_state.view_mode == "workspace":
                        ["None"] + bgm_names,
                        index=default_idx
                    )
+                    # 明确提示：BGM 目录为空或选中 BGM 不存在时，本次将不含 BGM
+                    if not bgm_names:
+                        st.warning(f"BGM 目录为空：{bgm_dir}（本次合成将不含 BGM）")
+                    elif selected_bgm != "None":
+                        candidate = config.ASSETS_DIR / "bgm" / selected_bgm
+                        if not candidate.exists():
+                            st.warning(f"所选 BGM 文件不存在：{candidate}（本次合成将不含 BGM）")
                with col_g2:
                    # Voice Select
                    selected_voice = st.selectbox("配音音色 (TTS)", [config.VOLC_TTS_DEFAULT_VOICE, "zh_female_meilinvyou_saturn_bigtts"])
@@ -812,8 +1105,10 @@ if st.session_state.view_mode == "workspace":
                        ft = scene.get("fancy_text", {})
                        ft_text = ft.get("text", "") if isinstance(ft, dict) else ""
                        new_ft = st.text_input(f"花字", value=ft_text, key=f"tune_ft_{i}")
-                        if isinstance(scene.get("fancy_text"), dict):
-                            scene["fancy_text"]["text"] = new_ft
+                        # 兼容：旧数据可能没有 fancy_text 字段
+                        if not isinstance(scene.get("fancy_text"), dict):
+                            scene["fancy_text"] = {}
+                        scene["fancy_text"]["text"] = new_ft
                        
                        updated_scenes.append(scene)
                
@@ -831,12 +1126,18 @@ if st.session_state.view_mode == "workspace":
                            # Save updated script first
                            db.update_project_script(st.session_state.project_id, st.session_state.script_data)
                            
+                            t0 = perf_counter()
                            output_path = composer.compose_from_script(
                                script=st.session_state.script_data,
                                video_map=st.session_state.scene_videos,
                                bgm_path=bgm_path,
-                                output_name=f"final_{st.session_state.project_id}_{int(time.time())}" # Unique name for history
+                                output_name=f"final_{st.session_state.project_id}_{int(time.time())}", # Unique name for history
+                                project_id=st.session_state.project_id,
                            )
+                            _record_metrics(st.session_state.project_id, {
+                                "compose_s": round(perf_counter() - t0, 3),
+                                "bgm_used": bool(bgm_path and Path(bgm_path).exists()),
+                            })
                            st.session_state.final_video = output_path
                            db.save_asset(st.session_state.project_id, 0, "final_video", "completed", local_path=output_path)
                            
@@ -902,16 +1203,25 @@ if st.session_state.view_mode == "workspace":
                            # 智能匹配 BGM：根据脚本 bgm_style 匹配
                            bgm_style = st.session_state.script_data.get("bgm_style", "")
                            bgm_path = match_bgm_by_style(bgm_style, config.ASSETS_DIR / "bgm")
+                            if bgm_path and not Path(bgm_path).exists():
+                                st.warning(f"推荐的 BGM 文件不存在：{bgm_path}（本次将不含 BGM）")
+                                bgm_path = None
                            
                            try:
                                # 首次合成也加上时间戳
                                output_name = f"final_{st.session_state.project_id}_{int(time.time())}"
+                                t0 = perf_counter()
                                output_path = composer.compose_from_script(
                                    script=st.session_state.script_data,
                                    video_map=st.session_state.scene_videos,
                                    bgm_path=bgm_path,
-                                    output_name=output_name
+                                    output_name=output_name,
+                                    project_id=st.session_state.project_id,
                                )
+                                _record_metrics(st.session_state.project_id, {
+                                    "compose_s": round(perf_counter() - t0, 3),
+                                    "bgm_used": bool(bgm_path and Path(bgm_path).exists()),
+                                })
                                st.session_state.final_video = output_path
                                db.save_asset(st.session_state.project_id, 0, "final_video", "completed", local_path=output_path)
                                db.update_project_status(st.session_state.project_id, "completed")
--- a/modules/composer.py
+++ b/modules/composer.py
@@ -11,6 +11,7 @@ from typing import Dict, Any, List, Optional, Union
 import config
 from modules import ffmpeg_utils, fancy_text, factory, storage
 from modules.text_renderer import renderer
+from modules import path_utils

 logger = logging.getLogger(__name__)

@@ -65,6 +66,7 @@ class VideoComposer:
        bgm_path: str = None,
        bgm_volume: float = 0.15,
        output_name: str = None,
+        project_id: Optional[str] = None,
        upload_to_r2: bool = False
    ) -> str:
        """
@@ -89,25 +91,27 @@ class VideoComposer:
        
        timestamp = int(time.time())
        output_name = output_name or f"composed_{timestamp}"
+        # Per-project temp dir to avoid cross-project overwrites
+        temp_root = path_utils.project_compose_dir(project_id, output_name) if project_id else config.TEMP_DIR
        
        logger.info(f"Starting composition: {len(video_paths)} videos")
        
        try:
            # Step 1: 拼接视频
-            merged_path = str(config.TEMP_DIR / f"{output_name}_merged.mp4")
+            merged_path = str(Path(temp_root) / f"{output_name}_merged.mp4")
            ffmpeg_utils.concat_videos(video_paths, merged_path, self.target_size)
            self._add_temp(merged_path)
            current_video = merged_path

            # Step 1.1: 若无音轨，补一条静音底，避免后续滤镜找不到 0:a
-            silent_path = str(config.TEMP_DIR / f"{output_name}_silent.mp4")
+            silent_path = str(Path(temp_root) / f"{output_name}_silent.mp4")
            ffmpeg_utils.add_silence_audio(current_video, silent_path)
            self._add_temp(silent_path)
            current_video = silent_path
            
            # Step 2: 添加字幕 (白字黑边，无底框，水平居中)
            if subtitles:
-                subtitled_path = str(config.TEMP_DIR / f"{output_name}_subtitled.mp4")
+                subtitled_path = str(Path(temp_root) / f"{output_name}_subtitled.mp4")
                subtitle_style = {
                    "font": ffmpeg_utils._get_font_path(),
                    "fontsize": 60,
@@ -169,7 +173,7 @@ class VideoComposer:
                        "duration": ft.get("duration", 999)
                    })
                
-                fancy_path = str(config.TEMP_DIR / f"{output_name}_fancy.mp4")
+                fancy_path = str(Path(temp_root) / f"{output_name}_fancy.mp4")
                ffmpeg_utils.overlay_multiple_images(
                    current_video, overlay_configs, fancy_path
                )
@@ -178,13 +182,15 @@ class VideoComposer:
            
            # Step 4: 生成并混合旁白（火山 WS 优先，失败回退 Edge）
            if voiceover_text:
+                vo_out = str(Path(temp_root) / f"{output_name}_vo_full.mp3")
                vo_path = factory.generate_voiceover_volcengine(
                    text=voiceover_text,
-                    voice_type=self.voice_type
+                    voice_type=self.voice_type,
+                    output_path=vo_out,
                )
                self._add_temp(vo_path)
                
-                voiced_path = str(config.TEMP_DIR / f"{output_name}_voiced.mp4")
+                voiced_path = str(Path(temp_root) / f"{output_name}_voiced.mp4")
                ffmpeg_utils.mix_audio(
                    current_video, vo_path, voiced_path,
                    audio_volume=1.5,
@@ -195,12 +201,12 @@ class VideoComposer:
            
            elif voiceover_segments:
                current_video = self._add_segmented_voiceover(
-                    current_video, voiceover_segments, output_name
+                    current_video, voiceover_segments, output_name, Path(temp_root)
                )
            
            # Step 5: 添加BGM（淡入淡出，若 duck 失败会自动退回低音量混合）
            if bgm_path:
-                bgm_output = str(config.TEMP_DIR / f"{output_name}_bgm.mp4")
+                bgm_output = str(Path(temp_root) / f"{output_name}_bgm.mp4")
                ffmpeg_utils.add_bgm(
                    current_video, bgm_path, bgm_output,
                    bgm_volume=bgm_volume,
@@ -237,7 +243,8 @@ class VideoComposer:
        self,
        video_path: str,
        segments: List[Dict[str, Any]],
-        output_name: str
+        output_name: str,
+        temp_root: Path,
    ) -> str:
        """添加分段旁白"""
        if not segments:
@@ -254,7 +261,7 @@ class VideoComposer:
            audio_path = factory.generate_voiceover_volcengine(
                text=text,
                voice_type=voice,
-                output_path=str(config.TEMP_DIR / f"{output_name}_seg_{i}.mp3")
+                output_path=str(temp_root / f"{output_name}_seg_{i}.mp3")
            )
            
            if audio_path:
@@ -270,7 +277,7 @@ class VideoComposer:
        # 依次混入音频
        current = video_path
        for i, af in enumerate(audio_files):
-            output = str(config.TEMP_DIR / f"{output_name}_seg_mixed_{i}.mp4")
+            output = str(temp_root / f"{output_name}_seg_mixed_{i}.mp4")
            ffmpeg_utils.mix_audio(
                current, af["path"], output,
                audio_volume=1.0,
@@ -287,7 +294,8 @@ class VideoComposer:
        script: Dict[str, Any],
        video_map: Dict[int, str],
        bgm_path: str = None,
-        output_name: str = None
+        output_name: str = None,
+        project_id: Optional[str] = None,
    ) -> str:
        """
        基于生成脚本和视频映射进行合成
@@ -340,13 +348,30 @@ class VideoComposer:
                            # 无 background，不加底框
                        }

+                        # 让花字时长默认跟随镜头（不改 prompt，仅纠正过短/缺失 duration）
+                        start_in_scene = float(ft.get("start_time", 0) or 0.0)
+                        if start_in_scene < 0:
+                            start_in_scene = 0.0
+                        if start_in_scene >= duration:
+                            start_in_scene = 0.0
+                        ft_dur = ft.get("duration", None)
+                        try:
+                            ft_dur_val = float(ft_dur) if ft_dur is not None else None
+                        except Exception:
+                            ft_dur_val = None
+                        # If too short, extend to scene end
+                        if ft_dur_val is None or ft_dur_val < 1.5:
+                            ft_dur_val = max(duration - start_in_scene, 1.5)
+                        # Clamp within scene duration
+                        ft_dur_val = max(0.5, min(ft_dur_val, duration))
+
                        fancy_texts.append({
                            "text": text,
                            "style": fixed_style,
                            "x": "(W-w)/2",  # 居中
                            "y": "180",      # 上半区域
-                            "start": total_duration + float(ft.get("start_time", 0)),
-                            "duration": float(ft.get("duration", duration))
+                            "start": total_duration + start_in_scene,
+                            "duration": ft_dur_val
                        })
            
            total_duration += duration
@@ -354,15 +379,16 @@ class VideoComposer:
        # 2. 拼接视频
        timestamp = int(time.time())
        output_name = output_name or f"composed_{timestamp}"
+        temp_root = path_utils.project_compose_dir(project_id, output_name) if project_id else config.TEMP_DIR
        
-        merged_path = str(config.TEMP_DIR / f"{output_name}_merged.mp4")
+        merged_path = str(Path(temp_root) / f"{output_name}_merged.mp4")
        ffmpeg_utils.concat_videos(video_paths, merged_path, self.target_size)
        self._add_temp(merged_path)
        current_video = merged_path
        
        # 3. 处理整体旁白时间轴 (New Logic)
        voiceover_timeline = script.get("voiceover_timeline", [])
-        mixed_audio_path = str(config.TEMP_DIR / f"{output_name}_mixed_vo.mp3")
+        mixed_audio_path = str(Path(temp_root) / f"{output_name}_mixed_vo.mp3")
        
        # 初始化静音底轨 (长度为 total_duration)
        ffmpeg_utils._run_ffmpeg([
@@ -401,17 +427,17 @@ class VideoComposer:
                tts_path = factory.generate_voiceover_volcengine(
                    text=text,
                    voice_type=self.voice_type,
-                    output_path=str(config.TEMP_DIR / f"{output_name}_vo_{i}.mp3")
+                    output_path=str(Path(temp_root) / f"{output_name}_vo_{i}.mp3")
                )
                self._add_temp(tts_path)
                
                # 调整时长
-                adjusted_path = str(config.TEMP_DIR / f"{output_name}_vo_adj_{i}.mp3")
+                adjusted_path = str(Path(temp_root) / f"{output_name}_vo_adj_{i}.mp3")
                ffmpeg_utils.adjust_audio_duration(tts_path, target_duration, adjusted_path)
                self._add_temp(adjusted_path)
                
                # 混合到总音轨
-                new_mixed = str(config.TEMP_DIR / f"{output_name}_mixed_{i}.mp3")
+                new_mixed = str(Path(temp_root) / f"{output_name}_mixed_{i}.mp3")
                ffmpeg_utils.mix_audio_at_offset(mixed_audio_path, adjusted_path, target_start, new_mixed)
                mixed_audio_path = new_mixed # Update current mixed path
                self._add_temp(new_mixed)
@@ -425,7 +451,7 @@ class VideoComposer:
                })
        
        # 4. 将合成好的旁白混入视频
-        voiced_path = str(config.TEMP_DIR / f"{output_name}_voiced.mp4")
+        voiced_path = str(Path(temp_root) / f"{output_name}_voiced.mp4")
        ffmpeg_utils.mix_audio(
            current_video, mixed_audio_path, voiced_path,
            audio_volume=1.5,
@@ -436,7 +462,7 @@ class VideoComposer:
        
        # 5. 添加字幕 (使用新的 ffmpeg_utils.add_multiple_subtitles)
        if subtitles:
-            subtitled_path = str(config.TEMP_DIR / f"{output_name}_subtitled.mp4")
+            subtitled_path = str(Path(temp_root) / f"{output_name}_subtitled.mp4")
            subtitle_style = {
                "font": ffmpeg_utils._get_font_path(),
                "fontsize": 60,
@@ -455,7 +481,7 @@ class VideoComposer:
            
        # 6. 添加花字
        if fancy_texts:
-            fancy_path = str(config.TEMP_DIR / f"{output_name}_fancy.mp4")
+            fancy_path = str(Path(temp_root) / f"{output_name}_fancy.mp4")
            
            overlay_configs = []
            for ft in fancy_texts:
@@ -477,7 +503,7 @@ class VideoComposer:
            
        # 7. 添加 BGM
        if bgm_path:
-            bgm_output = str(config.TEMP_DIR / f"{output_name}_bgm.mp4")
+            bgm_output = str(Path(temp_root) / f"{output_name}_bgm.mp4")
            ffmpeg_utils.add_bgm(
                current_video, bgm_path, bgm_output,
                bgm_volume=0.15
--- a/modules/db_manager.py
+++ b/modules/db_manager.py
@@ -113,6 +113,25 @@ class DBManager:
        finally:
            session.close()

+    def update_project_product_info(self, project_id: str, product_info: Dict[str, Any]):
+        """
+        Update project.product_info JSON (read-write with Postgres shared DB).
+        Used to persist editor state without changing schema.
+        """
+        session = self._get_session()
+        try:
+            project = session.query(Project).filter_by(id=project_id).first()
+            if project:
+                project.product_info = json.dumps(product_info, ensure_ascii=False)
+                project.updated_at = time.time()
+                session.commit()
+        except Exception as e:
+            session.rollback()
+            logger.error(f"Error updating product_info: {e}")
+            raise
+        finally:
+            session.close()
+
    def update_project_status(self, project_id: str, status: str):
        session = self._get_session()
        try:
@@ -260,6 +279,35 @@ class DBManager:
        finally:
            session.close()

+    def update_asset_metadata(self, project_id: str, scene_id: int, asset_type: str, patch: Dict[str, Any]) -> None:
+        """Merge-patch asset.metadata JSON without overwriting other fields."""
+        if not patch:
+            return
+        session = self._get_session()
+        try:
+            asset = session.query(SceneAsset).filter_by(
+                project_id=project_id,
+                scene_id=scene_id,
+                asset_type=asset_type
+            ).first()
+            if not asset:
+                return
+            try:
+                existing = json.loads(asset.metadata_json) if asset.metadata_json else {}
+            except Exception:
+                existing = {}
+            if not isinstance(existing, dict):
+                existing = {}
+            existing.update(patch)
+            asset.metadata_json = json.dumps(existing, ensure_ascii=False)
+            asset.updated_at = time.time()
+            session.commit()
+        except Exception as e:
+            session.rollback()
+            logger.error(f"Error updating asset metadata: {e}")
+        finally:
+            session.close()
+
    # --- Config/Prompt Operations ---

    def get_config(self, key: str, default: Any = None) -> Any:
--- a/modules/factory.py
+++ b/modules/factory.py
@@ -697,8 +697,12 @@ def generate_voiceover_volcengine_long(
    
    # 生成每段音频
    chunk_files = []
+    # Keep temp artifacts near output_path when provided to avoid cross-project collisions
+    base_tmp_dir = Path(output_path).parent if output_path else config.TEMP_DIR
+    base_tmp_dir.mkdir(parents=True, exist_ok=True)
    for i, chunk in enumerate(chunks):
-        chunk_path = str(config.TEMP_DIR / f"vo_chunk_{i}_{int(time.time())}.mp3")
+        import uuid
+        chunk_path = str(base_tmp_dir / f"vo_chunk_{i}_{int(time.time() * 1000)}_{uuid.uuid4().hex[:8]}.mp3")
        try:
            path = generate_voiceover_volcengine(
                text=chunk,
@@ -723,13 +727,14 @@ def generate_voiceover_volcengine_long(
        return chunk_files[0]
    
    # 创建合并文件列表
-    concat_list = config.TEMP_DIR / f"concat_audio_{os.getpid()}.txt"
+    import uuid
+    concat_list = base_tmp_dir / f"concat_audio_{int(time.time() * 1000)}_{uuid.uuid4().hex[:8]}.txt"
    with open(concat_list, "w") as f:
        for cf in chunk_files:
            f.write(f"file '{cf}'\n")
    
    if not output_path:
-        output_path = str(config.TEMP_DIR / f"vo_volc_merged_{int(time.time())}.mp3")
+        output_path = str(base_tmp_dir / f"vo_volc_merged_{int(time.time() * 1000)}_{uuid.uuid4().hex[:8]}.mp3")
    
    # FFmpeg 合并
    import subprocess
--- a/modules/ffmpeg_utils.py
+++ b/modules/ffmpeg_utils.py
@@ -7,6 +7,7 @@ import re
 import subprocess
 import tempfile
 import logging
+import shutil
 from pathlib import Path
 from typing import List, Dict, Any, Optional, Tuple

@@ -14,9 +15,39 @@ import config

 logger = logging.getLogger(__name__)

-# FFmpeg/FFprobe 路径（优先使用项目内的二进制）
-FFMPEG_PATH = str(config.BASE_DIR / "bin" / "ffmpeg") if (config.BASE_DIR / "bin" / "ffmpeg").exists() else "ffmpeg"
-FFPROBE_PATH = str(config.BASE_DIR / "bin" / "ffprobe") if (config.BASE_DIR / "bin" / "ffprobe").exists() else "ffprobe"
+def _pick_exec(preferred_path: str, fallback_name: str) -> str:
+    """
+    Pick an executable path.
+
+    Why:
+    - In docker, /app/bin may accidentally contain binaries built for another OS/arch,
+      causing `Exec format error` at runtime (seen on /app/bin/ffprobe).
+    Strategy:
+    - Prefer preferred_path if it exists AND is runnable.
+    - Otherwise fall back to PATH-resolved command (fallback_name).
+    """
+    if preferred_path and os.path.exists(preferred_path):
+        try:
+            # Validate it can be executed (arch OK) and is a real binary.
+            # ffmpeg/ffprobe both support `-version`.
+            result = subprocess.run(
+                [preferred_path, "-version"],
+                capture_output=True,
+                text=True,
+            )
+            if result.returncode == 0:
+                return preferred_path
+        except OSError:
+            # Exec format error / permission error -> fall back
+            pass
+
+    resolved = shutil.which(fallback_name)
+    return resolved or fallback_name
+
+
+# FFmpeg/FFprobe 路径（优先使用项目内的二进制，但会做可执行性自检）
+FFMPEG_PATH = _pick_exec(str(config.BASE_DIR / "bin" / "ffmpeg"), "ffmpeg")
+FFPROBE_PATH = _pick_exec(str(config.BASE_DIR / "bin" / "ffprobe"), "ffprobe")

 # 字体路径：优先使用项目内置字体，然后按平台回退到系统字体
 DEFAULT_FONT_PATHS = [
@@ -159,15 +190,6 @@ def concat_videos(
    
    logger.info(f"Concatenating {len(video_paths)} videos...")
    
-    # 创建 concat 文件列表
-    concat_file = config.TEMP_DIR / f"concat_{os.getpid()}.txt"
-    
-    with open(concat_file, "w", encoding="utf-8") as f:
-        for vp in video_paths:
-            # 使用绝对路径并转义单引号
-            abs_path = os.path.abspath(vp)
-            f.write(f"file '{abs_path}'\n")
-    
    width, height = target_size
    
    # 使用 filter_complex 统一分辨率后拼接
@@ -203,10 +225,6 @@ def concat_videos(
    
    _run_ffmpeg(cmd)
    
-    # 清理临时文件
-    if concat_file.exists():
-        concat_file.unlink()
-    
    logger.info(f"Concatenated video saved: {output_path}")
    return output_path

@@ -825,10 +843,10 @@ def add_bgm(
        bgm_volume: BGM音量
        loop: 是否循环BGM
    """
-    # 验证 BGM 文件存在
+    # 验证 BGM 文件存在（默认保持兼容：仍会输出视频，但会明确打日志）
    if not bgm_path or not os.path.exists(bgm_path):
-        logger.error(f"BGM file not found: {bgm_path}")
-        # 直接复制原视频，不添加 BGM
+        logger.error(f"BGM file not found (skip add_bgm): {bgm_path}")
+        # 直接复制原视频，不添加 BGM（上层应当提示用户/写入 metadata）
        import shutil
        shutil.copy(video_path, output_path)
        return output_path
--- a/modules/image_gen.py
+++ b/modules/image_gen.py
@@ -15,9 +15,52 @@ import io
 from modules import storage

 import config
+from modules import path_utils

 logger = logging.getLogger(__name__)

+
+def _env_int(name: str, default: int) -> int:
+    try:
+        return int(os.getenv(name, str(default)))
+    except Exception:
+        return default
+
+
+# Tunables: slow channels can be hot; default conservative but adjustable.
+IMG_SUBMIT_TIMEOUT_S = _env_int("IMG_SUBMIT_TIMEOUT_S", 180)
+IMG_POLL_TIMEOUT_S = _env_int("IMG_POLL_TIMEOUT_S", 30)
+IMG_MAX_RETRIES = _env_int("IMG_MAX_RETRIES", 3)
+IMG_POLL_INTERVAL_S = _env_int("IMG_POLL_INTERVAL_S", 2)
+IMG_POLL_MAX_RETRIES = _env_int("IMG_POLL_MAX_RETRIES", 90)  # 90*2s ~= 180s
+
+
+def _is_retryable_exception(e: Exception) -> bool:
+    # Network / transient errors
+    if isinstance(e, (requests.Timeout, requests.ConnectionError)):
+        return True
+    msg = str(e).lower()
+    # Transient provider errors often contain these keywords
+    if any(k in msg for k in ["timeout", "temporarily", "temporarily unavailable", "gateway", "rate", "try again"]):
+        return True
+    return False
+
+
+def _with_retries(fn, *, max_retries: int, label: str):
+    last = None
+    for attempt in range(1, max_retries + 1):
+        try:
+            return fn()
+        except Exception as e:
+            last = e
+            retryable = _is_retryable_exception(e)
+            logger.warning(f"[{label}] attempt {attempt}/{max_retries} failed: {e} (retryable={retryable})")
+            if not retryable or attempt >= max_retries:
+                raise
+            # small backoff
+            time.sleep(min(2 ** (attempt - 1), 4))
+    raise last  # pragma: no cover
+
 class ImageGenerator:
    """连贯图片生成器 (Volcengine Provider)"""
    
@@ -51,7 +94,8 @@ class ImageGenerator:
        original_image_path: Any, 
        previous_image_path: Optional[str] = None,
        model_provider: str = "shubiaobiao", # "shubiaobiao", "gemini", "doubao"
-        visual_anchor: str = ""  # 视觉锚点，强制拼接到 prompt 前
+        visual_anchor: str = "",  # 视觉锚点，强制拼接到 prompt 前
+        project_id: Optional[str] = None,
    ) -> Optional[str]:
        """
        生成单张分镜图片 (Public)
@@ -78,11 +122,19 @@ class ImageGenerator:
            input_images.append(previous_image_path)
            
        try:
+            out_dir = path_utils.project_images_dir(project_id) if project_id else config.TEMP_DIR
+            out_name = path_utils.unique_filename(
+                prefix="scene_image",
+                ext="png",
+                project_id=project_id,
+                scene_id=scene_id,
+            )
            output_path = self._generate_single_image(
                prompt=visual_prompt,
                reference_images=input_images, 
-                output_filename=f"scene_{scene_id}_{int(time.time())}.png",
-                provider=model_provider
+                output_filename=out_name,
+                provider=model_provider,
+                output_dir=out_dir,
            )
            
            if output_path:
@@ -101,7 +153,8 @@ class ImageGenerator:
        self,
        scenes: List[Dict[str, Any]],
        reference_images: List[str],
-        visual_anchor: str = ""  # 视觉锚点
+        visual_anchor: str = "",  # 视觉锚点
+        project_id: Optional[str] = None,
    ) -> Dict[int, str]:
        """
        Doubao 组图生成 (Batch) - 拼接 Prompt 一次生成多张
@@ -187,7 +240,15 @@ class ImageGenerator:
                        if image_url:
                            # Download
                            img_resp = requests.get(image_url, timeout=60)
-                            output_path = config.TEMP_DIR / f"scene_{scene_id}_{int(time.time())}.png"
+                            out_dir = path_utils.project_images_dir(project_id) if project_id else config.TEMP_DIR
+                            out_name = path_utils.unique_filename(
+                                prefix="scene_image",
+                                ext="png",
+                                project_id=project_id,
+                                scene_id=scene_id,
+                                extra="group",
+                            )
+                            output_path = out_dir / out_name
                            with open(output_path, "wb") as f:
                                f.write(img_resp.content)
                            results[scene_id] = str(output_path)
@@ -203,21 +264,24 @@ class ImageGenerator:
        prompt: str, 
        reference_images: List[str],
        output_filename: str,
-        provider: str = "shubiaobiao"
+        provider: str = "shubiaobiao",
+        output_dir: Optional[Path] = None,
    ) -> Optional[str]:
        """统一入口"""
+        out_dir = output_dir or config.TEMP_DIR
        if provider == "doubao":
-            return self._generate_single_image_doubao(prompt, reference_images, output_filename)
+            return self._generate_single_image_doubao(prompt, reference_images, output_filename, out_dir)
        elif provider == "gemini":
-            return self._generate_single_image_gemini(prompt, reference_images, output_filename)
+            return self._generate_single_image_gemini(prompt, reference_images, output_filename, out_dir)
        else:
-            return self._generate_single_image_shubiao(prompt, reference_images, output_filename)
+            return self._generate_single_image_shubiao(prompt, reference_images, output_filename, out_dir)

    def _generate_single_image_doubao(
        self,
        prompt: str,
        reference_images: List[str],
-        output_filename: str
+        output_filename: str,
+        output_dir: Path
    ) -> Optional[str]:
        """调用 Volcengine Doubao (Image API)"""
        
@@ -255,9 +319,9 @@ class ImageGenerator:
            "Authorization": f"Bearer {config.VOLC_API_KEY}"
        }
        
-        try:
+        def _call():
            logger.info(f"Submitting to Doubao Image: {self.endpoint}")
-            resp = requests.post(self.endpoint, json=payload, headers=headers, timeout=180) 
+            resp = requests.post(self.endpoint, json=payload, headers=headers, timeout=IMG_SUBMIT_TIMEOUT_S)
            
            if resp.status_code != 200:
                msg = f"Doubao Image Failed ({resp.status_code}): {resp.text}"
@@ -272,22 +336,20 @@ class ImageGenerator:
                    img_resp = requests.get(image_url, timeout=60)
                    img_resp.raise_for_status()
                    
-                    output_path = config.TEMP_DIR / output_filename
+                    output_path = output_dir / output_filename
                    with open(output_path, "wb") as f:
                        f.write(img_resp.content)
                    return str(output_path)
            
            raise RuntimeError(f"No image URL in Doubao response: {data}")
-            
-        except Exception as e:
-            logger.error(f"Doubao Gen Failed: {e}")
-            raise e
+        return _with_retries(_call, max_retries=IMG_MAX_RETRIES, label="doubao_image")

    def _generate_single_image_shubiao(
        self,
        prompt: str,
        reference_images: List[str],
-        output_filename: str
+        output_filename: str,
+        output_dir: Path
    ) -> Optional[str]:
        """调用 api2img.shubiaobiao.com 通道生成图片（同步返回 base64）"""
        # 准备参考图，内联 base64 方式
@@ -338,9 +400,9 @@ class ImageGenerator:
            "Content-Type": "application/json"
        }
        
-        try:
+        def _call():
            logger.info(f"Submitting to Shubiaobiao Img: {endpoint}")
-            resp = requests.post(endpoint, json=payload, headers=headers, timeout=120)
+            resp = requests.post(endpoint, json=payload, headers=headers, timeout=IMG_SUBMIT_TIMEOUT_S)
            
            if resp.status_code != 200:
                msg = f"Shubiaobiao 提交失败 ({resp.status_code}): {resp.text}"
@@ -365,22 +427,20 @@ class ImageGenerator:
                logger.error(msg)
                raise RuntimeError(msg)
            
-            output_path = config.TEMP_DIR / output_filename
+            output_path = output_dir / output_filename
            with open(output_path, "wb") as f:
                f.write(base64.b64decode(img_b64))
                
            logger.info(f"Shubiaobiao Generation Success: {output_path}")
            return str(output_path)
-            
-        except Exception as e:
-            logger.error(f"Shubiaobiao Generation Exception: {e}")
-            raise
+        return _with_retries(_call, max_retries=IMG_MAX_RETRIES, label="shubiaobiao_image")

    def _generate_single_image_gemini(
        self,
        prompt: str,
        reference_images: List[str],
-        output_filename: str
+        output_filename: str,
+        output_dir: Path
    ) -> Optional[str]:
        """调用 Gemini (Wuyin Keji / NanoBanana-Pro) 生成单张图片"""
        
@@ -420,10 +480,10 @@ class ImageGenerator:
            "Content-Type": "application/json;charset:utf-8"
        }
        
-        # 2. 提交任务
-        try:
+        def _call():
+            # 2. 提交任务
            logger.info(f"Submitting to Gemini: {config.GEMINI_IMG_API_URL}")
-            resp = requests.post(config.GEMINI_IMG_API_URL, json=payload, headers=headers, timeout=30)
+            resp = requests.post(config.GEMINI_IMG_API_URL, json=payload, headers=headers, timeout=IMG_SUBMIT_TIMEOUT_S)
            
            if resp.status_code != 200:
                msg = f"Gemini 提交失败 ({resp.status_code}): {resp.text}"
@@ -443,13 +503,12 @@ class ImageGenerator:
            logger.info(f"Gemini Task Submitted, ID: {task_id}")
            
            # 3. 轮询状态
-            max_retries = 60
-            for i in range(max_retries):
-                time.sleep(2)
+            for _ in range(IMG_POLL_MAX_RETRIES):
+                time.sleep(IMG_POLL_INTERVAL_S)
                
                poll_url = f"{config.GEMINI_IMG_DETAIL_URL}?key={config.GEMINI_IMG_KEY}&id={task_id}"
                try:
-                    poll_resp = requests.get(poll_url, headers=headers, timeout=30)
+                    poll_resp = requests.get(poll_url, headers=headers, timeout=IMG_POLL_TIMEOUT_S)
                except requests.Timeout:
                    continue
                except Exception as e:
@@ -474,7 +533,7 @@ class ImageGenerator:
                    img_resp = requests.get(image_url, timeout=60)
                    img_resp.raise_for_status()
                    
-                    output_path = config.TEMP_DIR / output_filename
+                    output_path = output_dir / output_filename
                    with open(output_path, "wb") as f:
                        f.write(img_resp.content)
                        
@@ -485,7 +544,4 @@ class ImageGenerator:
                    raise RuntimeError(f"Gemini 生成失败: {fail_reason}")

            raise RuntimeError("Gemini 生成超时")
-            
-        except Exception as e:
-            logger.error(f"Gemini Generation Exception: {e}")
-            raise
+        return _with_retries(_call, max_retries=IMG_MAX_RETRIES, label="gemini_image")
--- a/modules/legacy_normalizer.py
+++ b/modules/legacy_normalizer.py
@@ -0,0 +1,248 @@
+"""
+Legacy project JSON normalizer.
+
+Goal:
+- Convert legacy project JSON (from /opt/gloda-factory/temp/project_*.json)
+  into the script_data schema expected by current Streamlit UI (`app.py`)
+  and composer (`modules/composer.py`).
+
+Principles:
+- Pure rule-based, no AI generation.
+- Never drop legacy information: keep full raw doc under `script_data["_legacy"]`
+  and per-scene under `scene["_legacy"]`.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional, Tuple
+
+
+def _as_str(v: Any) -> str:
+    return v if isinstance(v, str) else ""
+
+
+def _as_dict(v: Any) -> Dict[str, Any]:
+    return v if isinstance(v, dict) else {}
+
+
+def _as_list(v: Any) -> List[Any]:
+    return v if isinstance(v, list) else []
+
+
+def _detect_schema_variant(doc: Dict[str, Any]) -> str:
+    scenes = _as_list(doc.get("scenes"))
+    if not scenes:
+        return "Unknown"
+    prompt_keys = {"image_prompt", "visual_prompt", "video_prompt"}
+    for s in scenes:
+        if isinstance(s, dict) and (set(s.keys()) & prompt_keys):
+            return "Schema_A"
+    typical_b = {"keyframe", "story_beat", "camera_movement", "image_url"}
+    for s in scenes:
+        if isinstance(s, dict) and (set(s.keys()) & typical_b):
+            return "Schema_B"
+    return "Unknown"
+
+
+def _derive_visual_prompt_from_keyframe(scene: Dict[str, Any]) -> str:
+    """
+    Build a readable prompt-like summary from keyframe + story_beat.
+    This is NOT an AI prompt; it's a structured description to avoid empty fields.
+    """
+    keyframe = _as_dict(scene.get("keyframe") or scene.get("keyframes"))
+    story_beat = _as_str(scene.get("story_beat"))
+
+    parts: List[str] = []
+    if keyframe:
+        parts.append("[DerivedFromKeyframe]")
+        # deterministic ordering for readability
+        for k in sorted(keyframe.keys()):
+            v = keyframe.get(k)
+            if isinstance(v, (str, int, float)) and str(v).strip():
+                parts.append(f"{k}: {v}")
+            elif isinstance(v, dict) and v:
+                # flatten one level
+                sub = ", ".join(f"{sk}={sv}" for sk, sv in sorted(v.items()) if str(sv).strip())
+                if sub:
+                    parts.append(f"{k}: {sub}")
+    if story_beat:
+        parts.append(f"story_beat: {story_beat}")
+    return "\n".join(parts).strip()
+
+
+def _derive_video_prompt_from_motion(scene: Dict[str, Any]) -> str:
+    camera_movement = _as_str(scene.get("camera_movement"))
+    rhythm = scene.get("rhythm")
+    story_beat = _as_str(scene.get("story_beat"))
+
+    parts: List[str] = []
+    parts.append("[DerivedFromMotion]")
+    if camera_movement:
+        parts.append(f"camera_movement: {camera_movement}")
+    if isinstance(rhythm, dict) and rhythm:
+        # keep stable keys
+        sub = ", ".join(f"{k}={rhythm.get(k)}" for k in sorted(rhythm.keys()))
+        parts.append(f"rhythm: {sub}")
+    if story_beat:
+        parts.append(f"story_beat: {story_beat}")
+    return "\n".join(parts).strip()
+
+
+def _normalize_fancy_text(scene: Dict[str, Any], default_duration: float) -> Dict[str, Any]:
+    ft = scene.get("fancy_text")
+    if isinstance(ft, dict):
+        # Ensure required keys exist
+        out = dict(ft)
+        out.setdefault("text", "")
+        out.setdefault("style", "highlight")
+        # support either position dict or string
+        if "position" not in out:
+            out["position"] = "center"
+        out.setdefault("start_time", 0.0)
+        out.setdefault("duration", default_duration)
+        return out
+
+    # legacy doesn't have fancy_text
+    return {
+        "text": "",
+        "style": "highlight",
+        "position": "center",
+        "start_time": 0.0,
+        "duration": default_duration,
+    }
+
+
+def _build_voiceover_timeline_from_scenes(normalized_scenes: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    timeline: List[Dict[str, Any]] = []
+    t = 0.0
+    for idx, s in enumerate(normalized_scenes):
+        dur = float(s.get("duration") or 0) or 0.0
+        legacy = _as_dict(s.get("_legacy"))
+        vo = _as_str(legacy.get("voiceover") or s.get("voiceover") or "")
+        if vo.strip():
+            timeline.append(
+                {
+                    "id": idx + 1,
+                    "text": vo,
+                    "subtitle": vo,
+                    "start_time": t,
+                    "duration": dur if dur > 0 else 3.0,
+                }
+            )
+        t += dur if dur > 0 else 0.0
+    return timeline
+
+
+def normalize_legacy_project(doc: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Return a script_data dict compatible with current UI.
+    """
+    schema = _detect_schema_variant(doc)
+
+    scenes_in = _as_list(doc.get("scenes"))
+    normalized_scenes: List[Dict[str, Any]] = []
+
+    for s in scenes_in:
+        if not isinstance(s, dict):
+            continue
+
+        scene_id = int(s.get("id") or (len(normalized_scenes) + 1))
+        duration = float(s.get("duration") or 0) or 0.0
+        if duration <= 0:
+            duration = 3.0
+
+        # visual prompt
+        visual_prompt = ""
+        if schema == "Schema_A":
+            # legacy key is usually image_prompt
+            visual_prompt = _as_str(s.get("visual_prompt") or s.get("image_prompt") or "")
+        elif schema == "Schema_B":
+            visual_prompt = _derive_visual_prompt_from_keyframe(s)
+        else:
+            visual_prompt = _as_str(s.get("visual_prompt") or s.get("image_prompt") or "")
+
+        if not visual_prompt and s.get("keyframe"):
+            visual_prompt = _derive_visual_prompt_from_keyframe(s)
+
+        # video prompt
+        video_prompt = _as_str(s.get("video_prompt") or "")
+        if not video_prompt:
+            video_prompt = _derive_video_prompt_from_motion(s)
+
+        # fancy text (default safe)
+        fancy_text = _normalize_fancy_text(s, default_duration=duration)
+
+        normalized_scene: Dict[str, Any] = {
+            "id": scene_id,
+            "duration": duration,
+            "visual_prompt": visual_prompt,
+            "video_prompt": video_prompt,
+            "fancy_text": fancy_text,
+            # keep optional fields if present
+            "timeline": s.get("timeline", ""),
+        }
+
+        # Attach per-scene legacy snapshot (do not mutate the original)
+        normalized_scene["_legacy"] = {
+            "schema": schema,
+            "image_url": s.get("image_url"),
+            "keyframe": s.get("keyframe") or s.get("keyframes"),
+            "camera_movement": s.get("camera_movement"),
+            "story_beat": s.get("story_beat"),
+            "rhythm": s.get("rhythm"),
+            "sound_design": s.get("sound_design"),
+            "voiceover": s.get("voiceover"),
+        }
+
+        normalized_scenes.append(normalized_scene)
+
+    # voiceover timeline: normalize existing if present, else derive from scenes voiceover
+    vtl = doc.get("voiceover_timeline")
+    voiceover_timeline: List[Dict[str, Any]] = []
+    if isinstance(vtl, list) and vtl:
+        for idx, it in enumerate(vtl):
+            if not isinstance(it, dict):
+                continue
+            # unify field names
+            text = _as_str(it.get("text") or it.get("voiceover") or "")
+            subtitle = _as_str(it.get("subtitle") or text)
+            start_time = float(it.get("start_time") or 0.0)
+            duration = float(it.get("duration") or 3.0)
+            voiceover_timeline.append(
+                {
+                    "id": int(it.get("id") or (idx + 1)),
+                    "text": text,
+                    "subtitle": subtitle,
+                    "start_time": start_time,
+                    "duration": duration,
+                }
+            )
+    else:
+        voiceover_timeline = _build_voiceover_timeline_from_scenes(normalized_scenes)
+
+    # script_data expected by UI
+    script_data: Dict[str, Any] = {
+        "hook": doc.get("hook", ""),
+        "selling_points": doc.get("selling_points", []) or [],
+        "target_audience": doc.get("target_audience", "") or "",
+        "video_style": doc.get("video_style", "") or "",
+        "bgm_style": doc.get("bgm_style", "") or "",
+        "voiceover_timeline": voiceover_timeline,
+        "scenes": normalized_scenes,
+        "cta": doc.get("cta", ""),
+        # Keep analysis for UI fallback display
+        "analysis": doc.get("analysis", ""),
+        # Preserve original
+        "_legacy": doc,
+        "_legacy_schema": schema,
+    }
+
+    return script_data
+
+
+
+
+
+
+
--- a/modules/legacy_path_mapper.py
+++ b/modules/legacy_path_mapper.py
@@ -0,0 +1,66 @@
+"""
+Legacy path mapper for assets generated by the 8502 runtime (/root/video-flow).
+
+Problem:
+- Postgres `scene_assets.local_path` may contain paths like `/root/video-flow/temp/...`
+  which are not visible inside docker containers running 8503 stack.
+
+Solution:
+- Mount host directories into containers (e.g. /legacy/temp, /legacy/output)
+- Map legacy host paths -> container paths, and produce static URLs accordingly.
+"""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+from typing import Optional, Tuple
+
+
+LEGACY_HOST_TEMP_PREFIX = "/root/video-flow/temp/"
+LEGACY_HOST_OUTPUT_PREFIX = "/root/video-flow/output/"
+
+# Container mount points (see docker-compose.yml)
+LEGACY_CONTAINER_TEMP_DIR = "/legacy/temp"
+LEGACY_CONTAINER_OUTPUT_DIR = "/legacy/output"
+
+LEGACY_STATIC_TEMP_PREFIX = "/static/legacy-temp/"
+LEGACY_STATIC_OUTPUT_PREFIX = "/static/legacy-output/"
+
+
+def map_legacy_local_path(local_path: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
+    """
+    Returns: (container_visible_path, static_url)
+    - If local_path exists as-is, returns (local_path, None)
+    - If it's a legacy host path, rewrite to container mount and provide URL
+    - If unknown, returns (local_path, None)
+    """
+    if not local_path:
+        return None, None
+
+    # If container can see it already, keep
+    if os.path.exists(local_path):
+        return local_path, None
+
+    # Legacy host -> container mapping by basename
+    if local_path.startswith(LEGACY_HOST_TEMP_PREFIX):
+        name = Path(local_path).name
+        container_path = str(Path(LEGACY_CONTAINER_TEMP_DIR) / name)
+        url = f"{LEGACY_STATIC_TEMP_PREFIX}{name}"
+        return container_path, url
+
+    if local_path.startswith(LEGACY_HOST_OUTPUT_PREFIX):
+        name = Path(local_path).name
+        container_path = str(Path(LEGACY_CONTAINER_OUTPUT_DIR) / name)
+        url = f"{LEGACY_STATIC_OUTPUT_PREFIX}{name}"
+        return container_path, url
+
+    # Unknown path: keep as-is
+    return local_path, None
+
+
+
+
+
+
+
--- a/modules/limits.py
+++ b/modules/limits.py
@@ -0,0 +1,49 @@
+"""
+Process-wide concurrency limits for Streamlit single-process deployment.
+
+These limits reduce tail latency and avoid a single user saturating network/CPU
+and impacting other concurrent sessions.
+"""
+
+from __future__ import annotations
+
+import os
+import threading
+from contextlib import contextmanager
+from typing import Iterator
+
+
+def _env_int(name: str, default: int) -> int:
+    try:
+        return max(1, int(os.getenv(name, str(default))))
+    except Exception:
+        return default
+
+
+MAX_CONCURRENT_IMAGE = _env_int("MAX_CONCURRENT_IMAGE", 6)
+MAX_CONCURRENT_VIDEO = _env_int("MAX_CONCURRENT_VIDEO", 1)
+
+_image_sem = threading.BoundedSemaphore(MAX_CONCURRENT_IMAGE)
+_video_sem = threading.BoundedSemaphore(MAX_CONCURRENT_VIDEO)
+
+
+@contextmanager
+def acquire_image(blocking: bool = True) -> Iterator[bool]:
+    ok = _image_sem.acquire(blocking=blocking)
+    try:
+        yield ok
+    finally:
+        if ok:
+            _image_sem.release()
+
+
+@contextmanager
+def acquire_video(blocking: bool = True) -> Iterator[bool]:
+    ok = _video_sem.acquire(blocking=blocking)
+    try:
+        yield ok
+    finally:
+        if ok:
+            _video_sem.release()
+
+
--- a/modules/path_utils.py
+++ b/modules/path_utils.py
@@ -0,0 +1,93 @@
+"""
+Path utilities for cross-session / cross-project isolation.
+
+Goal:
+- Avoid file overwrites across concurrent users/projects by namespacing all temp artifacts
+  under temp/projects/{project_id}/...
+- Provide safe unique filename helpers.
+"""
+
+from __future__ import annotations
+
+import os
+import re
+import time
+import uuid
+from pathlib import Path
+from typing import Optional
+
+import config
+
+
+_SAFE_CHARS_RE = re.compile(r"[^A-Za-z0-9._-]+")
+
+
+def sanitize_filename(name: str) -> str:
+    """Keep only safe filename characters and strip path separators."""
+    if not isinstance(name, str):
+        return "file"
+    name = name.replace("\\", "_").replace("/", "_").strip()
+    name = _SAFE_CHARS_RE.sub("_", name)
+    return name or "file"
+
+
+def ensure_dir(path: Path) -> Path:
+    path.mkdir(parents=True, exist_ok=True)
+    return path
+
+
+def project_root(project_id: str) -> Path:
+    pid = sanitize_filename(project_id or "UNKNOWN")
+    return ensure_dir(config.TEMP_DIR / "projects" / pid)
+
+
+def project_upload_dir(project_id: str) -> Path:
+    return ensure_dir(project_root(project_id) / "uploads")
+
+
+def project_images_dir(project_id: str) -> Path:
+    return ensure_dir(project_root(project_id) / "images")
+
+
+def project_videos_dir(project_id: str) -> Path:
+    return ensure_dir(project_root(project_id) / "videos")
+
+
+def project_audio_dir(project_id: str) -> Path:
+    return ensure_dir(project_root(project_id) / "audio")
+
+
+def project_compose_dir(project_id: str, output_name: str) -> Path:
+    out = sanitize_filename(output_name or f"compose_{int(time.time())}")
+    return ensure_dir(project_root(project_id) / "compose" / out)
+
+
+def unique_filename(
+    prefix: str,
+    ext: str,
+    project_id: Optional[str] = None,
+    scene_id: Optional[int] = None,
+    extra: Optional[str] = None,
+) -> str:
+    """
+    Build a unique filename.
+    Example: scene_1_PROJ-xxx_173..._a1b2c3.mp4
+    """
+    pfx = sanitize_filename(prefix or "file")
+    e = (ext or "").lstrip(".") or "bin"
+    pid = sanitize_filename(project_id) if project_id else None
+    sid = str(int(scene_id)) if scene_id is not None else None
+    ex = sanitize_filename(extra) if extra else None
+    ts = str(int(time.time() * 1000))
+    rnd = uuid.uuid4().hex[:8]
+    parts = [pfx]
+    if sid:
+        parts.append(f"s{sid}")
+    if pid:
+        parts.append(pid)
+    if ex:
+        parts.append(ex)
+    parts.extend([ts, rnd])
+    return f"{'_'.join(parts)}.{e}"
+
+
--- a/modules/video_gen.py
+++ b/modules/video_gen.py
@@ -12,6 +12,7 @@ from pathlib import Path
 import config
 from modules import storage
 from modules.db_manager import db
+from modules import path_utils

 logger = logging.getLogger(__name__)

@@ -76,15 +77,7 @@ class VideoGenerator:
            logger.info(f"Recovering task {task_id}: status={status}")
            
            if status == "succeeded" and video_url:
-                downloaded_path = self._download_video(video_url, os.path.basename(output_path))
-                if downloaded_path:
-                    # 如果下载的文件名和目标路径不一致 (download_video 使用 filename 参数拼接到 TEMP_DIR)，
-                    # 需要移动或确认。 _download_video 返回完整路径。
-                    # 如果 output_path 是绝对路径且不同，则移动。
-                    if os.path.abspath(downloaded_path) != os.path.abspath(output_path):
-                         import shutil
-                         shutil.move(downloaded_path, output_path)
-                    return True
+                return self._download_video_to(video_url, output_path)
            return False
        except Exception as e:
            logger.error(f"Failed to recover video task {task_id}: {e}")
@@ -144,7 +137,15 @@ class VideoGenerator:
                if status == "succeeded":
                    logger.info(f"Scene {scene_id} video generated successfully")
                    # 下载视频
-                    video_path = self._download_video(result_url, f"scene_{scene_id}_video.mp4")
+                    out_dir = path_utils.project_videos_dir(project_id) if project_id else config.TEMP_DIR
+                    fname = path_utils.unique_filename(
+                        prefix="scene_video",
+                        ext="mp4",
+                        project_id=project_id,
+                        scene_id=scene_id,
+                        extra=(task_id[-8:] if isinstance(task_id, str) else None),
+                    )
+                    video_path = self._download_video(result_url, fname, output_dir=out_dir)
                    if video_path:
                        generated_videos[scene_id] = video_path
                        # Update DB
@@ -235,13 +236,26 @@ class VideoGenerator:
            content_url = None
            
            if status == "succeeded":
-                if "content" in result:
-                    content = result["content"]
-                    if isinstance(content, list) and len(content) > 0:
-                         item = content[0]
-                         content_url = item.get("video_url") or item.get("url")
-                    elif isinstance(content, dict):
-                         content_url = content.get("video_url") or content.get("url")
+                # Try multiple known shapes for volcengine response
+                content = result.get("content")
+                # sometimes nested: data.content or data.result.content, etc.
+                if not content and isinstance(result.get("result"), dict):
+                    content = result["result"].get("content")
+
+                def _extract_url(obj):
+                    if isinstance(obj, dict):
+                        return obj.get("video_url") or obj.get("url")
+                    return None
+
+                if isinstance(content, list) and content:
+                    # pick the first item that has a usable url
+                    for item in content:
+                        u = _extract_url(item)
+                        if u:
+                            content_url = u
+                            break
+                elif isinstance(content, dict):
+                    content_url = _extract_url(content)
            
            return status, content_url
            
@@ -249,8 +263,26 @@ class VideoGenerator:
            logger.error(f"Check task failed: {e}")
            return "unknown", None

-    def _download_video(self, url: str, filename: str) -> str:
-        """下载视频到临时目录"""
+    def _download_video_to(self, url: str, output_path: str) -> bool:
+        """下载视频到指定路径（避免 TEMP_DIR 固定文件名导致覆盖）"""
+        if not url or not output_path:
+            return False
+        try:
+            out_p = Path(output_path)
+            out_p.parent.mkdir(parents=True, exist_ok=True)
+            response = requests.get(url, stream=True, timeout=60)
+            response.raise_for_status()
+            with open(out_p, "wb") as f:
+                for chunk in response.iter_content(chunk_size=8192):
+                    if chunk:
+                        f.write(chunk)
+            return True
+        except Exception as e:
+            logger.error(f"Download video failed: {e}")
+            return False
+
+    def _download_video(self, url: str, filename: str, output_dir: Optional[Path] = None) -> str:
+        """下载视频到临时目录（默认使用 config.TEMP_DIR；可指定 output_dir 避免覆盖）"""
        if not url:
            return None
            
@@ -258,10 +290,13 @@ class VideoGenerator:
            response = requests.get(url, stream=True, timeout=60)
            response.raise_for_status()
            
-            output_path = config.TEMP_DIR / filename
+            out_dir = output_dir or config.TEMP_DIR
+            out_dir.mkdir(parents=True, exist_ok=True)
+            output_path = out_dir / filename
            with open(output_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
-                    f.write(chunk)
+                    if chunk:
+                        f.write(chunk)
            
            return str(output_path)
        except Exception as e:
--- a/requirements.txt
+++ b/requirements.txt
@@ -21,9 +21,22 @@ imageio[ffmpeg]>=2.33.0
 Pillow>=10.0.0
 numpy>=1.24.0

-# Web UI
+# Web UI (Streamlit - 保留原有调试界面)
 streamlit>=1.29.0

+# FastAPI Backend (新增前后端分离)
+fastapi>=0.109.0
+uvicorn[standard]>=0.27.0
+python-multipart>=0.0.6
+
+# Task Queue (异步任务处理，支持水平扩展)
+celery[redis]>=5.3.0
+redis>=5.0.0
+
+# Database
+sqlalchemy>=2.0.0
+psycopg2-binary>=2.9.9
+
 # Config
 python-dotenv>=1.0.0
 PyYAML>=6.0.1