perf(8502): 并行生图(6并发)+超时重试;视频URL直连预览/下载;路径隔离

This commit is contained in:
Tony Zhang
2025-12-17 12:21:22 +08:00
parent ebcf165c3f
commit 1e210ffccf
12 changed files with 1168 additions and 201 deletions

452
app.py
View File

@@ -9,6 +9,8 @@ import os
import random import random
from pathlib import Path from pathlib import Path
import pandas as pd import pandas as pd
from time import perf_counter
from concurrent.futures import ThreadPoolExecutor, as_completed
# Import Backend Modules # Import Backend Modules
import config import config
@@ -19,6 +21,10 @@ from modules.composer import VideoComposer, VideoComposer as Composer # alias
from modules.text_renderer import renderer from modules.text_renderer import renderer
from modules import export_utils from modules import export_utils
from modules.db_manager import db from modules.db_manager import db
from modules import path_utils
from modules import limits
from modules.legacy_path_mapper import map_legacy_local_path
from modules.legacy_normalizer import normalize_legacy_project
# Page Config # Page Config
st.set_page_config( st.set_page_config(
@@ -138,6 +144,32 @@ def load_project(project_id):
st.session_state.script_data = data.get("script_data") st.session_state.script_data = data.get("script_data")
st.session_state.view_mode = "workspace" st.session_state.view_mode = "workspace"
# Fallback: 如果 DB 中的 script_data 是旧结构/缺字段,则从 legacy JSON 重新规范化一次
try:
script_data = st.session_state.script_data
legacy_json = Path(config.TEMP_DIR) / f"project_{project_id}.json"
def _needs_normalize(sd: Any) -> bool:
if not isinstance(sd, dict):
return True
if "_legacy_schema" not in sd:
return True
scenes = sd.get("scenes") or []
if scenes and isinstance(scenes, list) and isinstance(scenes[0], dict):
if "visual_prompt" not in scenes[0] or "video_prompt" not in scenes[0]:
return True
return False
if legacy_json.exists() and _needs_normalize(script_data):
raw = json.loads(legacy_json.read_text(encoding="utf-8"))
normalized = normalize_legacy_project(raw)
st.session_state.script_data = normalized
# 写回 DB避免每次 load 都重新算
db.update_project_script(project_id, normalized)
st.info("已从 legacy JSON 重新规范化脚本字段(兼容旧版项目)。")
except Exception as e:
st.warning(f"legacy 规范化失败(将继续使用 DB 数据): {e}")
# Restore product info for Step 1 display # Restore product info for Step 1 display
product_info = data.get("product_info", {}) product_info = data.get("product_info", {})
st.session_state.loaded_product_name = data.get("name", "") st.session_state.loaded_product_name = data.get("name", "")
@@ -161,13 +193,17 @@ def load_project(project_id):
for asset in assets: for asset in assets:
sid = asset["scene_id"] sid = asset["scene_id"]
source_path, _mapped_url = map_legacy_local_path(asset.get("local_path"))
# 假设 scene_id 0 或 -1 用于 final video # 假设 scene_id 0 或 -1 用于 final video
if asset["asset_type"] == "image" and asset["status"] == "completed": if asset["asset_type"] == "image" and asset["status"] == "completed":
images[sid] = asset["local_path"] if source_path:
images[sid] = source_path
elif asset["asset_type"] == "video" and asset["status"] == "completed": elif asset["asset_type"] == "video" and asset["status"] == "completed":
videos[sid] = asset["local_path"] if source_path:
videos[sid] = source_path
elif asset["asset_type"] == "final_video" and asset["status"] == "completed": elif asset["asset_type"] == "final_video" and asset["status"] == "completed":
final_vid = asset["local_path"] if source_path:
final_vid = source_path
st.session_state.scene_images = images st.session_state.scene_images = images
st.session_state.scene_videos = videos st.session_state.scene_videos = videos
@@ -233,6 +269,33 @@ with st.sidebar:
if st.session_state.project_id: if st.session_state.project_id:
st.caption(f"Current ID: {st.session_state.project_id}") st.caption(f"Current ID: {st.session_state.project_id}")
with st.expander("⏱️ 性能与诊断", expanded=False):
m = _get_metrics(st.session_state.project_id)
if not m:
st.caption("暂无指标(执行一次脚本/生图/生视频/合成后会出现)。")
else:
keys = [
"script_gen_s",
"image_gen_total_s",
"video_submit_s",
"video_recover_s",
"compose_s",
"script_model",
"image_provider",
"image_generated",
"video_submitted",
"video_recovered",
"bgm_used",
]
for k in keys:
if k in m:
st.caption(f"{k}: {m.get(k)}")
# 在线剪辑入口React Editor
web_base_url = os.getenv("WEB_BASE_URL", "http://localhost:3000").rstrip("/")
st.markdown(
f"[打开在线剪辑器]({web_base_url}/editor/{st.session_state.project_id})",
unsafe_allow_html=False,
)
st.markdown("---") st.markdown("---")
@@ -258,14 +321,48 @@ with st.sidebar:
# ============================================================ # ============================================================
# Helper Functions # Helper Functions
# ============================================================ # ============================================================
def save_uploaded_file(uploaded_file): def _record_metrics(project_id: str, patch: dict):
"""Save uploaded file to temp dir.""" """Persist lightweight timing/diagnostic metrics into project.product_info['_metrics']."""
if uploaded_file is not None: if not project_id or not isinstance(patch, dict) or not patch:
file_path = config.TEMP_DIR / uploaded_file.name return
try:
proj = db.get_project(project_id) or {}
product_info = proj.get("product_info") or {}
metrics = product_info.get("_metrics") if isinstance(product_info.get("_metrics"), dict) else {}
metrics.update(patch)
metrics["updated_at"] = time.time()
product_info["_metrics"] = metrics
db.update_project_product_info(project_id, product_info)
except Exception:
# metrics must never break UX
pass
def _get_metrics(project_id: str) -> dict:
try:
proj = db.get_project(project_id) or {}
product_info = proj.get("product_info") or {}
m = product_info.get("_metrics")
return m if isinstance(m, dict) else {}
except Exception:
return {}
def save_uploaded_file(project_id: str, uploaded_file):
"""Save uploaded file to per-project upload dir (avoid overwrites across projects)."""
if uploaded_file is None:
return None
if not project_id:
raise ValueError("project_id is required to save uploaded files safely")
upload_dir = path_utils.project_upload_dir(project_id)
original = path_utils.sanitize_filename(getattr(uploaded_file, "name", "upload"))
# keep original stem for readability, but ensure uniqueness
suffix = Path(original).suffix.lstrip(".") or "bin"
stem = Path(original).stem or "upload"
unique_name = path_utils.unique_filename(prefix=f"upload_{stem}", ext=suffix, project_id=project_id)
file_path = upload_dir / unique_name
with open(file_path, "wb") as f: with open(file_path, "wb") as f:
f.write(uploaded_file.getbuffer()) f.write(uploaded_file.getbuffer())
return str(file_path) return str(file_path)
return None
# ============================================================ # ============================================================
# Main Content: Workspace # Main Content: Workspace
@@ -318,11 +415,16 @@ if st.session_state.view_mode == "workspace":
# 允许在没有上传新图片但有历史图片的情况下继续 # 允许在没有上传新图片但有历史图片的情况下继续
can_submit = uploaded_files or st.session_state.uploaded_images can_submit = uploaded_files or st.session_state.uploaded_images
# Model Selection # Model Selection (all support images; user explicitly chooses model)
model_options = ["Gemini 3 Pro", "Doubao Pro (Vision)"] model_options = ["GPT-5.2", "Gemini 3 Pro", "Doubao Pro (Vision)"]
selected_model_label = st.radio("选择脚本生成模型", model_options, horizontal=True) selected_model_label = st.radio("选择脚本生成模型", model_options, horizontal=True, index=0)
# Map label to provider key # Map label to provider key
model_provider = "doubao" if "Doubao" in selected_model_label else "shubiaobiao" if selected_model_label == "GPT-5.2":
model_provider = "shubiaobiao_gpt"
elif "Doubao" in selected_model_label:
model_provider = "doubao"
else:
model_provider = "shubiaobiao"
if st.button("提交任务 & 生成脚本", type="primary", disabled=(not can_submit)): if st.button("提交任务 & 生成脚本", type="primary", disabled=(not can_submit)):
# 处理图片路径 # 处理图片路径
@@ -335,7 +437,7 @@ if st.session_state.view_mode == "workspace":
st.session_state.project_id = f"PROJ-{int(time.time())}" st.session_state.project_id = f"PROJ-{int(time.time())}"
for uf in uploaded_files: for uf in uploaded_files:
path = save_uploaded_file(uf) path = save_uploaded_file(st.session_state.project_id, uf)
if path: image_paths.append(path) if path: image_paths.append(path)
st.session_state.uploaded_images = image_paths st.session_state.uploaded_images = image_paths
@@ -352,7 +454,12 @@ if st.session_state.view_mode == "workspace":
# Call Script Generator # Call Script Generator
with st.spinner(f"正在分析商品信息并生成脚本 ({selected_model_label})..."): with st.spinner(f"正在分析商品信息并生成脚本 ({selected_model_label})..."):
gen = ScriptGenerator() gen = ScriptGenerator()
t0 = perf_counter()
script = gen.generate_script(product_name, product_info, image_paths, model_provider=model_provider) script = gen.generate_script(product_name, product_info, image_paths, model_provider=model_provider)
_record_metrics(st.session_state.project_id, {
"script_gen_s": round(perf_counter() - t0, 3),
"script_model": model_provider,
})
if script: if script:
st.session_state.script_data = script st.session_state.script_data = script
@@ -371,10 +478,39 @@ if st.session_state.view_mode == "workspace":
if st.session_state.script_data: if st.session_state.script_data:
script = st.session_state.script_data script = st.session_state.script_data
# Display Basic Info # Display Basic Info (兼容 legacy schema)
selling_points = script.get("selling_points", []) or []
target_audience = script.get("target_audience", "") or ""
analysis_text = script.get("analysis", "") or ""
legacy_schema = script.get("_legacy_schema", "") or ""
c1, c2 = st.columns(2) c1, c2 = st.columns(2)
c1.write(f"**核心卖点**: {', '.join(script.get('selling_points', []))}") if selling_points:
c2.write(f"**目标人群**: {script.get('target_audience', '')}") c1.write(f"**核心卖点**: {', '.join(selling_points)}")
else:
c1.write("**核心卖点**: (legacy 项目可能未生成该字段)")
if analysis_text:
with st.expander("查看 legacy analysis用于补齐信息"):
st.write(analysis_text)
if target_audience:
c2.write(f"**目标人群**: {target_audience}")
else:
c2.write("**目标人群**: (legacy 项目可能未生成该字段)")
# Hook / CTA / Schema
hook = script.get("hook", "") or ""
if hook:
st.markdown(f"**Hook**: {hook}")
cta = script.get("cta", "")
if cta:
if isinstance(cta, dict):
st.markdown("**CTAlegacy object**")
st.json(cta)
else:
st.markdown(f"**CTA**: {cta}")
if legacy_schema:
st.caption(f"Legacy Schema: {legacy_schema}")
# Prompt Visualization # Prompt Visualization
if "_debug" in script: if "_debug" in script:
@@ -397,8 +533,13 @@ if st.session_state.view_mode == "workspace":
# Global Voiceover Timeline (New) # Global Voiceover Timeline (New)
st.markdown("### 🎙️ 整体旁白与字幕时间轴") st.markdown("### 🎙️ 整体旁白与字幕时间轴")
with st.expander("编辑旁白时间轴 (Voiceover Timeline)", expanded=True): with st.expander("编辑旁白时间轴 (Voiceover Timeline)", expanded=True):
timeline = script.get("voiceover_timeline", []) timeline = script.get("voiceover_timeline", []) or []
if not timeline: if not timeline:
# 对于历史项目:如果没有 scenes 也没有 timeline不要强行塞“示例旁白”避免污染数据
if not scenes and analysis_text:
st.info("该历史项目暂无旁白时间轴(可能停留在分析/提问阶段)。")
timeline = []
else:
# Init with default if empty (使用秒) # Init with default if empty (使用秒)
timeline = [{"text": "示例旁白", "subtitle": "示例字幕", "start_time": 0.0, "duration": 3.0}] timeline = [{"text": "示例旁白", "subtitle": "示例字幕", "start_time": 0.0, "duration": 3.0}]
@@ -444,12 +585,41 @@ if st.session_state.view_mode == "workspace":
# 花字编辑保留 # 花字编辑保留
ft = scene.get("fancy_text", {}) ft = scene.get("fancy_text", {})
if isinstance(ft, dict): if isinstance(ft, dict):
new_ft_text = st.text_input(f"Fancy Text (Scene {scene['id']})", value=ft.get("text", ""), key=f"ft_{i}") new_ft_text = st.text_input(
f"Fancy Text (Scene {scene['id']})",
value=ft.get("text", ""),
key=f"ft_{i}",
)
# 兼容:旧数据可能没有 fancy_text 字段
if not isinstance(scene.get("fancy_text"), dict):
scene["fancy_text"] = {}
scene["fancy_text"]["text"] = new_ft_text scene["fancy_text"]["text"] = new_ft_text
# 旁白/字幕已移至上方整体时间轴,此处仅作展示或删除 # 旁白/字幕已移至上方整体时间轴,此处仅作展示或删除
st.caption("注:旁白与字幕已移至上方整体时间轴编辑") st.caption("注:旁白与字幕已移至上方整体时间轴编辑")
# Legacy 信息展示(只读,用于调试/对齐)
legacy_scene = scene.get("_legacy", {}) if isinstance(scene.get("_legacy", {}), dict) else {}
if legacy_scene:
with st.expander(f"Legacy 信息 (Scene {scene['id']})", expanded=False):
img_url = legacy_scene.get("image_url")
if img_url:
st.markdown(f"- image_url: `{img_url}`")
cam = legacy_scene.get("camera_movement")
if cam:
st.markdown(f"- camera_movement: {cam}")
vo = legacy_scene.get("voiceover")
if vo:
st.markdown(f"- voiceover: {vo}")
keyframe = legacy_scene.get("keyframe")
if keyframe:
st.markdown("- keyframe:")
st.json(keyframe)
rhythm = legacy_scene.get("rhythm")
if rhythm:
st.markdown("- rhythm:")
st.json(rhythm)
updated_scenes.append(scene) updated_scenes.append(scene)
st.divider() st.divider()
@@ -497,6 +667,10 @@ if st.session_state.view_mode == "workspace":
st.session_state.selected_img_provider = img_provider st.session_state.selected_img_provider = img_provider
if st.button("🚀 执行 AI 生图", type="primary"): if st.button("🚀 执行 AI 生图", type="primary"):
with limits.acquire_image(blocking=False) as ok:
if not ok:
st.warning("系统正在生成其他任务(生图并发已达上限),请稍后再试。")
st.stop()
img_gen = ImageGenerator() img_gen = ImageGenerator()
# Pass ALL uploaded images as reference # Pass ALL uploaded images as reference
base_imgs = st.session_state.uploaded_images if st.session_state.uploaded_images else [] base_imgs = st.session_state.uploaded_images if st.session_state.uploaded_images else []
@@ -514,11 +688,18 @@ if st.session_state.view_mode == "workspace":
# --- Group Generation Logic --- # --- Group Generation Logic ---
with st.spinner("正在进行 Doubao 组图生成 (Batch Group Generation)..."): with st.spinner("正在进行 Doubao 组图生成 (Batch Group Generation)..."):
try: try:
t0 = perf_counter()
results = img_gen.generate_group_images_doubao( results = img_gen.generate_group_images_doubao(
scenes=scenes, scenes=scenes,
reference_images=base_imgs, reference_images=base_imgs,
visual_anchor=visual_anchor visual_anchor=visual_anchor,
project_id=st.session_state.project_id
) )
_record_metrics(st.session_state.project_id, {
"image_gen_total_s": round(perf_counter() - t0, 3),
"image_provider": img_provider,
"image_generated": len(results),
})
for s_id, path in results.items(): for s_id, path in results.items():
st.session_state.scene_images[s_id] = path st.session_state.scene_images[s_id] = path
@@ -536,35 +717,53 @@ if st.session_state.view_mode == "workspace":
except Exception as e: except Exception as e:
st.error(f"组图生成失败: {e}") st.error(f"组图生成失败: {e}")
else: else:
# --- Sequential Logic --- # --- Parallel Logic (default): only merchant uploaded images as references ---
total_scenes = len(scenes) total_scenes = len(scenes)
progress_bar = st.progress(0) progress_bar = st.progress(0)
status_text = st.empty() status_text = st.empty()
current_refs = list(base_imgs) # Start with base images
try: try:
t0 = perf_counter()
# Parallel workers within a single run; global semaphore already acquired above.
max_workers = 6
futures = {}
with ThreadPoolExecutor(max_workers=max_workers) as ex:
for idx, scene in enumerate(scenes): for idx, scene in enumerate(scenes):
scene_id = scene["id"] scene_id = scene["id"]
status_text.text(f"正在生成 Scene {scene_id} ({idx+1}/{total_scenes}) using {selected_img_model}...") futures[ex.submit(
img_gen.generate_single_scene_image,
img_path = img_gen.generate_single_scene_image(
scene=scene, scene=scene,
original_image_path=current_refs, # Pass ALL accumulated images original_image_path=list(base_imgs), # ONLY merchant images
previous_image_path=None, previous_image_path=None,
model_provider=img_provider, model_provider=img_provider,
visual_anchor=visual_anchor visual_anchor=visual_anchor,
) project_id=st.session_state.project_id,
)] = (idx, scene_id)
done = 0
for fut in as_completed(futures):
idx, scene_id = futures[fut]
done += 1
status_text.text(f"已完成 {done}/{total_scenes}Scene {scene_id}")
try:
img_path = fut.result()
except Exception as e:
img_path = None
st.warning(f"Scene {scene_id} 生成失败:{e}")
if img_path: if img_path:
st.session_state.scene_images[scene_id] = img_path st.session_state.scene_images[scene_id] = img_path
current_refs.append(img_path) # Add newly generated image to references for next scene
db.save_asset(st.session_state.project_id, scene_id, "image", "completed", local_path=img_path) db.save_asset(st.session_state.project_id, scene_id, "image", "completed", local_path=img_path)
progress_bar.progress((idx + 1) / total_scenes) progress_bar.progress(done / total_scenes)
status_text.text("生图完成!") status_text.text("生图完成!")
st.success("生图完成!") st.success("生图完成!")
_record_metrics(st.session_state.project_id, {
"image_gen_total_s": round(perf_counter() - t0, 3),
"image_provider": img_provider,
"image_generated": len(st.session_state.scene_images),
})
# Update Status # Update Status
db.update_project_status(st.session_state.project_id, "images_generated") db.update_project_status(st.session_state.project_id, "images_generated")
time.sleep(1) time.sleep(1)
@@ -603,11 +802,8 @@ if st.session_state.view_mode == "workspace":
else: else:
with st.spinner(f"正在重绘 Scene {scene_id}..."): with st.spinner(f"正在重绘 Scene {scene_id}..."):
img_gen = ImageGenerator() img_gen = ImageGenerator()
# Use ALL uploaded images + previously generated images up to this point # Only merchant uploaded images as references (no chaining)
current_refs_for_regen = list(st.session_state.uploaded_images) current_refs_for_regen = list(st.session_state.uploaded_images)
for prev_s_id in range(1, scene_id):
if prev_s_id in st.session_state.scene_images:
current_refs_for_regen.append(st.session_state.scene_images[prev_s_id])
# Fallback to single mode for regen if group was used # Fallback to single mode for regen if group was used
provider = st.session_state.get("selected_img_provider", "shubiaobiao") provider = st.session_state.get("selected_img_provider", "shubiaobiao")
@@ -621,7 +817,8 @@ if st.session_state.view_mode == "workspace":
original_image_path=current_refs_for_regen, original_image_path=current_refs_for_regen,
previous_image_path=None, previous_image_path=None,
model_provider=provider, model_provider=provider,
visual_anchor=regen_visual_anchor visual_anchor=regen_visual_anchor,
project_id=st.session_state.project_id
) )
if new_path: if new_path:
st.session_state.scene_images[scene_id] = new_path st.session_state.scene_images[scene_id] = new_path
@@ -636,33 +833,119 @@ if st.session_state.view_mode == "workspace":
if st.session_state.current_step >= 3: if st.session_state.current_step >= 3:
with st.expander("🎥 4. 视频生成 (Volcengine I2V)", expanded=(st.session_state.current_step == 3)): with st.expander("🎥 4. 视频生成 (Volcengine I2V)", expanded=(st.session_state.current_step == 3)):
if not st.session_state.scene_videos: scenes = st.session_state.script_data.get("scenes", [])
if st.button("🎬 执行图生视频", type="primary"):
with st.spinner("正在生成视频 (耗时较长)..."):
vid_gen = VideoGenerator() vid_gen = VideoGenerator()
# Pass project_id
videos = vid_gen.generate_scene_videos( # Submit-only (non-blocking) to avoid freezing Streamlit under concurrency
st.session_state.project_id, if st.button("🎬 提交图生视频任务(非阻塞)", type="primary"):
st.session_state.script_data, with limits.acquire_video(blocking=False) as ok:
st.session_state.scene_images if not ok:
st.warning("系统正在处理其他视频任务(并发已达上限),请稍后再试。")
st.stop()
t0 = perf_counter()
submitted = 0
for scene in scenes:
scene_id = scene["id"]
image_path = st.session_state.scene_images.get(scene_id)
prompt = scene.get("video_prompt", "High quality video")
task_id = vid_gen.submit_scene_video_task(
st.session_state.project_id, scene_id, image_path, prompt
) )
if task_id:
if videos: submitted += 1
st.session_state.scene_videos = videos _record_metrics(st.session_state.project_id, {
for sid, path in videos.items(): "video_submit_s": round(perf_counter() - t0, 3),
db.save_asset(st.session_state.project_id, sid, "video", "completed", local_path=path) "video_submitted": submitted,
})
# Update Status if submitted:
db.update_project_status(st.session_state.project_id, "videos_generated") db.update_project_status(st.session_state.project_id, "videos_processing")
st.success("视频生成完成!") st.success(f"已提交 {submitted} 个分镜视频任务。可点击下方“刷新恢复”下载结果。")
time.sleep(0.5)
st.rerun() st.rerun()
else: else:
st.warning("部分或全部视频生成失败") st.warning("未提交任何任务(可能缺少图片或接口失败)。")
# Display Videos if st.button("🔄 刷新状态并恢复已完成任务", type="secondary"):
if st.session_state.scene_videos: with limits.acquire_video(blocking=False) as ok:
if not ok:
st.warning("系统正在处理其他视频任务(并发已达上限),请稍后再试。")
st.stop()
t0 = perf_counter()
updated = 0
for scene in scenes:
scene_id = scene["id"]
asset = db.get_asset(st.session_state.project_id, scene_id, "video")
if not asset or not asset.get("task_id"):
continue
# if already have local video, skip
existing = st.session_state.scene_videos.get(scene_id)
if existing and os.path.exists(existing):
continue
task_id = asset.get("task_id")
# Query volc status; store URL for direct preview (no server download)
status = None
url = None
# short retries for "succeeded but url missing"
for attempt in range(3):
status, url = vid_gen.check_task_status(task_id)
if status == "succeeded" and url:
break
time.sleep(0.5 * (2 ** attempt))
meta_patch = {"checked_at": time.time(), "volc_status": status}
if url:
meta_patch["video_url"] = url
db.update_asset_metadata(st.session_state.project_id, scene_id, "video", meta_patch)
updated += 1
_record_metrics(st.session_state.project_id, {
"video_recover_s": round(perf_counter() - t0, 3),
"video_recovered": updated,
})
if updated:
st.success(f"已刷新 {updated} 个分镜状态(成功的将以 URL 直连预览)。")
else:
st.info("暂无可恢复的视频(可能仍在排队/生成中)。")
time.sleep(0.5)
st.rerun()
if st.button("📥 准备合成素材(下载成功的视频到服务器)", type="secondary"):
with limits.acquire_video(blocking=False) as ok:
if not ok:
st.warning("系统正在处理其他视频任务(并发已达上限),请稍后再试。")
st.stop()
downloaded = 0
for scene in scenes:
scene_id = scene["id"]
existing = st.session_state.scene_videos.get(scene_id)
if existing and os.path.exists(existing):
continue
asset = db.get_asset(st.session_state.project_id, scene_id, "video")
meta = (asset or {}).get("metadata") or {}
video_url = meta.get("video_url")
if not video_url:
continue
out_name = path_utils.unique_filename(
prefix="scene_video",
ext="mp4",
project_id=st.session_state.project_id,
scene_id=scene_id,
)
target_path = str(path_utils.project_videos_dir(st.session_state.project_id) / out_name)
if vid_gen._download_video_to(video_url, target_path):
st.session_state.scene_videos[scene_id] = target_path
db.save_asset(st.session_state.project_id, scene_id, "video", "completed", local_path=target_path, task_id=(asset or {}).get("task_id"), metadata=meta)
downloaded += 1
if downloaded:
st.success(f"已下载 {downloaded} 段视频,可进入合成。")
else:
st.info("暂无可下载的视频(请先刷新状态获取 video_url")
time.sleep(0.5)
st.rerun()
# Display Videos (even when partially available)
if st.session_state.scene_videos or scenes:
cols = st.columns(4) cols = st.columns(4)
scenes = st.session_state.script_data.get("scenes", [])
for i, scene in enumerate(scenes): for i, scene in enumerate(scenes):
scene_id = scene["id"] scene_id = scene["id"]
@@ -676,25 +959,28 @@ if st.session_state.view_mode == "workspace":
if vid_path and os.path.exists(vid_path): if vid_path and os.path.exists(vid_path):
st.video(vid_path) st.video(vid_path)
else:
# Try URL preview from DB metadata
asset = db.get_asset(st.session_state.project_id, scene_id, "video")
meta = (asset or {}).get("metadata") or {}
video_url = meta.get("video_url")
if video_url:
st.caption("URL 直连预览(不经服务器落盘)")
st.video(video_url)
else: else:
st.warning("Video missing") st.warning("Video missing")
# --- Recovery Logic --- # --- Recovery Logic ---
asset = db.get_asset(st.session_state.project_id, scene_id, "video")
if asset and asset.get("task_id"): if asset and asset.get("task_id"):
task_id = asset.get("task_id") task_id = asset.get("task_id")
if st.button(f"🔍 找回视频 (Task {task_id[-6:]})", key=f"recov_{scene_id}"): if st.button(f"🔍 刷新URL (Task {task_id[-6:]})", key=f"recov_{scene_id}"):
with st.spinner("查询任务状态中..."): with st.spinner("查询任务状态中..."):
vid_gen = VideoGenerator() status, url = vid_gen.check_task_status(task_id)
output_filename = f"scene_{scene_id}_video.mp4" patch = {"checked_at": time.time(), "volc_status": status}
target_path = str(config.TEMP_DIR / output_filename) if url:
patch["video_url"] = url
if vid_gen.recover_video_from_task(task_id, target_path): db.update_asset_metadata(st.session_state.project_id, scene_id, "video", patch)
st.session_state.scene_videos[scene_id] = target_path st.success("已刷新任务状态。")
db.save_asset(st.session_state.project_id, scene_id, "video", "completed", local_path=target_path)
st.success("找回成功!")
st.rerun() st.rerun()
else:
st.error("找回失败")
# Per-scene regenerate button # Per-scene regenerate button
if st.button(f"🔄 重生 S{scene_id}", key=f"regen_vid_{scene_id}"): if st.button(f"🔄 重生 S{scene_id}", key=f"regen_vid_{scene_id}"):
@@ -769,6 +1055,13 @@ if st.session_state.view_mode == "workspace":
["None"] + bgm_names, ["None"] + bgm_names,
index=default_idx index=default_idx
) )
# 明确提示BGM 目录为空或选中 BGM 不存在时,本次将不含 BGM
if not bgm_names:
st.warning(f"BGM 目录为空:{bgm_dir}(本次合成将不含 BGM")
elif selected_bgm != "None":
candidate = config.ASSETS_DIR / "bgm" / selected_bgm
if not candidate.exists():
st.warning(f"所选 BGM 文件不存在:{candidate}(本次合成将不含 BGM")
with col_g2: with col_g2:
# Voice Select # Voice Select
selected_voice = st.selectbox("配音音色 (TTS)", [config.VOLC_TTS_DEFAULT_VOICE, "zh_female_meilinvyou_saturn_bigtts"]) selected_voice = st.selectbox("配音音色 (TTS)", [config.VOLC_TTS_DEFAULT_VOICE, "zh_female_meilinvyou_saturn_bigtts"])
@@ -812,7 +1105,9 @@ if st.session_state.view_mode == "workspace":
ft = scene.get("fancy_text", {}) ft = scene.get("fancy_text", {})
ft_text = ft.get("text", "") if isinstance(ft, dict) else "" ft_text = ft.get("text", "") if isinstance(ft, dict) else ""
new_ft = st.text_input(f"花字", value=ft_text, key=f"tune_ft_{i}") new_ft = st.text_input(f"花字", value=ft_text, key=f"tune_ft_{i}")
if isinstance(scene.get("fancy_text"), dict): # 兼容:旧数据可能没有 fancy_text 字段
if not isinstance(scene.get("fancy_text"), dict):
scene["fancy_text"] = {}
scene["fancy_text"]["text"] = new_ft scene["fancy_text"]["text"] = new_ft
updated_scenes.append(scene) updated_scenes.append(scene)
@@ -831,12 +1126,18 @@ if st.session_state.view_mode == "workspace":
# Save updated script first # Save updated script first
db.update_project_script(st.session_state.project_id, st.session_state.script_data) db.update_project_script(st.session_state.project_id, st.session_state.script_data)
t0 = perf_counter()
output_path = composer.compose_from_script( output_path = composer.compose_from_script(
script=st.session_state.script_data, script=st.session_state.script_data,
video_map=st.session_state.scene_videos, video_map=st.session_state.scene_videos,
bgm_path=bgm_path, bgm_path=bgm_path,
output_name=f"final_{st.session_state.project_id}_{int(time.time())}" # Unique name for history output_name=f"final_{st.session_state.project_id}_{int(time.time())}", # Unique name for history
project_id=st.session_state.project_id,
) )
_record_metrics(st.session_state.project_id, {
"compose_s": round(perf_counter() - t0, 3),
"bgm_used": bool(bgm_path and Path(bgm_path).exists()),
})
st.session_state.final_video = output_path st.session_state.final_video = output_path
db.save_asset(st.session_state.project_id, 0, "final_video", "completed", local_path=output_path) db.save_asset(st.session_state.project_id, 0, "final_video", "completed", local_path=output_path)
@@ -902,16 +1203,25 @@ if st.session_state.view_mode == "workspace":
# 智能匹配 BGM根据脚本 bgm_style 匹配 # 智能匹配 BGM根据脚本 bgm_style 匹配
bgm_style = st.session_state.script_data.get("bgm_style", "") bgm_style = st.session_state.script_data.get("bgm_style", "")
bgm_path = match_bgm_by_style(bgm_style, config.ASSETS_DIR / "bgm") bgm_path = match_bgm_by_style(bgm_style, config.ASSETS_DIR / "bgm")
if bgm_path and not Path(bgm_path).exists():
st.warning(f"推荐的 BGM 文件不存在:{bgm_path}(本次将不含 BGM")
bgm_path = None
try: try:
# 首次合成也加上时间戳 # 首次合成也加上时间戳
output_name = f"final_{st.session_state.project_id}_{int(time.time())}" output_name = f"final_{st.session_state.project_id}_{int(time.time())}"
t0 = perf_counter()
output_path = composer.compose_from_script( output_path = composer.compose_from_script(
script=st.session_state.script_data, script=st.session_state.script_data,
video_map=st.session_state.scene_videos, video_map=st.session_state.scene_videos,
bgm_path=bgm_path, bgm_path=bgm_path,
output_name=output_name output_name=output_name,
project_id=st.session_state.project_id,
) )
_record_metrics(st.session_state.project_id, {
"compose_s": round(perf_counter() - t0, 3),
"bgm_used": bool(bgm_path and Path(bgm_path).exists()),
})
st.session_state.final_video = output_path st.session_state.final_video = output_path
db.save_asset(st.session_state.project_id, 0, "final_video", "completed", local_path=output_path) db.save_asset(st.session_state.project_id, 0, "final_video", "completed", local_path=output_path)
db.update_project_status(st.session_state.project_id, "completed") db.update_project_status(st.session_state.project_id, "completed")

View File

@@ -11,6 +11,7 @@ from typing import Dict, Any, List, Optional, Union
import config import config
from modules import ffmpeg_utils, fancy_text, factory, storage from modules import ffmpeg_utils, fancy_text, factory, storage
from modules.text_renderer import renderer from modules.text_renderer import renderer
from modules import path_utils
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -65,6 +66,7 @@ class VideoComposer:
bgm_path: str = None, bgm_path: str = None,
bgm_volume: float = 0.15, bgm_volume: float = 0.15,
output_name: str = None, output_name: str = None,
project_id: Optional[str] = None,
upload_to_r2: bool = False upload_to_r2: bool = False
) -> str: ) -> str:
""" """
@@ -89,25 +91,27 @@ class VideoComposer:
timestamp = int(time.time()) timestamp = int(time.time())
output_name = output_name or f"composed_{timestamp}" output_name = output_name or f"composed_{timestamp}"
# Per-project temp dir to avoid cross-project overwrites
temp_root = path_utils.project_compose_dir(project_id, output_name) if project_id else config.TEMP_DIR
logger.info(f"Starting composition: {len(video_paths)} videos") logger.info(f"Starting composition: {len(video_paths)} videos")
try: try:
# Step 1: 拼接视频 # Step 1: 拼接视频
merged_path = str(config.TEMP_DIR / f"{output_name}_merged.mp4") merged_path = str(Path(temp_root) / f"{output_name}_merged.mp4")
ffmpeg_utils.concat_videos(video_paths, merged_path, self.target_size) ffmpeg_utils.concat_videos(video_paths, merged_path, self.target_size)
self._add_temp(merged_path) self._add_temp(merged_path)
current_video = merged_path current_video = merged_path
# Step 1.1: 若无音轨,补一条静音底,避免后续滤镜找不到 0:a # Step 1.1: 若无音轨,补一条静音底,避免后续滤镜找不到 0:a
silent_path = str(config.TEMP_DIR / f"{output_name}_silent.mp4") silent_path = str(Path(temp_root) / f"{output_name}_silent.mp4")
ffmpeg_utils.add_silence_audio(current_video, silent_path) ffmpeg_utils.add_silence_audio(current_video, silent_path)
self._add_temp(silent_path) self._add_temp(silent_path)
current_video = silent_path current_video = silent_path
# Step 2: 添加字幕 (白字黑边,无底框,水平居中) # Step 2: 添加字幕 (白字黑边,无底框,水平居中)
if subtitles: if subtitles:
subtitled_path = str(config.TEMP_DIR / f"{output_name}_subtitled.mp4") subtitled_path = str(Path(temp_root) / f"{output_name}_subtitled.mp4")
subtitle_style = { subtitle_style = {
"font": ffmpeg_utils._get_font_path(), "font": ffmpeg_utils._get_font_path(),
"fontsize": 60, "fontsize": 60,
@@ -169,7 +173,7 @@ class VideoComposer:
"duration": ft.get("duration", 999) "duration": ft.get("duration", 999)
}) })
fancy_path = str(config.TEMP_DIR / f"{output_name}_fancy.mp4") fancy_path = str(Path(temp_root) / f"{output_name}_fancy.mp4")
ffmpeg_utils.overlay_multiple_images( ffmpeg_utils.overlay_multiple_images(
current_video, overlay_configs, fancy_path current_video, overlay_configs, fancy_path
) )
@@ -178,13 +182,15 @@ class VideoComposer:
# Step 4: 生成并混合旁白(火山 WS 优先,失败回退 Edge # Step 4: 生成并混合旁白(火山 WS 优先,失败回退 Edge
if voiceover_text: if voiceover_text:
vo_out = str(Path(temp_root) / f"{output_name}_vo_full.mp3")
vo_path = factory.generate_voiceover_volcengine( vo_path = factory.generate_voiceover_volcengine(
text=voiceover_text, text=voiceover_text,
voice_type=self.voice_type voice_type=self.voice_type,
output_path=vo_out,
) )
self._add_temp(vo_path) self._add_temp(vo_path)
voiced_path = str(config.TEMP_DIR / f"{output_name}_voiced.mp4") voiced_path = str(Path(temp_root) / f"{output_name}_voiced.mp4")
ffmpeg_utils.mix_audio( ffmpeg_utils.mix_audio(
current_video, vo_path, voiced_path, current_video, vo_path, voiced_path,
audio_volume=1.5, audio_volume=1.5,
@@ -195,12 +201,12 @@ class VideoComposer:
elif voiceover_segments: elif voiceover_segments:
current_video = self._add_segmented_voiceover( current_video = self._add_segmented_voiceover(
current_video, voiceover_segments, output_name current_video, voiceover_segments, output_name, Path(temp_root)
) )
# Step 5: 添加BGM淡入淡出若 duck 失败会自动退回低音量混合) # Step 5: 添加BGM淡入淡出若 duck 失败会自动退回低音量混合)
if bgm_path: if bgm_path:
bgm_output = str(config.TEMP_DIR / f"{output_name}_bgm.mp4") bgm_output = str(Path(temp_root) / f"{output_name}_bgm.mp4")
ffmpeg_utils.add_bgm( ffmpeg_utils.add_bgm(
current_video, bgm_path, bgm_output, current_video, bgm_path, bgm_output,
bgm_volume=bgm_volume, bgm_volume=bgm_volume,
@@ -237,7 +243,8 @@ class VideoComposer:
self, self,
video_path: str, video_path: str,
segments: List[Dict[str, Any]], segments: List[Dict[str, Any]],
output_name: str output_name: str,
temp_root: Path,
) -> str: ) -> str:
"""添加分段旁白""" """添加分段旁白"""
if not segments: if not segments:
@@ -254,7 +261,7 @@ class VideoComposer:
audio_path = factory.generate_voiceover_volcengine( audio_path = factory.generate_voiceover_volcengine(
text=text, text=text,
voice_type=voice, voice_type=voice,
output_path=str(config.TEMP_DIR / f"{output_name}_seg_{i}.mp3") output_path=str(temp_root / f"{output_name}_seg_{i}.mp3")
) )
if audio_path: if audio_path:
@@ -270,7 +277,7 @@ class VideoComposer:
# 依次混入音频 # 依次混入音频
current = video_path current = video_path
for i, af in enumerate(audio_files): for i, af in enumerate(audio_files):
output = str(config.TEMP_DIR / f"{output_name}_seg_mixed_{i}.mp4") output = str(temp_root / f"{output_name}_seg_mixed_{i}.mp4")
ffmpeg_utils.mix_audio( ffmpeg_utils.mix_audio(
current, af["path"], output, current, af["path"], output,
audio_volume=1.0, audio_volume=1.0,
@@ -287,7 +294,8 @@ class VideoComposer:
script: Dict[str, Any], script: Dict[str, Any],
video_map: Dict[int, str], video_map: Dict[int, str],
bgm_path: str = None, bgm_path: str = None,
output_name: str = None output_name: str = None,
project_id: Optional[str] = None,
) -> str: ) -> str:
""" """
基于生成脚本和视频映射进行合成 基于生成脚本和视频映射进行合成
@@ -340,13 +348,30 @@ class VideoComposer:
# 无 background不加底框 # 无 background不加底框
} }
# 让花字时长默认跟随镜头(不改 prompt仅纠正过短/缺失 duration
start_in_scene = float(ft.get("start_time", 0) or 0.0)
if start_in_scene < 0:
start_in_scene = 0.0
if start_in_scene >= duration:
start_in_scene = 0.0
ft_dur = ft.get("duration", None)
try:
ft_dur_val = float(ft_dur) if ft_dur is not None else None
except Exception:
ft_dur_val = None
# If too short, extend to scene end
if ft_dur_val is None or ft_dur_val < 1.5:
ft_dur_val = max(duration - start_in_scene, 1.5)
# Clamp within scene duration
ft_dur_val = max(0.5, min(ft_dur_val, duration))
fancy_texts.append({ fancy_texts.append({
"text": text, "text": text,
"style": fixed_style, "style": fixed_style,
"x": "(W-w)/2", # 居中 "x": "(W-w)/2", # 居中
"y": "180", # 上半区域 "y": "180", # 上半区域
"start": total_duration + float(ft.get("start_time", 0)), "start": total_duration + start_in_scene,
"duration": float(ft.get("duration", duration)) "duration": ft_dur_val
}) })
total_duration += duration total_duration += duration
@@ -354,15 +379,16 @@ class VideoComposer:
# 2. 拼接视频 # 2. 拼接视频
timestamp = int(time.time()) timestamp = int(time.time())
output_name = output_name or f"composed_{timestamp}" output_name = output_name or f"composed_{timestamp}"
temp_root = path_utils.project_compose_dir(project_id, output_name) if project_id else config.TEMP_DIR
merged_path = str(config.TEMP_DIR / f"{output_name}_merged.mp4") merged_path = str(Path(temp_root) / f"{output_name}_merged.mp4")
ffmpeg_utils.concat_videos(video_paths, merged_path, self.target_size) ffmpeg_utils.concat_videos(video_paths, merged_path, self.target_size)
self._add_temp(merged_path) self._add_temp(merged_path)
current_video = merged_path current_video = merged_path
# 3. 处理整体旁白时间轴 (New Logic) # 3. 处理整体旁白时间轴 (New Logic)
voiceover_timeline = script.get("voiceover_timeline", []) voiceover_timeline = script.get("voiceover_timeline", [])
mixed_audio_path = str(config.TEMP_DIR / f"{output_name}_mixed_vo.mp3") mixed_audio_path = str(Path(temp_root) / f"{output_name}_mixed_vo.mp3")
# 初始化静音底轨 (长度为 total_duration) # 初始化静音底轨 (长度为 total_duration)
ffmpeg_utils._run_ffmpeg([ ffmpeg_utils._run_ffmpeg([
@@ -401,17 +427,17 @@ class VideoComposer:
tts_path = factory.generate_voiceover_volcengine( tts_path = factory.generate_voiceover_volcengine(
text=text, text=text,
voice_type=self.voice_type, voice_type=self.voice_type,
output_path=str(config.TEMP_DIR / f"{output_name}_vo_{i}.mp3") output_path=str(Path(temp_root) / f"{output_name}_vo_{i}.mp3")
) )
self._add_temp(tts_path) self._add_temp(tts_path)
# 调整时长 # 调整时长
adjusted_path = str(config.TEMP_DIR / f"{output_name}_vo_adj_{i}.mp3") adjusted_path = str(Path(temp_root) / f"{output_name}_vo_adj_{i}.mp3")
ffmpeg_utils.adjust_audio_duration(tts_path, target_duration, adjusted_path) ffmpeg_utils.adjust_audio_duration(tts_path, target_duration, adjusted_path)
self._add_temp(adjusted_path) self._add_temp(adjusted_path)
# 混合到总音轨 # 混合到总音轨
new_mixed = str(config.TEMP_DIR / f"{output_name}_mixed_{i}.mp3") new_mixed = str(Path(temp_root) / f"{output_name}_mixed_{i}.mp3")
ffmpeg_utils.mix_audio_at_offset(mixed_audio_path, adjusted_path, target_start, new_mixed) ffmpeg_utils.mix_audio_at_offset(mixed_audio_path, adjusted_path, target_start, new_mixed)
mixed_audio_path = new_mixed # Update current mixed path mixed_audio_path = new_mixed # Update current mixed path
self._add_temp(new_mixed) self._add_temp(new_mixed)
@@ -425,7 +451,7 @@ class VideoComposer:
}) })
# 4. 将合成好的旁白混入视频 # 4. 将合成好的旁白混入视频
voiced_path = str(config.TEMP_DIR / f"{output_name}_voiced.mp4") voiced_path = str(Path(temp_root) / f"{output_name}_voiced.mp4")
ffmpeg_utils.mix_audio( ffmpeg_utils.mix_audio(
current_video, mixed_audio_path, voiced_path, current_video, mixed_audio_path, voiced_path,
audio_volume=1.5, audio_volume=1.5,
@@ -436,7 +462,7 @@ class VideoComposer:
# 5. 添加字幕 (使用新的 ffmpeg_utils.add_multiple_subtitles) # 5. 添加字幕 (使用新的 ffmpeg_utils.add_multiple_subtitles)
if subtitles: if subtitles:
subtitled_path = str(config.TEMP_DIR / f"{output_name}_subtitled.mp4") subtitled_path = str(Path(temp_root) / f"{output_name}_subtitled.mp4")
subtitle_style = { subtitle_style = {
"font": ffmpeg_utils._get_font_path(), "font": ffmpeg_utils._get_font_path(),
"fontsize": 60, "fontsize": 60,
@@ -455,7 +481,7 @@ class VideoComposer:
# 6. 添加花字 # 6. 添加花字
if fancy_texts: if fancy_texts:
fancy_path = str(config.TEMP_DIR / f"{output_name}_fancy.mp4") fancy_path = str(Path(temp_root) / f"{output_name}_fancy.mp4")
overlay_configs = [] overlay_configs = []
for ft in fancy_texts: for ft in fancy_texts:
@@ -477,7 +503,7 @@ class VideoComposer:
# 7. 添加 BGM # 7. 添加 BGM
if bgm_path: if bgm_path:
bgm_output = str(config.TEMP_DIR / f"{output_name}_bgm.mp4") bgm_output = str(Path(temp_root) / f"{output_name}_bgm.mp4")
ffmpeg_utils.add_bgm( ffmpeg_utils.add_bgm(
current_video, bgm_path, bgm_output, current_video, bgm_path, bgm_output,
bgm_volume=0.15 bgm_volume=0.15

View File

@@ -113,6 +113,25 @@ class DBManager:
finally: finally:
session.close() session.close()
def update_project_product_info(self, project_id: str, product_info: Dict[str, Any]):
"""
Update project.product_info JSON (read-write with Postgres shared DB).
Used to persist editor state without changing schema.
"""
session = self._get_session()
try:
project = session.query(Project).filter_by(id=project_id).first()
if project:
project.product_info = json.dumps(product_info, ensure_ascii=False)
project.updated_at = time.time()
session.commit()
except Exception as e:
session.rollback()
logger.error(f"Error updating product_info: {e}")
raise
finally:
session.close()
def update_project_status(self, project_id: str, status: str): def update_project_status(self, project_id: str, status: str):
session = self._get_session() session = self._get_session()
try: try:
@@ -260,6 +279,35 @@ class DBManager:
finally: finally:
session.close() session.close()
def update_asset_metadata(self, project_id: str, scene_id: int, asset_type: str, patch: Dict[str, Any]) -> None:
"""Merge-patch asset.metadata JSON without overwriting other fields."""
if not patch:
return
session = self._get_session()
try:
asset = session.query(SceneAsset).filter_by(
project_id=project_id,
scene_id=scene_id,
asset_type=asset_type
).first()
if not asset:
return
try:
existing = json.loads(asset.metadata_json) if asset.metadata_json else {}
except Exception:
existing = {}
if not isinstance(existing, dict):
existing = {}
existing.update(patch)
asset.metadata_json = json.dumps(existing, ensure_ascii=False)
asset.updated_at = time.time()
session.commit()
except Exception as e:
session.rollback()
logger.error(f"Error updating asset metadata: {e}")
finally:
session.close()
# --- Config/Prompt Operations --- # --- Config/Prompt Operations ---
def get_config(self, key: str, default: Any = None) -> Any: def get_config(self, key: str, default: Any = None) -> Any:

View File

@@ -697,8 +697,12 @@ def generate_voiceover_volcengine_long(
# 生成每段音频 # 生成每段音频
chunk_files = [] chunk_files = []
# Keep temp artifacts near output_path when provided to avoid cross-project collisions
base_tmp_dir = Path(output_path).parent if output_path else config.TEMP_DIR
base_tmp_dir.mkdir(parents=True, exist_ok=True)
for i, chunk in enumerate(chunks): for i, chunk in enumerate(chunks):
chunk_path = str(config.TEMP_DIR / f"vo_chunk_{i}_{int(time.time())}.mp3") import uuid
chunk_path = str(base_tmp_dir / f"vo_chunk_{i}_{int(time.time() * 1000)}_{uuid.uuid4().hex[:8]}.mp3")
try: try:
path = generate_voiceover_volcengine( path = generate_voiceover_volcengine(
text=chunk, text=chunk,
@@ -723,13 +727,14 @@ def generate_voiceover_volcengine_long(
return chunk_files[0] return chunk_files[0]
# 创建合并文件列表 # 创建合并文件列表
concat_list = config.TEMP_DIR / f"concat_audio_{os.getpid()}.txt" import uuid
concat_list = base_tmp_dir / f"concat_audio_{int(time.time() * 1000)}_{uuid.uuid4().hex[:8]}.txt"
with open(concat_list, "w") as f: with open(concat_list, "w") as f:
for cf in chunk_files: for cf in chunk_files:
f.write(f"file '{cf}'\n") f.write(f"file '{cf}'\n")
if not output_path: if not output_path:
output_path = str(config.TEMP_DIR / f"vo_volc_merged_{int(time.time())}.mp3") output_path = str(base_tmp_dir / f"vo_volc_merged_{int(time.time() * 1000)}_{uuid.uuid4().hex[:8]}.mp3")
# FFmpeg 合并 # FFmpeg 合并
import subprocess import subprocess

View File

@@ -7,6 +7,7 @@ import re
import subprocess import subprocess
import tempfile import tempfile
import logging import logging
import shutil
from pathlib import Path from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple from typing import List, Dict, Any, Optional, Tuple
@@ -14,9 +15,39 @@ import config
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# FFmpeg/FFprobe 路径(优先使用项目内的二进制) def _pick_exec(preferred_path: str, fallback_name: str) -> str:
FFMPEG_PATH = str(config.BASE_DIR / "bin" / "ffmpeg") if (config.BASE_DIR / "bin" / "ffmpeg").exists() else "ffmpeg" """
FFPROBE_PATH = str(config.BASE_DIR / "bin" / "ffprobe") if (config.BASE_DIR / "bin" / "ffprobe").exists() else "ffprobe" Pick an executable path.
Why:
- In docker, /app/bin may accidentally contain binaries built for another OS/arch,
causing `Exec format error` at runtime (seen on /app/bin/ffprobe).
Strategy:
- Prefer preferred_path if it exists AND is runnable.
- Otherwise fall back to PATH-resolved command (fallback_name).
"""
if preferred_path and os.path.exists(preferred_path):
try:
# Validate it can be executed (arch OK) and is a real binary.
# ffmpeg/ffprobe both support `-version`.
result = subprocess.run(
[preferred_path, "-version"],
capture_output=True,
text=True,
)
if result.returncode == 0:
return preferred_path
except OSError:
# Exec format error / permission error -> fall back
pass
resolved = shutil.which(fallback_name)
return resolved or fallback_name
# FFmpeg/FFprobe 路径(优先使用项目内的二进制,但会做可执行性自检)
FFMPEG_PATH = _pick_exec(str(config.BASE_DIR / "bin" / "ffmpeg"), "ffmpeg")
FFPROBE_PATH = _pick_exec(str(config.BASE_DIR / "bin" / "ffprobe"), "ffprobe")
# 字体路径:优先使用项目内置字体,然后按平台回退到系统字体 # 字体路径:优先使用项目内置字体,然后按平台回退到系统字体
DEFAULT_FONT_PATHS = [ DEFAULT_FONT_PATHS = [
@@ -159,15 +190,6 @@ def concat_videos(
logger.info(f"Concatenating {len(video_paths)} videos...") logger.info(f"Concatenating {len(video_paths)} videos...")
# 创建 concat 文件列表
concat_file = config.TEMP_DIR / f"concat_{os.getpid()}.txt"
with open(concat_file, "w", encoding="utf-8") as f:
for vp in video_paths:
# 使用绝对路径并转义单引号
abs_path = os.path.abspath(vp)
f.write(f"file '{abs_path}'\n")
width, height = target_size width, height = target_size
# 使用 filter_complex 统一分辨率后拼接 # 使用 filter_complex 统一分辨率后拼接
@@ -203,10 +225,6 @@ def concat_videos(
_run_ffmpeg(cmd) _run_ffmpeg(cmd)
# 清理临时文件
if concat_file.exists():
concat_file.unlink()
logger.info(f"Concatenated video saved: {output_path}") logger.info(f"Concatenated video saved: {output_path}")
return output_path return output_path
@@ -825,10 +843,10 @@ def add_bgm(
bgm_volume: BGM音量 bgm_volume: BGM音量
loop: 是否循环BGM loop: 是否循环BGM
""" """
# 验证 BGM 文件存在 # 验证 BGM 文件存在(默认保持兼容:仍会输出视频,但会明确打日志)
if not bgm_path or not os.path.exists(bgm_path): if not bgm_path or not os.path.exists(bgm_path):
logger.error(f"BGM file not found: {bgm_path}") logger.error(f"BGM file not found (skip add_bgm): {bgm_path}")
# 直接复制原视频,不添加 BGM # 直接复制原视频,不添加 BGM(上层应当提示用户/写入 metadata
import shutil import shutil
shutil.copy(video_path, output_path) shutil.copy(video_path, output_path)
return output_path return output_path

View File

@@ -15,9 +15,52 @@ import io
from modules import storage from modules import storage
import config import config
from modules import path_utils
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def _env_int(name: str, default: int) -> int:
try:
return int(os.getenv(name, str(default)))
except Exception:
return default
# Tunables: slow channels can be hot; default conservative but adjustable.
IMG_SUBMIT_TIMEOUT_S = _env_int("IMG_SUBMIT_TIMEOUT_S", 180)
IMG_POLL_TIMEOUT_S = _env_int("IMG_POLL_TIMEOUT_S", 30)
IMG_MAX_RETRIES = _env_int("IMG_MAX_RETRIES", 3)
IMG_POLL_INTERVAL_S = _env_int("IMG_POLL_INTERVAL_S", 2)
IMG_POLL_MAX_RETRIES = _env_int("IMG_POLL_MAX_RETRIES", 90) # 90*2s ~= 180s
def _is_retryable_exception(e: Exception) -> bool:
# Network / transient errors
if isinstance(e, (requests.Timeout, requests.ConnectionError)):
return True
msg = str(e).lower()
# Transient provider errors often contain these keywords
if any(k in msg for k in ["timeout", "temporarily", "temporarily unavailable", "gateway", "rate", "try again"]):
return True
return False
def _with_retries(fn, *, max_retries: int, label: str):
last = None
for attempt in range(1, max_retries + 1):
try:
return fn()
except Exception as e:
last = e
retryable = _is_retryable_exception(e)
logger.warning(f"[{label}] attempt {attempt}/{max_retries} failed: {e} (retryable={retryable})")
if not retryable or attempt >= max_retries:
raise
# small backoff
time.sleep(min(2 ** (attempt - 1), 4))
raise last # pragma: no cover
class ImageGenerator: class ImageGenerator:
"""连贯图片生成器 (Volcengine Provider)""" """连贯图片生成器 (Volcengine Provider)"""
@@ -51,7 +94,8 @@ class ImageGenerator:
original_image_path: Any, original_image_path: Any,
previous_image_path: Optional[str] = None, previous_image_path: Optional[str] = None,
model_provider: str = "shubiaobiao", # "shubiaobiao", "gemini", "doubao" model_provider: str = "shubiaobiao", # "shubiaobiao", "gemini", "doubao"
visual_anchor: str = "" # 视觉锚点,强制拼接到 prompt 前 visual_anchor: str = "", # 视觉锚点,强制拼接到 prompt 前
project_id: Optional[str] = None,
) -> Optional[str]: ) -> Optional[str]:
""" """
生成单张分镜图片 (Public) 生成单张分镜图片 (Public)
@@ -78,11 +122,19 @@ class ImageGenerator:
input_images.append(previous_image_path) input_images.append(previous_image_path)
try: try:
out_dir = path_utils.project_images_dir(project_id) if project_id else config.TEMP_DIR
out_name = path_utils.unique_filename(
prefix="scene_image",
ext="png",
project_id=project_id,
scene_id=scene_id,
)
output_path = self._generate_single_image( output_path = self._generate_single_image(
prompt=visual_prompt, prompt=visual_prompt,
reference_images=input_images, reference_images=input_images,
output_filename=f"scene_{scene_id}_{int(time.time())}.png", output_filename=out_name,
provider=model_provider provider=model_provider,
output_dir=out_dir,
) )
if output_path: if output_path:
@@ -101,7 +153,8 @@ class ImageGenerator:
self, self,
scenes: List[Dict[str, Any]], scenes: List[Dict[str, Any]],
reference_images: List[str], reference_images: List[str],
visual_anchor: str = "" # 视觉锚点 visual_anchor: str = "", # 视觉锚点
project_id: Optional[str] = None,
) -> Dict[int, str]: ) -> Dict[int, str]:
""" """
Doubao 组图生成 (Batch) - 拼接 Prompt 一次生成多张 Doubao 组图生成 (Batch) - 拼接 Prompt 一次生成多张
@@ -187,7 +240,15 @@ class ImageGenerator:
if image_url: if image_url:
# Download # Download
img_resp = requests.get(image_url, timeout=60) img_resp = requests.get(image_url, timeout=60)
output_path = config.TEMP_DIR / f"scene_{scene_id}_{int(time.time())}.png" out_dir = path_utils.project_images_dir(project_id) if project_id else config.TEMP_DIR
out_name = path_utils.unique_filename(
prefix="scene_image",
ext="png",
project_id=project_id,
scene_id=scene_id,
extra="group",
)
output_path = out_dir / out_name
with open(output_path, "wb") as f: with open(output_path, "wb") as f:
f.write(img_resp.content) f.write(img_resp.content)
results[scene_id] = str(output_path) results[scene_id] = str(output_path)
@@ -203,21 +264,24 @@ class ImageGenerator:
prompt: str, prompt: str,
reference_images: List[str], reference_images: List[str],
output_filename: str, output_filename: str,
provider: str = "shubiaobiao" provider: str = "shubiaobiao",
output_dir: Optional[Path] = None,
) -> Optional[str]: ) -> Optional[str]:
"""统一入口""" """统一入口"""
out_dir = output_dir or config.TEMP_DIR
if provider == "doubao": if provider == "doubao":
return self._generate_single_image_doubao(prompt, reference_images, output_filename) return self._generate_single_image_doubao(prompt, reference_images, output_filename, out_dir)
elif provider == "gemini": elif provider == "gemini":
return self._generate_single_image_gemini(prompt, reference_images, output_filename) return self._generate_single_image_gemini(prompt, reference_images, output_filename, out_dir)
else: else:
return self._generate_single_image_shubiao(prompt, reference_images, output_filename) return self._generate_single_image_shubiao(prompt, reference_images, output_filename, out_dir)
def _generate_single_image_doubao( def _generate_single_image_doubao(
self, self,
prompt: str, prompt: str,
reference_images: List[str], reference_images: List[str],
output_filename: str output_filename: str,
output_dir: Path
) -> Optional[str]: ) -> Optional[str]:
"""调用 Volcengine Doubao (Image API)""" """调用 Volcengine Doubao (Image API)"""
@@ -255,9 +319,9 @@ class ImageGenerator:
"Authorization": f"Bearer {config.VOLC_API_KEY}" "Authorization": f"Bearer {config.VOLC_API_KEY}"
} }
try: def _call():
logger.info(f"Submitting to Doubao Image: {self.endpoint}") logger.info(f"Submitting to Doubao Image: {self.endpoint}")
resp = requests.post(self.endpoint, json=payload, headers=headers, timeout=180) resp = requests.post(self.endpoint, json=payload, headers=headers, timeout=IMG_SUBMIT_TIMEOUT_S)
if resp.status_code != 200: if resp.status_code != 200:
msg = f"Doubao Image Failed ({resp.status_code}): {resp.text}" msg = f"Doubao Image Failed ({resp.status_code}): {resp.text}"
@@ -272,22 +336,20 @@ class ImageGenerator:
img_resp = requests.get(image_url, timeout=60) img_resp = requests.get(image_url, timeout=60)
img_resp.raise_for_status() img_resp.raise_for_status()
output_path = config.TEMP_DIR / output_filename output_path = output_dir / output_filename
with open(output_path, "wb") as f: with open(output_path, "wb") as f:
f.write(img_resp.content) f.write(img_resp.content)
return str(output_path) return str(output_path)
raise RuntimeError(f"No image URL in Doubao response: {data}") raise RuntimeError(f"No image URL in Doubao response: {data}")
return _with_retries(_call, max_retries=IMG_MAX_RETRIES, label="doubao_image")
except Exception as e:
logger.error(f"Doubao Gen Failed: {e}")
raise e
def _generate_single_image_shubiao( def _generate_single_image_shubiao(
self, self,
prompt: str, prompt: str,
reference_images: List[str], reference_images: List[str],
output_filename: str output_filename: str,
output_dir: Path
) -> Optional[str]: ) -> Optional[str]:
"""调用 api2img.shubiaobiao.com 通道生成图片(同步返回 base64""" """调用 api2img.shubiaobiao.com 通道生成图片(同步返回 base64"""
# 准备参考图,内联 base64 方式 # 准备参考图,内联 base64 方式
@@ -338,9 +400,9 @@ class ImageGenerator:
"Content-Type": "application/json" "Content-Type": "application/json"
} }
try: def _call():
logger.info(f"Submitting to Shubiaobiao Img: {endpoint}") logger.info(f"Submitting to Shubiaobiao Img: {endpoint}")
resp = requests.post(endpoint, json=payload, headers=headers, timeout=120) resp = requests.post(endpoint, json=payload, headers=headers, timeout=IMG_SUBMIT_TIMEOUT_S)
if resp.status_code != 200: if resp.status_code != 200:
msg = f"Shubiaobiao 提交失败 ({resp.status_code}): {resp.text}" msg = f"Shubiaobiao 提交失败 ({resp.status_code}): {resp.text}"
@@ -365,22 +427,20 @@ class ImageGenerator:
logger.error(msg) logger.error(msg)
raise RuntimeError(msg) raise RuntimeError(msg)
output_path = config.TEMP_DIR / output_filename output_path = output_dir / output_filename
with open(output_path, "wb") as f: with open(output_path, "wb") as f:
f.write(base64.b64decode(img_b64)) f.write(base64.b64decode(img_b64))
logger.info(f"Shubiaobiao Generation Success: {output_path}") logger.info(f"Shubiaobiao Generation Success: {output_path}")
return str(output_path) return str(output_path)
return _with_retries(_call, max_retries=IMG_MAX_RETRIES, label="shubiaobiao_image")
except Exception as e:
logger.error(f"Shubiaobiao Generation Exception: {e}")
raise
def _generate_single_image_gemini( def _generate_single_image_gemini(
self, self,
prompt: str, prompt: str,
reference_images: List[str], reference_images: List[str],
output_filename: str output_filename: str,
output_dir: Path
) -> Optional[str]: ) -> Optional[str]:
"""调用 Gemini (Wuyin Keji / NanoBanana-Pro) 生成单张图片""" """调用 Gemini (Wuyin Keji / NanoBanana-Pro) 生成单张图片"""
@@ -420,10 +480,10 @@ class ImageGenerator:
"Content-Type": "application/json;charset:utf-8" "Content-Type": "application/json;charset:utf-8"
} }
def _call():
# 2. 提交任务 # 2. 提交任务
try:
logger.info(f"Submitting to Gemini: {config.GEMINI_IMG_API_URL}") logger.info(f"Submitting to Gemini: {config.GEMINI_IMG_API_URL}")
resp = requests.post(config.GEMINI_IMG_API_URL, json=payload, headers=headers, timeout=30) resp = requests.post(config.GEMINI_IMG_API_URL, json=payload, headers=headers, timeout=IMG_SUBMIT_TIMEOUT_S)
if resp.status_code != 200: if resp.status_code != 200:
msg = f"Gemini 提交失败 ({resp.status_code}): {resp.text}" msg = f"Gemini 提交失败 ({resp.status_code}): {resp.text}"
@@ -443,13 +503,12 @@ class ImageGenerator:
logger.info(f"Gemini Task Submitted, ID: {task_id}") logger.info(f"Gemini Task Submitted, ID: {task_id}")
# 3. 轮询状态 # 3. 轮询状态
max_retries = 60 for _ in range(IMG_POLL_MAX_RETRIES):
for i in range(max_retries): time.sleep(IMG_POLL_INTERVAL_S)
time.sleep(2)
poll_url = f"{config.GEMINI_IMG_DETAIL_URL}?key={config.GEMINI_IMG_KEY}&id={task_id}" poll_url = f"{config.GEMINI_IMG_DETAIL_URL}?key={config.GEMINI_IMG_KEY}&id={task_id}"
try: try:
poll_resp = requests.get(poll_url, headers=headers, timeout=30) poll_resp = requests.get(poll_url, headers=headers, timeout=IMG_POLL_TIMEOUT_S)
except requests.Timeout: except requests.Timeout:
continue continue
except Exception as e: except Exception as e:
@@ -474,7 +533,7 @@ class ImageGenerator:
img_resp = requests.get(image_url, timeout=60) img_resp = requests.get(image_url, timeout=60)
img_resp.raise_for_status() img_resp.raise_for_status()
output_path = config.TEMP_DIR / output_filename output_path = output_dir / output_filename
with open(output_path, "wb") as f: with open(output_path, "wb") as f:
f.write(img_resp.content) f.write(img_resp.content)
@@ -485,7 +544,4 @@ class ImageGenerator:
raise RuntimeError(f"Gemini 生成失败: {fail_reason}") raise RuntimeError(f"Gemini 生成失败: {fail_reason}")
raise RuntimeError("Gemini 生成超时") raise RuntimeError("Gemini 生成超时")
return _with_retries(_call, max_retries=IMG_MAX_RETRIES, label="gemini_image")
except Exception as e:
logger.error(f"Gemini Generation Exception: {e}")
raise

View File

@@ -0,0 +1,248 @@
"""
Legacy project JSON normalizer.
Goal:
- Convert legacy project JSON (from /opt/gloda-factory/temp/project_*.json)
into the script_data schema expected by current Streamlit UI (`app.py`)
and composer (`modules/composer.py`).
Principles:
- Pure rule-based, no AI generation.
- Never drop legacy information: keep full raw doc under `script_data["_legacy"]`
and per-scene under `scene["_legacy"]`.
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple
def _as_str(v: Any) -> str:
return v if isinstance(v, str) else ""
def _as_dict(v: Any) -> Dict[str, Any]:
return v if isinstance(v, dict) else {}
def _as_list(v: Any) -> List[Any]:
return v if isinstance(v, list) else []
def _detect_schema_variant(doc: Dict[str, Any]) -> str:
scenes = _as_list(doc.get("scenes"))
if not scenes:
return "Unknown"
prompt_keys = {"image_prompt", "visual_prompt", "video_prompt"}
for s in scenes:
if isinstance(s, dict) and (set(s.keys()) & prompt_keys):
return "Schema_A"
typical_b = {"keyframe", "story_beat", "camera_movement", "image_url"}
for s in scenes:
if isinstance(s, dict) and (set(s.keys()) & typical_b):
return "Schema_B"
return "Unknown"
def _derive_visual_prompt_from_keyframe(scene: Dict[str, Any]) -> str:
"""
Build a readable prompt-like summary from keyframe + story_beat.
This is NOT an AI prompt; it's a structured description to avoid empty fields.
"""
keyframe = _as_dict(scene.get("keyframe") or scene.get("keyframes"))
story_beat = _as_str(scene.get("story_beat"))
parts: List[str] = []
if keyframe:
parts.append("[DerivedFromKeyframe]")
# deterministic ordering for readability
for k in sorted(keyframe.keys()):
v = keyframe.get(k)
if isinstance(v, (str, int, float)) and str(v).strip():
parts.append(f"{k}: {v}")
elif isinstance(v, dict) and v:
# flatten one level
sub = ", ".join(f"{sk}={sv}" for sk, sv in sorted(v.items()) if str(sv).strip())
if sub:
parts.append(f"{k}: {sub}")
if story_beat:
parts.append(f"story_beat: {story_beat}")
return "\n".join(parts).strip()
def _derive_video_prompt_from_motion(scene: Dict[str, Any]) -> str:
camera_movement = _as_str(scene.get("camera_movement"))
rhythm = scene.get("rhythm")
story_beat = _as_str(scene.get("story_beat"))
parts: List[str] = []
parts.append("[DerivedFromMotion]")
if camera_movement:
parts.append(f"camera_movement: {camera_movement}")
if isinstance(rhythm, dict) and rhythm:
# keep stable keys
sub = ", ".join(f"{k}={rhythm.get(k)}" for k in sorted(rhythm.keys()))
parts.append(f"rhythm: {sub}")
if story_beat:
parts.append(f"story_beat: {story_beat}")
return "\n".join(parts).strip()
def _normalize_fancy_text(scene: Dict[str, Any], default_duration: float) -> Dict[str, Any]:
ft = scene.get("fancy_text")
if isinstance(ft, dict):
# Ensure required keys exist
out = dict(ft)
out.setdefault("text", "")
out.setdefault("style", "highlight")
# support either position dict or string
if "position" not in out:
out["position"] = "center"
out.setdefault("start_time", 0.0)
out.setdefault("duration", default_duration)
return out
# legacy doesn't have fancy_text
return {
"text": "",
"style": "highlight",
"position": "center",
"start_time": 0.0,
"duration": default_duration,
}
def _build_voiceover_timeline_from_scenes(normalized_scenes: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
timeline: List[Dict[str, Any]] = []
t = 0.0
for idx, s in enumerate(normalized_scenes):
dur = float(s.get("duration") or 0) or 0.0
legacy = _as_dict(s.get("_legacy"))
vo = _as_str(legacy.get("voiceover") or s.get("voiceover") or "")
if vo.strip():
timeline.append(
{
"id": idx + 1,
"text": vo,
"subtitle": vo,
"start_time": t,
"duration": dur if dur > 0 else 3.0,
}
)
t += dur if dur > 0 else 0.0
return timeline
def normalize_legacy_project(doc: Dict[str, Any]) -> Dict[str, Any]:
"""
Return a script_data dict compatible with current UI.
"""
schema = _detect_schema_variant(doc)
scenes_in = _as_list(doc.get("scenes"))
normalized_scenes: List[Dict[str, Any]] = []
for s in scenes_in:
if not isinstance(s, dict):
continue
scene_id = int(s.get("id") or (len(normalized_scenes) + 1))
duration = float(s.get("duration") or 0) or 0.0
if duration <= 0:
duration = 3.0
# visual prompt
visual_prompt = ""
if schema == "Schema_A":
# legacy key is usually image_prompt
visual_prompt = _as_str(s.get("visual_prompt") or s.get("image_prompt") or "")
elif schema == "Schema_B":
visual_prompt = _derive_visual_prompt_from_keyframe(s)
else:
visual_prompt = _as_str(s.get("visual_prompt") or s.get("image_prompt") or "")
if not visual_prompt and s.get("keyframe"):
visual_prompt = _derive_visual_prompt_from_keyframe(s)
# video prompt
video_prompt = _as_str(s.get("video_prompt") or "")
if not video_prompt:
video_prompt = _derive_video_prompt_from_motion(s)
# fancy text (default safe)
fancy_text = _normalize_fancy_text(s, default_duration=duration)
normalized_scene: Dict[str, Any] = {
"id": scene_id,
"duration": duration,
"visual_prompt": visual_prompt,
"video_prompt": video_prompt,
"fancy_text": fancy_text,
# keep optional fields if present
"timeline": s.get("timeline", ""),
}
# Attach per-scene legacy snapshot (do not mutate the original)
normalized_scene["_legacy"] = {
"schema": schema,
"image_url": s.get("image_url"),
"keyframe": s.get("keyframe") or s.get("keyframes"),
"camera_movement": s.get("camera_movement"),
"story_beat": s.get("story_beat"),
"rhythm": s.get("rhythm"),
"sound_design": s.get("sound_design"),
"voiceover": s.get("voiceover"),
}
normalized_scenes.append(normalized_scene)
# voiceover timeline: normalize existing if present, else derive from scenes voiceover
vtl = doc.get("voiceover_timeline")
voiceover_timeline: List[Dict[str, Any]] = []
if isinstance(vtl, list) and vtl:
for idx, it in enumerate(vtl):
if not isinstance(it, dict):
continue
# unify field names
text = _as_str(it.get("text") or it.get("voiceover") or "")
subtitle = _as_str(it.get("subtitle") or text)
start_time = float(it.get("start_time") or 0.0)
duration = float(it.get("duration") or 3.0)
voiceover_timeline.append(
{
"id": int(it.get("id") or (idx + 1)),
"text": text,
"subtitle": subtitle,
"start_time": start_time,
"duration": duration,
}
)
else:
voiceover_timeline = _build_voiceover_timeline_from_scenes(normalized_scenes)
# script_data expected by UI
script_data: Dict[str, Any] = {
"hook": doc.get("hook", ""),
"selling_points": doc.get("selling_points", []) or [],
"target_audience": doc.get("target_audience", "") or "",
"video_style": doc.get("video_style", "") or "",
"bgm_style": doc.get("bgm_style", "") or "",
"voiceover_timeline": voiceover_timeline,
"scenes": normalized_scenes,
"cta": doc.get("cta", ""),
# Keep analysis for UI fallback display
"analysis": doc.get("analysis", ""),
# Preserve original
"_legacy": doc,
"_legacy_schema": schema,
}
return script_data

View File

@@ -0,0 +1,66 @@
"""
Legacy path mapper for assets generated by the 8502 runtime (/root/video-flow).
Problem:
- Postgres `scene_assets.local_path` may contain paths like `/root/video-flow/temp/...`
which are not visible inside docker containers running 8503 stack.
Solution:
- Mount host directories into containers (e.g. /legacy/temp, /legacy/output)
- Map legacy host paths -> container paths, and produce static URLs accordingly.
"""
from __future__ import annotations
import os
from pathlib import Path
from typing import Optional, Tuple
LEGACY_HOST_TEMP_PREFIX = "/root/video-flow/temp/"
LEGACY_HOST_OUTPUT_PREFIX = "/root/video-flow/output/"
# Container mount points (see docker-compose.yml)
LEGACY_CONTAINER_TEMP_DIR = "/legacy/temp"
LEGACY_CONTAINER_OUTPUT_DIR = "/legacy/output"
LEGACY_STATIC_TEMP_PREFIX = "/static/legacy-temp/"
LEGACY_STATIC_OUTPUT_PREFIX = "/static/legacy-output/"
def map_legacy_local_path(local_path: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
"""
Returns: (container_visible_path, static_url)
- If local_path exists as-is, returns (local_path, None)
- If it's a legacy host path, rewrite to container mount and provide URL
- If unknown, returns (local_path, None)
"""
if not local_path:
return None, None
# If container can see it already, keep
if os.path.exists(local_path):
return local_path, None
# Legacy host -> container mapping by basename
if local_path.startswith(LEGACY_HOST_TEMP_PREFIX):
name = Path(local_path).name
container_path = str(Path(LEGACY_CONTAINER_TEMP_DIR) / name)
url = f"{LEGACY_STATIC_TEMP_PREFIX}{name}"
return container_path, url
if local_path.startswith(LEGACY_HOST_OUTPUT_PREFIX):
name = Path(local_path).name
container_path = str(Path(LEGACY_CONTAINER_OUTPUT_DIR) / name)
url = f"{LEGACY_STATIC_OUTPUT_PREFIX}{name}"
return container_path, url
# Unknown path: keep as-is
return local_path, None

49
modules/limits.py Normal file
View File

@@ -0,0 +1,49 @@
"""
Process-wide concurrency limits for Streamlit single-process deployment.
These limits reduce tail latency and avoid a single user saturating network/CPU
and impacting other concurrent sessions.
"""
from __future__ import annotations
import os
import threading
from contextlib import contextmanager
from typing import Iterator
def _env_int(name: str, default: int) -> int:
try:
return max(1, int(os.getenv(name, str(default))))
except Exception:
return default
MAX_CONCURRENT_IMAGE = _env_int("MAX_CONCURRENT_IMAGE", 6)
MAX_CONCURRENT_VIDEO = _env_int("MAX_CONCURRENT_VIDEO", 1)
_image_sem = threading.BoundedSemaphore(MAX_CONCURRENT_IMAGE)
_video_sem = threading.BoundedSemaphore(MAX_CONCURRENT_VIDEO)
@contextmanager
def acquire_image(blocking: bool = True) -> Iterator[bool]:
ok = _image_sem.acquire(blocking=blocking)
try:
yield ok
finally:
if ok:
_image_sem.release()
@contextmanager
def acquire_video(blocking: bool = True) -> Iterator[bool]:
ok = _video_sem.acquire(blocking=blocking)
try:
yield ok
finally:
if ok:
_video_sem.release()

93
modules/path_utils.py Normal file
View File

@@ -0,0 +1,93 @@
"""
Path utilities for cross-session / cross-project isolation.
Goal:
- Avoid file overwrites across concurrent users/projects by namespacing all temp artifacts
under temp/projects/{project_id}/...
- Provide safe unique filename helpers.
"""
from __future__ import annotations
import os
import re
import time
import uuid
from pathlib import Path
from typing import Optional
import config
_SAFE_CHARS_RE = re.compile(r"[^A-Za-z0-9._-]+")
def sanitize_filename(name: str) -> str:
"""Keep only safe filename characters and strip path separators."""
if not isinstance(name, str):
return "file"
name = name.replace("\\", "_").replace("/", "_").strip()
name = _SAFE_CHARS_RE.sub("_", name)
return name or "file"
def ensure_dir(path: Path) -> Path:
path.mkdir(parents=True, exist_ok=True)
return path
def project_root(project_id: str) -> Path:
pid = sanitize_filename(project_id or "UNKNOWN")
return ensure_dir(config.TEMP_DIR / "projects" / pid)
def project_upload_dir(project_id: str) -> Path:
return ensure_dir(project_root(project_id) / "uploads")
def project_images_dir(project_id: str) -> Path:
return ensure_dir(project_root(project_id) / "images")
def project_videos_dir(project_id: str) -> Path:
return ensure_dir(project_root(project_id) / "videos")
def project_audio_dir(project_id: str) -> Path:
return ensure_dir(project_root(project_id) / "audio")
def project_compose_dir(project_id: str, output_name: str) -> Path:
out = sanitize_filename(output_name or f"compose_{int(time.time())}")
return ensure_dir(project_root(project_id) / "compose" / out)
def unique_filename(
prefix: str,
ext: str,
project_id: Optional[str] = None,
scene_id: Optional[int] = None,
extra: Optional[str] = None,
) -> str:
"""
Build a unique filename.
Example: scene_1_PROJ-xxx_173..._a1b2c3.mp4
"""
pfx = sanitize_filename(prefix or "file")
e = (ext or "").lstrip(".") or "bin"
pid = sanitize_filename(project_id) if project_id else None
sid = str(int(scene_id)) if scene_id is not None else None
ex = sanitize_filename(extra) if extra else None
ts = str(int(time.time() * 1000))
rnd = uuid.uuid4().hex[:8]
parts = [pfx]
if sid:
parts.append(f"s{sid}")
if pid:
parts.append(pid)
if ex:
parts.append(ex)
parts.extend([ts, rnd])
return f"{'_'.join(parts)}.{e}"

View File

@@ -12,6 +12,7 @@ from pathlib import Path
import config import config
from modules import storage from modules import storage
from modules.db_manager import db from modules.db_manager import db
from modules import path_utils
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -76,15 +77,7 @@ class VideoGenerator:
logger.info(f"Recovering task {task_id}: status={status}") logger.info(f"Recovering task {task_id}: status={status}")
if status == "succeeded" and video_url: if status == "succeeded" and video_url:
downloaded_path = self._download_video(video_url, os.path.basename(output_path)) return self._download_video_to(video_url, output_path)
if downloaded_path:
# 如果下载的文件名和目标路径不一致 (download_video 使用 filename 参数拼接到 TEMP_DIR)
# 需要移动或确认。 _download_video 返回完整路径。
# 如果 output_path 是绝对路径且不同,则移动。
if os.path.abspath(downloaded_path) != os.path.abspath(output_path):
import shutil
shutil.move(downloaded_path, output_path)
return True
return False return False
except Exception as e: except Exception as e:
logger.error(f"Failed to recover video task {task_id}: {e}") logger.error(f"Failed to recover video task {task_id}: {e}")
@@ -144,7 +137,15 @@ class VideoGenerator:
if status == "succeeded": if status == "succeeded":
logger.info(f"Scene {scene_id} video generated successfully") logger.info(f"Scene {scene_id} video generated successfully")
# 下载视频 # 下载视频
video_path = self._download_video(result_url, f"scene_{scene_id}_video.mp4") out_dir = path_utils.project_videos_dir(project_id) if project_id else config.TEMP_DIR
fname = path_utils.unique_filename(
prefix="scene_video",
ext="mp4",
project_id=project_id,
scene_id=scene_id,
extra=(task_id[-8:] if isinstance(task_id, str) else None),
)
video_path = self._download_video(result_url, fname, output_dir=out_dir)
if video_path: if video_path:
generated_videos[scene_id] = video_path generated_videos[scene_id] = video_path
# Update DB # Update DB
@@ -235,13 +236,26 @@ class VideoGenerator:
content_url = None content_url = None
if status == "succeeded": if status == "succeeded":
if "content" in result: # Try multiple known shapes for volcengine response
content = result["content"] content = result.get("content")
if isinstance(content, list) and len(content) > 0: # sometimes nested: data.content or data.result.content, etc.
item = content[0] if not content and isinstance(result.get("result"), dict):
content_url = item.get("video_url") or item.get("url") content = result["result"].get("content")
def _extract_url(obj):
if isinstance(obj, dict):
return obj.get("video_url") or obj.get("url")
return None
if isinstance(content, list) and content:
# pick the first item that has a usable url
for item in content:
u = _extract_url(item)
if u:
content_url = u
break
elif isinstance(content, dict): elif isinstance(content, dict):
content_url = content.get("video_url") or content.get("url") content_url = _extract_url(content)
return status, content_url return status, content_url
@@ -249,8 +263,26 @@ class VideoGenerator:
logger.error(f"Check task failed: {e}") logger.error(f"Check task failed: {e}")
return "unknown", None return "unknown", None
def _download_video(self, url: str, filename: str) -> str: def _download_video_to(self, url: str, output_path: str) -> bool:
"""下载视频到临时目录""" """下载视频到指定路径(避免 TEMP_DIR 固定文件名导致覆盖)"""
if not url or not output_path:
return False
try:
out_p = Path(output_path)
out_p.parent.mkdir(parents=True, exist_ok=True)
response = requests.get(url, stream=True, timeout=60)
response.raise_for_status()
with open(out_p, "wb") as f:
for chunk in response.iter_content(chunk_size=8192):
if chunk:
f.write(chunk)
return True
except Exception as e:
logger.error(f"Download video failed: {e}")
return False
def _download_video(self, url: str, filename: str, output_dir: Optional[Path] = None) -> str:
"""下载视频到临时目录(默认使用 config.TEMP_DIR可指定 output_dir 避免覆盖)"""
if not url: if not url:
return None return None
@@ -258,9 +290,12 @@ class VideoGenerator:
response = requests.get(url, stream=True, timeout=60) response = requests.get(url, stream=True, timeout=60)
response.raise_for_status() response.raise_for_status()
output_path = config.TEMP_DIR / filename out_dir = output_dir or config.TEMP_DIR
out_dir.mkdir(parents=True, exist_ok=True)
output_path = out_dir / filename
with open(output_path, "wb") as f: with open(output_path, "wb") as f:
for chunk in response.iter_content(chunk_size=8192): for chunk in response.iter_content(chunk_size=8192):
if chunk:
f.write(chunk) f.write(chunk)
return str(output_path) return str(output_path)

View File

@@ -21,9 +21,22 @@ imageio[ffmpeg]>=2.33.0
Pillow>=10.0.0 Pillow>=10.0.0
numpy>=1.24.0 numpy>=1.24.0
# Web UI # Web UI (Streamlit - 保留原有调试界面)
streamlit>=1.29.0 streamlit>=1.29.0
# FastAPI Backend (新增前后端分离)
fastapi>=0.109.0
uvicorn[standard]>=0.27.0
python-multipart>=0.0.6
# Task Queue (异步任务处理,支持水平扩展)
celery[redis]>=5.3.0
redis>=5.0.0
# Database
sqlalchemy>=2.0.0
psycopg2-binary>=2.9.9
# Config # Config
python-dotenv>=1.0.0 python-dotenv>=1.0.0
PyYAML>=6.0.1 PyYAML>=6.0.1