perf(8502): 并行生图(6并发)+超时重试;视频URL直连预览/下载;路径隔离

This commit is contained in:
Tony Zhang
2025-12-17 12:21:22 +08:00
parent ebcf165c3f
commit 1e210ffccf
12 changed files with 1168 additions and 201 deletions

View File

@@ -11,6 +11,7 @@ from typing import Dict, Any, List, Optional, Union
import config
from modules import ffmpeg_utils, fancy_text, factory, storage
from modules.text_renderer import renderer
from modules import path_utils
logger = logging.getLogger(__name__)
@@ -65,6 +66,7 @@ class VideoComposer:
bgm_path: str = None,
bgm_volume: float = 0.15,
output_name: str = None,
project_id: Optional[str] = None,
upload_to_r2: bool = False
) -> str:
"""
@@ -89,25 +91,27 @@ class VideoComposer:
timestamp = int(time.time())
output_name = output_name or f"composed_{timestamp}"
# Per-project temp dir to avoid cross-project overwrites
temp_root = path_utils.project_compose_dir(project_id, output_name) if project_id else config.TEMP_DIR
logger.info(f"Starting composition: {len(video_paths)} videos")
try:
# Step 1: 拼接视频
merged_path = str(config.TEMP_DIR / f"{output_name}_merged.mp4")
merged_path = str(Path(temp_root) / f"{output_name}_merged.mp4")
ffmpeg_utils.concat_videos(video_paths, merged_path, self.target_size)
self._add_temp(merged_path)
current_video = merged_path
# Step 1.1: 若无音轨,补一条静音底,避免后续滤镜找不到 0:a
silent_path = str(config.TEMP_DIR / f"{output_name}_silent.mp4")
silent_path = str(Path(temp_root) / f"{output_name}_silent.mp4")
ffmpeg_utils.add_silence_audio(current_video, silent_path)
self._add_temp(silent_path)
current_video = silent_path
# Step 2: 添加字幕 (白字黑边,无底框,水平居中)
if subtitles:
subtitled_path = str(config.TEMP_DIR / f"{output_name}_subtitled.mp4")
subtitled_path = str(Path(temp_root) / f"{output_name}_subtitled.mp4")
subtitle_style = {
"font": ffmpeg_utils._get_font_path(),
"fontsize": 60,
@@ -169,7 +173,7 @@ class VideoComposer:
"duration": ft.get("duration", 999)
})
fancy_path = str(config.TEMP_DIR / f"{output_name}_fancy.mp4")
fancy_path = str(Path(temp_root) / f"{output_name}_fancy.mp4")
ffmpeg_utils.overlay_multiple_images(
current_video, overlay_configs, fancy_path
)
@@ -178,13 +182,15 @@ class VideoComposer:
# Step 4: 生成并混合旁白(火山 WS 优先,失败回退 Edge
if voiceover_text:
vo_out = str(Path(temp_root) / f"{output_name}_vo_full.mp3")
vo_path = factory.generate_voiceover_volcengine(
text=voiceover_text,
voice_type=self.voice_type
voice_type=self.voice_type,
output_path=vo_out,
)
self._add_temp(vo_path)
voiced_path = str(config.TEMP_DIR / f"{output_name}_voiced.mp4")
voiced_path = str(Path(temp_root) / f"{output_name}_voiced.mp4")
ffmpeg_utils.mix_audio(
current_video, vo_path, voiced_path,
audio_volume=1.5,
@@ -195,12 +201,12 @@ class VideoComposer:
elif voiceover_segments:
current_video = self._add_segmented_voiceover(
current_video, voiceover_segments, output_name
current_video, voiceover_segments, output_name, Path(temp_root)
)
# Step 5: 添加BGM淡入淡出若 duck 失败会自动退回低音量混合)
if bgm_path:
bgm_output = str(config.TEMP_DIR / f"{output_name}_bgm.mp4")
bgm_output = str(Path(temp_root) / f"{output_name}_bgm.mp4")
ffmpeg_utils.add_bgm(
current_video, bgm_path, bgm_output,
bgm_volume=bgm_volume,
@@ -237,7 +243,8 @@ class VideoComposer:
self,
video_path: str,
segments: List[Dict[str, Any]],
output_name: str
output_name: str,
temp_root: Path,
) -> str:
"""添加分段旁白"""
if not segments:
@@ -254,7 +261,7 @@ class VideoComposer:
audio_path = factory.generate_voiceover_volcengine(
text=text,
voice_type=voice,
output_path=str(config.TEMP_DIR / f"{output_name}_seg_{i}.mp3")
output_path=str(temp_root / f"{output_name}_seg_{i}.mp3")
)
if audio_path:
@@ -270,7 +277,7 @@ class VideoComposer:
# 依次混入音频
current = video_path
for i, af in enumerate(audio_files):
output = str(config.TEMP_DIR / f"{output_name}_seg_mixed_{i}.mp4")
output = str(temp_root / f"{output_name}_seg_mixed_{i}.mp4")
ffmpeg_utils.mix_audio(
current, af["path"], output,
audio_volume=1.0,
@@ -287,7 +294,8 @@ class VideoComposer:
script: Dict[str, Any],
video_map: Dict[int, str],
bgm_path: str = None,
output_name: str = None
output_name: str = None,
project_id: Optional[str] = None,
) -> str:
"""
基于生成脚本和视频映射进行合成
@@ -340,13 +348,30 @@ class VideoComposer:
# 无 background不加底框
}
# 让花字时长默认跟随镜头(不改 prompt仅纠正过短/缺失 duration
start_in_scene = float(ft.get("start_time", 0) or 0.0)
if start_in_scene < 0:
start_in_scene = 0.0
if start_in_scene >= duration:
start_in_scene = 0.0
ft_dur = ft.get("duration", None)
try:
ft_dur_val = float(ft_dur) if ft_dur is not None else None
except Exception:
ft_dur_val = None
# If too short, extend to scene end
if ft_dur_val is None or ft_dur_val < 1.5:
ft_dur_val = max(duration - start_in_scene, 1.5)
# Clamp within scene duration
ft_dur_val = max(0.5, min(ft_dur_val, duration))
fancy_texts.append({
"text": text,
"style": fixed_style,
"x": "(W-w)/2", # 居中
"y": "180", # 上半区域
"start": total_duration + float(ft.get("start_time", 0)),
"duration": float(ft.get("duration", duration))
"start": total_duration + start_in_scene,
"duration": ft_dur_val
})
total_duration += duration
@@ -354,15 +379,16 @@ class VideoComposer:
# 2. 拼接视频
timestamp = int(time.time())
output_name = output_name or f"composed_{timestamp}"
temp_root = path_utils.project_compose_dir(project_id, output_name) if project_id else config.TEMP_DIR
merged_path = str(config.TEMP_DIR / f"{output_name}_merged.mp4")
merged_path = str(Path(temp_root) / f"{output_name}_merged.mp4")
ffmpeg_utils.concat_videos(video_paths, merged_path, self.target_size)
self._add_temp(merged_path)
current_video = merged_path
# 3. 处理整体旁白时间轴 (New Logic)
voiceover_timeline = script.get("voiceover_timeline", [])
mixed_audio_path = str(config.TEMP_DIR / f"{output_name}_mixed_vo.mp3")
mixed_audio_path = str(Path(temp_root) / f"{output_name}_mixed_vo.mp3")
# 初始化静音底轨 (长度为 total_duration)
ffmpeg_utils._run_ffmpeg([
@@ -401,17 +427,17 @@ class VideoComposer:
tts_path = factory.generate_voiceover_volcengine(
text=text,
voice_type=self.voice_type,
output_path=str(config.TEMP_DIR / f"{output_name}_vo_{i}.mp3")
output_path=str(Path(temp_root) / f"{output_name}_vo_{i}.mp3")
)
self._add_temp(tts_path)
# 调整时长
adjusted_path = str(config.TEMP_DIR / f"{output_name}_vo_adj_{i}.mp3")
adjusted_path = str(Path(temp_root) / f"{output_name}_vo_adj_{i}.mp3")
ffmpeg_utils.adjust_audio_duration(tts_path, target_duration, adjusted_path)
self._add_temp(adjusted_path)
# 混合到总音轨
new_mixed = str(config.TEMP_DIR / f"{output_name}_mixed_{i}.mp3")
new_mixed = str(Path(temp_root) / f"{output_name}_mixed_{i}.mp3")
ffmpeg_utils.mix_audio_at_offset(mixed_audio_path, adjusted_path, target_start, new_mixed)
mixed_audio_path = new_mixed # Update current mixed path
self._add_temp(new_mixed)
@@ -425,7 +451,7 @@ class VideoComposer:
})
# 4. 将合成好的旁白混入视频
voiced_path = str(config.TEMP_DIR / f"{output_name}_voiced.mp4")
voiced_path = str(Path(temp_root) / f"{output_name}_voiced.mp4")
ffmpeg_utils.mix_audio(
current_video, mixed_audio_path, voiced_path,
audio_volume=1.5,
@@ -436,7 +462,7 @@ class VideoComposer:
# 5. 添加字幕 (使用新的 ffmpeg_utils.add_multiple_subtitles)
if subtitles:
subtitled_path = str(config.TEMP_DIR / f"{output_name}_subtitled.mp4")
subtitled_path = str(Path(temp_root) / f"{output_name}_subtitled.mp4")
subtitle_style = {
"font": ffmpeg_utils._get_font_path(),
"fontsize": 60,
@@ -455,7 +481,7 @@ class VideoComposer:
# 6. 添加花字
if fancy_texts:
fancy_path = str(config.TEMP_DIR / f"{output_name}_fancy.mp4")
fancy_path = str(Path(temp_root) / f"{output_name}_fancy.mp4")
overlay_configs = []
for ft in fancy_texts:
@@ -477,7 +503,7 @@ class VideoComposer:
# 7. 添加 BGM
if bgm_path:
bgm_output = str(config.TEMP_DIR / f"{output_name}_bgm.mp4")
bgm_output = str(Path(temp_root) / f"{output_name}_bgm.mp4")
ffmpeg_utils.add_bgm(
current_video, bgm_path, bgm_output,
bgm_volume=0.15

View File

@@ -113,6 +113,25 @@ class DBManager:
finally:
session.close()
def update_project_product_info(self, project_id: str, product_info: Dict[str, Any]) -> None:
    """
    Update project.product_info JSON (read-write with Postgres shared DB).
    Used to persist editor state without changing schema.

    Args:
        project_id: Primary key of the Project row to update.
        product_info: JSON-serializable dict; stored as a JSON string
            (ensure_ascii=False keeps non-ASCII text readable in the DB).

    Raises:
        Exception: re-raised after rollback so callers can surface the failure.
    """
    session = self._get_session()
    try:
        project = session.query(Project).filter_by(id=project_id).first()
        if project:
            # Whole-dict overwrite: last writer wins, no merge/locking here.
            project.product_info = json.dumps(product_info, ensure_ascii=False)
            project.updated_at = time.time()  # epoch seconds, matching time.time() usage elsewhere
            session.commit()
        # NOTE(review): a missing project is silently ignored (no error, no commit).
    except Exception as e:
        session.rollback()
        logger.error(f"Error updating product_info: {e}")
        raise
    finally:
        session.close()
def update_project_status(self, project_id: str, status: str):
session = self._get_session()
try:
@@ -260,6 +279,35 @@ class DBManager:
finally:
session.close()
def update_asset_metadata(self, project_id: str, scene_id: int, asset_type: str, patch: Dict[str, Any]) -> None:
    """Merge-patch asset.metadata JSON without overwriting other fields.

    Loads the existing metadata_json (tolerating corrupt/non-dict content by
    resetting to {}), shallow-merges *patch* on top, and writes it back.

    Args:
        project_id: Owning project id.
        scene_id: Scene the asset belongs to.
        asset_type: Asset discriminator used in the SceneAsset lookup.
        patch: Keys to add/overwrite; an empty/None patch is a no-op.

    NOTE(review): unlike update_project_product_info, failures here are
    logged and swallowed (no re-raise) — presumably intentional best-effort
    semantics for metadata; confirm callers don't depend on errors surfacing.
    """
    if not patch:
        return
    session = self._get_session()
    try:
        asset = session.query(SceneAsset).filter_by(
            project_id=project_id,
            scene_id=scene_id,
            asset_type=asset_type
        ).first()
        if not asset:
            # Nothing to patch; silently ignore missing assets.
            return
        try:
            existing = json.loads(asset.metadata_json) if asset.metadata_json else {}
        except Exception:
            # Corrupt JSON on disk: start fresh rather than failing the patch.
            existing = {}
        if not isinstance(existing, dict):
            existing = {}
        # Shallow merge: patch keys overwrite, untouched keys survive.
        existing.update(patch)
        asset.metadata_json = json.dumps(existing, ensure_ascii=False)
        asset.updated_at = time.time()  # epoch seconds
        session.commit()
    except Exception as e:
        session.rollback()
        logger.error(f"Error updating asset metadata: {e}")
    finally:
        session.close()
# --- Config/Prompt Operations ---
def get_config(self, key: str, default: Any = None) -> Any:

View File

@@ -697,8 +697,12 @@ def generate_voiceover_volcengine_long(
# 生成每段音频
chunk_files = []
# Keep temp artifacts near output_path when provided to avoid cross-project collisions
base_tmp_dir = Path(output_path).parent if output_path else config.TEMP_DIR
base_tmp_dir.mkdir(parents=True, exist_ok=True)
for i, chunk in enumerate(chunks):
chunk_path = str(config.TEMP_DIR / f"vo_chunk_{i}_{int(time.time())}.mp3")
import uuid
chunk_path = str(base_tmp_dir / f"vo_chunk_{i}_{int(time.time() * 1000)}_{uuid.uuid4().hex[:8]}.mp3")
try:
path = generate_voiceover_volcengine(
text=chunk,
@@ -723,13 +727,14 @@ def generate_voiceover_volcengine_long(
return chunk_files[0]
# 创建合并文件列表
concat_list = config.TEMP_DIR / f"concat_audio_{os.getpid()}.txt"
import uuid
concat_list = base_tmp_dir / f"concat_audio_{int(time.time() * 1000)}_{uuid.uuid4().hex[:8]}.txt"
with open(concat_list, "w") as f:
for cf in chunk_files:
f.write(f"file '{cf}'\n")
if not output_path:
output_path = str(config.TEMP_DIR / f"vo_volc_merged_{int(time.time())}.mp3")
output_path = str(base_tmp_dir / f"vo_volc_merged_{int(time.time() * 1000)}_{uuid.uuid4().hex[:8]}.mp3")
# FFmpeg 合并
import subprocess

View File

@@ -7,6 +7,7 @@ import re
import subprocess
import tempfile
import logging
import shutil
from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple
@@ -14,9 +15,39 @@ import config
logger = logging.getLogger(__name__)
# FFmpeg/FFprobe 路径(优先使用项目内的二进制)
FFMPEG_PATH = str(config.BASE_DIR / "bin" / "ffmpeg") if (config.BASE_DIR / "bin" / "ffmpeg").exists() else "ffmpeg"
FFPROBE_PATH = str(config.BASE_DIR / "bin" / "ffprobe") if (config.BASE_DIR / "bin" / "ffprobe").exists() else "ffprobe"
def _pick_exec(preferred_path: str, fallback_name: str) -> str:
"""
Pick an executable path.
Why:
- In docker, /app/bin may accidentally contain binaries built for another OS/arch,
causing `Exec format error` at runtime (seen on /app/bin/ffprobe).
Strategy:
- Prefer preferred_path if it exists AND is runnable.
- Otherwise fall back to PATH-resolved command (fallback_name).
"""
if preferred_path and os.path.exists(preferred_path):
try:
# Validate it can be executed (arch OK) and is a real binary.
# ffmpeg/ffprobe both support `-version`.
result = subprocess.run(
[preferred_path, "-version"],
capture_output=True,
text=True,
)
if result.returncode == 0:
return preferred_path
except OSError:
# Exec format error / permission error -> fall back
pass
resolved = shutil.which(fallback_name)
return resolved or fallback_name
# FFmpeg/FFprobe paths: prefer project-bundled binaries under BASE_DIR/bin,
# but run an executability self-check first (see _pick_exec) so a wrong-arch
# binary in the image falls back to the PATH-resolved system command.
FFMPEG_PATH = _pick_exec(str(config.BASE_DIR / "bin" / "ffmpeg"), "ffmpeg")
FFPROBE_PATH = _pick_exec(str(config.BASE_DIR / "bin" / "ffprobe"), "ffprobe")
# 字体路径:优先使用项目内置字体,然后按平台回退到系统字体
DEFAULT_FONT_PATHS = [
@@ -159,15 +190,6 @@ def concat_videos(
logger.info(f"Concatenating {len(video_paths)} videos...")
# 创建 concat 文件列表
concat_file = config.TEMP_DIR / f"concat_{os.getpid()}.txt"
with open(concat_file, "w", encoding="utf-8") as f:
for vp in video_paths:
# 使用绝对路径并转义单引号
abs_path = os.path.abspath(vp)
f.write(f"file '{abs_path}'\n")
width, height = target_size
# 使用 filter_complex 统一分辨率后拼接
@@ -203,10 +225,6 @@ def concat_videos(
_run_ffmpeg(cmd)
# 清理临时文件
if concat_file.exists():
concat_file.unlink()
logger.info(f"Concatenated video saved: {output_path}")
return output_path
@@ -825,10 +843,10 @@ def add_bgm(
bgm_volume: BGM音量
loop: 是否循环BGM
"""
# 验证 BGM 文件存在
# 验证 BGM 文件存在(默认保持兼容:仍会输出视频,但会明确打日志)
if not bgm_path or not os.path.exists(bgm_path):
logger.error(f"BGM file not found: {bgm_path}")
# 直接复制原视频,不添加 BGM
logger.error(f"BGM file not found (skip add_bgm): {bgm_path}")
# 直接复制原视频,不添加 BGM(上层应当提示用户/写入 metadata
import shutil
shutil.copy(video_path, output_path)
return output_path

View File

@@ -15,9 +15,52 @@ import io
from modules import storage
import config
from modules import path_utils
logger = logging.getLogger(__name__)
def _env_int(name: str, default: int) -> int:
try:
return int(os.getenv(name, str(default)))
except Exception:
return default
# Tunables: slow channels can be hot; default conservative but adjustable.
# All values are overridable via environment variables of the same name.
IMG_SUBMIT_TIMEOUT_S = _env_int("IMG_SUBMIT_TIMEOUT_S", 180)  # HTTP timeout for submit/generate requests
IMG_POLL_TIMEOUT_S = _env_int("IMG_POLL_TIMEOUT_S", 30)  # HTTP timeout for each status poll request
IMG_MAX_RETRIES = _env_int("IMG_MAX_RETRIES", 3)  # attempts per provider call (used by _with_retries)
IMG_POLL_INTERVAL_S = _env_int("IMG_POLL_INTERVAL_S", 2)  # sleep between status polls
IMG_POLL_MAX_RETRIES = _env_int("IMG_POLL_MAX_RETRIES", 90) # 90*2s ~= 180s
def _is_retryable_exception(e: Exception) -> bool:
    """
    Heuristically decide whether *e* is transient and worth retrying.

    Returns True for network-level timeouts/connection failures raised by
    requests, and for provider error messages containing typical transient
    keywords (rate limiting, gateway errors, "try again", ...).
    """
    # Network / transient errors
    if isinstance(e, (requests.Timeout, requests.ConnectionError)):
        return True
    msg = str(e).lower()
    # Transient provider errors often contain these keywords.
    # Fixes vs original list:
    # - "rate" alone substring-matched unrelated words like "generate(d)",
    #   marking permanent failures retryable; use explicit rate-limit phrases.
    # - "temporarily unavailable" was dead code (subsumed by "temporarily").
    transient_keywords = (
        "timeout",
        "temporarily",
        "gateway",
        "rate limit",
        "ratelimit",
        "too many requests",
        "429",
        "try again",
    )
    if any(k in msg for k in transient_keywords):
        return True
    return False
def _with_retries(fn, *, max_retries: int, label: str):
last = None
for attempt in range(1, max_retries + 1):
try:
return fn()
except Exception as e:
last = e
retryable = _is_retryable_exception(e)
logger.warning(f"[{label}] attempt {attempt}/{max_retries} failed: {e} (retryable={retryable})")
if not retryable or attempt >= max_retries:
raise
# small backoff
time.sleep(min(2 ** (attempt - 1), 4))
raise last # pragma: no cover
class ImageGenerator:
"""连贯图片生成器 (Volcengine Provider)"""
@@ -51,7 +94,8 @@ class ImageGenerator:
original_image_path: Any,
previous_image_path: Optional[str] = None,
model_provider: str = "shubiaobiao", # "shubiaobiao", "gemini", "doubao"
visual_anchor: str = "" # 视觉锚点,强制拼接到 prompt 前
visual_anchor: str = "", # 视觉锚点,强制拼接到 prompt 前
project_id: Optional[str] = None,
) -> Optional[str]:
"""
生成单张分镜图片 (Public)
@@ -78,11 +122,19 @@ class ImageGenerator:
input_images.append(previous_image_path)
try:
out_dir = path_utils.project_images_dir(project_id) if project_id else config.TEMP_DIR
out_name = path_utils.unique_filename(
prefix="scene_image",
ext="png",
project_id=project_id,
scene_id=scene_id,
)
output_path = self._generate_single_image(
prompt=visual_prompt,
reference_images=input_images,
output_filename=f"scene_{scene_id}_{int(time.time())}.png",
provider=model_provider
output_filename=out_name,
provider=model_provider,
output_dir=out_dir,
)
if output_path:
@@ -101,7 +153,8 @@ class ImageGenerator:
self,
scenes: List[Dict[str, Any]],
reference_images: List[str],
visual_anchor: str = "" # 视觉锚点
visual_anchor: str = "", # 视觉锚点
project_id: Optional[str] = None,
) -> Dict[int, str]:
"""
Doubao 组图生成 (Batch) - 拼接 Prompt 一次生成多张
@@ -187,7 +240,15 @@ class ImageGenerator:
if image_url:
# Download
img_resp = requests.get(image_url, timeout=60)
output_path = config.TEMP_DIR / f"scene_{scene_id}_{int(time.time())}.png"
out_dir = path_utils.project_images_dir(project_id) if project_id else config.TEMP_DIR
out_name = path_utils.unique_filename(
prefix="scene_image",
ext="png",
project_id=project_id,
scene_id=scene_id,
extra="group",
)
output_path = out_dir / out_name
with open(output_path, "wb") as f:
f.write(img_resp.content)
results[scene_id] = str(output_path)
@@ -203,21 +264,24 @@ class ImageGenerator:
prompt: str,
reference_images: List[str],
output_filename: str,
provider: str = "shubiaobiao"
provider: str = "shubiaobiao",
output_dir: Optional[Path] = None,
) -> Optional[str]:
"""统一入口"""
out_dir = output_dir or config.TEMP_DIR
if provider == "doubao":
return self._generate_single_image_doubao(prompt, reference_images, output_filename)
return self._generate_single_image_doubao(prompt, reference_images, output_filename, out_dir)
elif provider == "gemini":
return self._generate_single_image_gemini(prompt, reference_images, output_filename)
return self._generate_single_image_gemini(prompt, reference_images, output_filename, out_dir)
else:
return self._generate_single_image_shubiao(prompt, reference_images, output_filename)
return self._generate_single_image_shubiao(prompt, reference_images, output_filename, out_dir)
def _generate_single_image_doubao(
self,
prompt: str,
reference_images: List[str],
output_filename: str
output_filename: str,
output_dir: Path
) -> Optional[str]:
"""调用 Volcengine Doubao (Image API)"""
@@ -255,9 +319,9 @@ class ImageGenerator:
"Authorization": f"Bearer {config.VOLC_API_KEY}"
}
try:
def _call():
logger.info(f"Submitting to Doubao Image: {self.endpoint}")
resp = requests.post(self.endpoint, json=payload, headers=headers, timeout=180)
resp = requests.post(self.endpoint, json=payload, headers=headers, timeout=IMG_SUBMIT_TIMEOUT_S)
if resp.status_code != 200:
msg = f"Doubao Image Failed ({resp.status_code}): {resp.text}"
@@ -272,22 +336,20 @@ class ImageGenerator:
img_resp = requests.get(image_url, timeout=60)
img_resp.raise_for_status()
output_path = config.TEMP_DIR / output_filename
output_path = output_dir / output_filename
with open(output_path, "wb") as f:
f.write(img_resp.content)
return str(output_path)
raise RuntimeError(f"No image URL in Doubao response: {data}")
except Exception as e:
logger.error(f"Doubao Gen Failed: {e}")
raise e
return _with_retries(_call, max_retries=IMG_MAX_RETRIES, label="doubao_image")
def _generate_single_image_shubiao(
self,
prompt: str,
reference_images: List[str],
output_filename: str
output_filename: str,
output_dir: Path
) -> Optional[str]:
"""调用 api2img.shubiaobiao.com 通道生成图片(同步返回 base64"""
# 准备参考图,内联 base64 方式
@@ -338,9 +400,9 @@ class ImageGenerator:
"Content-Type": "application/json"
}
try:
def _call():
logger.info(f"Submitting to Shubiaobiao Img: {endpoint}")
resp = requests.post(endpoint, json=payload, headers=headers, timeout=120)
resp = requests.post(endpoint, json=payload, headers=headers, timeout=IMG_SUBMIT_TIMEOUT_S)
if resp.status_code != 200:
msg = f"Shubiaobiao 提交失败 ({resp.status_code}): {resp.text}"
@@ -365,22 +427,20 @@ class ImageGenerator:
logger.error(msg)
raise RuntimeError(msg)
output_path = config.TEMP_DIR / output_filename
output_path = output_dir / output_filename
with open(output_path, "wb") as f:
f.write(base64.b64decode(img_b64))
logger.info(f"Shubiaobiao Generation Success: {output_path}")
return str(output_path)
except Exception as e:
logger.error(f"Shubiaobiao Generation Exception: {e}")
raise
return _with_retries(_call, max_retries=IMG_MAX_RETRIES, label="shubiaobiao_image")
def _generate_single_image_gemini(
self,
prompt: str,
reference_images: List[str],
output_filename: str
output_filename: str,
output_dir: Path
) -> Optional[str]:
"""调用 Gemini (Wuyin Keji / NanoBanana-Pro) 生成单张图片"""
@@ -420,10 +480,10 @@ class ImageGenerator:
"Content-Type": "application/json;charset:utf-8"
}
# 2. 提交任务
try:
def _call():
# 2. 提交任务
logger.info(f"Submitting to Gemini: {config.GEMINI_IMG_API_URL}")
resp = requests.post(config.GEMINI_IMG_API_URL, json=payload, headers=headers, timeout=30)
resp = requests.post(config.GEMINI_IMG_API_URL, json=payload, headers=headers, timeout=IMG_SUBMIT_TIMEOUT_S)
if resp.status_code != 200:
msg = f"Gemini 提交失败 ({resp.status_code}): {resp.text}"
@@ -443,13 +503,12 @@ class ImageGenerator:
logger.info(f"Gemini Task Submitted, ID: {task_id}")
# 3. 轮询状态
max_retries = 60
for i in range(max_retries):
time.sleep(2)
for _ in range(IMG_POLL_MAX_RETRIES):
time.sleep(IMG_POLL_INTERVAL_S)
poll_url = f"{config.GEMINI_IMG_DETAIL_URL}?key={config.GEMINI_IMG_KEY}&id={task_id}"
try:
poll_resp = requests.get(poll_url, headers=headers, timeout=30)
poll_resp = requests.get(poll_url, headers=headers, timeout=IMG_POLL_TIMEOUT_S)
except requests.Timeout:
continue
except Exception as e:
@@ -474,7 +533,7 @@ class ImageGenerator:
img_resp = requests.get(image_url, timeout=60)
img_resp.raise_for_status()
output_path = config.TEMP_DIR / output_filename
output_path = output_dir / output_filename
with open(output_path, "wb") as f:
f.write(img_resp.content)
@@ -485,7 +544,4 @@ class ImageGenerator:
raise RuntimeError(f"Gemini 生成失败: {fail_reason}")
raise RuntimeError("Gemini 生成超时")
except Exception as e:
logger.error(f"Gemini Generation Exception: {e}")
raise
return _with_retries(_call, max_retries=IMG_MAX_RETRIES, label="gemini_image")

View File

@@ -0,0 +1,248 @@
"""
Legacy project JSON normalizer.
Goal:
- Convert legacy project JSON (from /opt/gloda-factory/temp/project_*.json)
into the script_data schema expected by current Streamlit UI (`app.py`)
and composer (`modules/composer.py`).
Principles:
- Pure rule-based, no AI generation.
- Never drop legacy information: keep full raw doc under `script_data["_legacy"]`
and per-scene under `scene["_legacy"]`.
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple
def _as_str(v: Any) -> str:
return v if isinstance(v, str) else ""
def _as_dict(v: Any) -> Dict[str, Any]:
return v if isinstance(v, dict) else {}
def _as_list(v: Any) -> List[Any]:
return v if isinstance(v, list) else []
def _detect_schema_variant(doc: Dict[str, Any]) -> str:
scenes = _as_list(doc.get("scenes"))
if not scenes:
return "Unknown"
prompt_keys = {"image_prompt", "visual_prompt", "video_prompt"}
for s in scenes:
if isinstance(s, dict) and (set(s.keys()) & prompt_keys):
return "Schema_A"
typical_b = {"keyframe", "story_beat", "camera_movement", "image_url"}
for s in scenes:
if isinstance(s, dict) and (set(s.keys()) & typical_b):
return "Schema_B"
return "Unknown"
def _derive_visual_prompt_from_keyframe(scene: Dict[str, Any]) -> str:
    """
    Build a readable prompt-like summary from keyframe + story_beat.

    This is NOT an AI prompt; it's a structured description used so the
    normalized scene never ends up with an empty visual field.
    """
    keyframe = _as_dict(scene.get("keyframe") or scene.get("keyframes"))
    story_beat = _as_str(scene.get("story_beat"))

    lines: List[str] = []
    if keyframe:
        lines.append("[DerivedFromKeyframe]")
        # Deterministic key ordering keeps output stable and readable.
        for key in sorted(keyframe.keys()):
            value = keyframe.get(key)
            if isinstance(value, (str, int, float)) and str(value).strip():
                lines.append(f"{key}: {value}")
            elif isinstance(value, dict) and value:
                # Flatten exactly one level of nesting.
                sub = ", ".join(
                    f"{sk}={sv}"
                    for sk, sv in sorted(value.items())
                    if str(sv).strip()
                )
                if sub:
                    lines.append(f"{key}: {sub}")
    if story_beat:
        lines.append(f"story_beat: {story_beat}")
    return "\n".join(lines).strip()
def _derive_video_prompt_from_motion(scene: Dict[str, Any]) -> str:
    """Summarize camera movement / rhythm / story beat as a derived video prompt."""
    camera_movement = _as_str(scene.get("camera_movement"))
    rhythm = scene.get("rhythm")
    story_beat = _as_str(scene.get("story_beat"))

    lines: List[str] = ["[DerivedFromMotion]"]
    if camera_movement:
        lines.append(f"camera_movement: {camera_movement}")
    if isinstance(rhythm, dict) and rhythm:
        # Sorted keys keep the rendering deterministic across runs.
        sub = ", ".join(f"{k}={rhythm.get(k)}" for k in sorted(rhythm.keys()))
        lines.append(f"rhythm: {sub}")
    if story_beat:
        lines.append(f"story_beat: {story_beat}")
    return "\n".join(lines).strip()
def _normalize_fancy_text(scene: Dict[str, Any], default_duration: float) -> Dict[str, Any]:
ft = scene.get("fancy_text")
if isinstance(ft, dict):
# Ensure required keys exist
out = dict(ft)
out.setdefault("text", "")
out.setdefault("style", "highlight")
# support either position dict or string
if "position" not in out:
out["position"] = "center"
out.setdefault("start_time", 0.0)
out.setdefault("duration", default_duration)
return out
# legacy doesn't have fancy_text
return {
"text": "",
"style": "highlight",
"position": "center",
"start_time": 0.0,
"duration": default_duration,
}
def _build_voiceover_timeline_from_scenes(normalized_scenes: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Derive a voiceover timeline from per-scene voiceover text.

    Start times accumulate scene durations; scenes without voiceover text
    still advance the clock (when they have a positive duration) but emit
    no timeline entry. Entries with unknown duration default to 3.0s.
    """
    timeline: List[Dict[str, Any]] = []
    cursor = 0.0
    for index, scene in enumerate(normalized_scenes):
        scene_duration = float(scene.get("duration") or 0) or 0.0
        legacy = _as_dict(scene.get("_legacy"))
        voiceover = _as_str(legacy.get("voiceover") or scene.get("voiceover") or "")
        if voiceover.strip():
            timeline.append(
                {
                    "id": index + 1,
                    "text": voiceover,
                    "subtitle": voiceover,
                    "start_time": cursor,
                    "duration": scene_duration if scene_duration > 0 else 3.0,
                }
            )
        if scene_duration > 0:
            cursor += scene_duration
    return timeline
def normalize_legacy_project(doc: Dict[str, Any]) -> Dict[str, Any]:
    """
    Return a script_data dict compatible with current UI.

    Rule-based only (no AI): detects the legacy schema variant, normalizes
    each scene (prompts, duration, fancy_text), unifies/derives the
    voiceover timeline, and preserves the full original doc under
    ``_legacy`` so no legacy information is ever dropped.
    """
    schema = _detect_schema_variant(doc)
    scenes_in = _as_list(doc.get("scenes"))

    normalized_scenes: List[Dict[str, Any]] = []
    for s in scenes_in:
        if not isinstance(s, dict):
            continue
        # Fall back to 1-based positional id when "id" is missing/falsy.
        scene_id = int(s.get("id") or (len(normalized_scenes) + 1))
        duration = float(s.get("duration") or 0) or 0.0
        if duration <= 0:
            duration = 3.0  # safe default so downstream timing math never sees 0

        # visual prompt
        visual_prompt = ""
        if schema == "Schema_A":
            # legacy key is usually image_prompt
            visual_prompt = _as_str(s.get("visual_prompt") or s.get("image_prompt") or "")
        elif schema == "Schema_B":
            visual_prompt = _derive_visual_prompt_from_keyframe(s)
        else:
            # Unknown schema: try explicit fields first, then derive.
            visual_prompt = _as_str(s.get("visual_prompt") or s.get("image_prompt") or "")
            if not visual_prompt and s.get("keyframe"):
                visual_prompt = _derive_visual_prompt_from_keyframe(s)

        # video prompt: explicit field wins, else derive from motion metadata
        video_prompt = _as_str(s.get("video_prompt") or "")
        if not video_prompt:
            video_prompt = _derive_video_prompt_from_motion(s)

        # fancy text (default safe)
        fancy_text = _normalize_fancy_text(s, default_duration=duration)

        normalized_scene: Dict[str, Any] = {
            "id": scene_id,
            "duration": duration,
            "visual_prompt": visual_prompt,
            "video_prompt": video_prompt,
            "fancy_text": fancy_text,
            # keep optional fields if present
            "timeline": s.get("timeline", ""),
        }

        # Attach per-scene legacy snapshot (do not mutate the original)
        normalized_scene["_legacy"] = {
            "schema": schema,
            "image_url": s.get("image_url"),
            "keyframe": s.get("keyframe") or s.get("keyframes"),
            "camera_movement": s.get("camera_movement"),
            "story_beat": s.get("story_beat"),
            "rhythm": s.get("rhythm"),
            "sound_design": s.get("sound_design"),
            "voiceover": s.get("voiceover"),
        }
        normalized_scenes.append(normalized_scene)

    # voiceover timeline: normalize existing if present, else derive from scenes voiceover
    vtl = doc.get("voiceover_timeline")
    voiceover_timeline: List[Dict[str, Any]] = []
    if isinstance(vtl, list) and vtl:
        for idx, it in enumerate(vtl):
            if not isinstance(it, dict):
                continue
            # unify field names across legacy variants ("voiceover" vs "text")
            text = _as_str(it.get("text") or it.get("voiceover") or "")
            subtitle = _as_str(it.get("subtitle") or text)
            start_time = float(it.get("start_time") or 0.0)
            duration = float(it.get("duration") or 3.0)
            voiceover_timeline.append(
                {
                    "id": int(it.get("id") or (idx + 1)),
                    "text": text,
                    "subtitle": subtitle,
                    "start_time": start_time,
                    "duration": duration,
                }
            )
    else:
        voiceover_timeline = _build_voiceover_timeline_from_scenes(normalized_scenes)

    # script_data expected by UI
    script_data: Dict[str, Any] = {
        "hook": doc.get("hook", ""),
        "selling_points": doc.get("selling_points", []) or [],
        "target_audience": doc.get("target_audience", "") or "",
        "video_style": doc.get("video_style", "") or "",
        "bgm_style": doc.get("bgm_style", "") or "",
        "voiceover_timeline": voiceover_timeline,
        "scenes": normalized_scenes,
        "cta": doc.get("cta", ""),
        # Keep analysis for UI fallback display
        "analysis": doc.get("analysis", ""),
        # Preserve original
        "_legacy": doc,
        "_legacy_schema": schema,
    }
    return script_data

View File

@@ -0,0 +1,66 @@
"""
Legacy path mapper for assets generated by the 8502 runtime (/root/video-flow).
Problem:
- Postgres `scene_assets.local_path` may contain paths like `/root/video-flow/temp/...`
which are not visible inside docker containers running 8503 stack.
Solution:
- Mount host directories into containers (e.g. /legacy/temp, /legacy/output)
- Map legacy host paths -> container paths, and produce static URLs accordingly.
"""
from __future__ import annotations
import os
from pathlib import Path
from typing import Optional, Tuple
# Host-side prefixes written by the legacy 8502 runtime.
LEGACY_HOST_TEMP_PREFIX = "/root/video-flow/temp/"
LEGACY_HOST_OUTPUT_PREFIX = "/root/video-flow/output/"

# Container mount points (see docker-compose.yml)
LEGACY_CONTAINER_TEMP_DIR = "/legacy/temp"
LEGACY_CONTAINER_OUTPUT_DIR = "/legacy/output"

# Static URL prefixes served from the matching mounts.
LEGACY_STATIC_TEMP_PREFIX = "/static/legacy-temp/"
LEGACY_STATIC_OUTPUT_PREFIX = "/static/legacy-output/"


def map_legacy_local_path(local_path: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
    """
    Map a legacy host asset path to (container_visible_path, static_url).

    - Falsy input -> (None, None).
    - Path already visible from this process -> (local_path, None).
    - Known legacy host prefix -> rewritten to the container mount point
      (matched by basename) plus a static URL for browser access.
    - Anything else -> (local_path, None), unchanged.
    """
    if not local_path:
        return None, None
    # Already resolvable in this filesystem namespace: keep untouched.
    if os.path.exists(local_path):
        return local_path, None
    # Legacy host path -> container mount mapping, by basename only.
    basename = Path(local_path).name
    prefix_table = (
        (LEGACY_HOST_TEMP_PREFIX, LEGACY_CONTAINER_TEMP_DIR, LEGACY_STATIC_TEMP_PREFIX),
        (LEGACY_HOST_OUTPUT_PREFIX, LEGACY_CONTAINER_OUTPUT_DIR, LEGACY_STATIC_OUTPUT_PREFIX),
    )
    for host_prefix, mount_dir, url_prefix in prefix_table:
        if local_path.startswith(host_prefix):
            return str(Path(mount_dir) / basename), f"{url_prefix}{basename}"
    # Unknown path: keep as-is so callers can decide what to do.
    return local_path, None

49
modules/limits.py Normal file
View File

@@ -0,0 +1,49 @@
"""
Process-wide concurrency limits for Streamlit single-process deployment.
These limits reduce tail latency and avoid a single user saturating network/CPU
and impacting other concurrent sessions.
"""
from __future__ import annotations
import os
import threading
from contextlib import contextmanager
from typing import Iterator
def _env_int(name: str, default: int) -> int:
try:
return max(1, int(os.getenv(name, str(default))))
except Exception:
return default
MAX_CONCURRENT_IMAGE = _env_int("MAX_CONCURRENT_IMAGE", 6)
MAX_CONCURRENT_VIDEO = _env_int("MAX_CONCURRENT_VIDEO", 1)
_image_sem = threading.BoundedSemaphore(MAX_CONCURRENT_IMAGE)
_video_sem = threading.BoundedSemaphore(MAX_CONCURRENT_VIDEO)
@contextmanager
def acquire_image(blocking: bool = True) -> Iterator[bool]:
ok = _image_sem.acquire(blocking=blocking)
try:
yield ok
finally:
if ok:
_image_sem.release()
@contextmanager
def acquire_video(blocking: bool = True) -> Iterator[bool]:
ok = _video_sem.acquire(blocking=blocking)
try:
yield ok
finally:
if ok:
_video_sem.release()

93
modules/path_utils.py Normal file
View File

@@ -0,0 +1,93 @@
"""
Path utilities for cross-session / cross-project isolation.
Goal:
- Avoid file overwrites across concurrent users/projects by namespacing all temp artifacts
under temp/projects/{project_id}/...
- Provide safe unique filename helpers.
"""
from __future__ import annotations
import os
import re
import time
import uuid
from pathlib import Path
from typing import Optional
import config
# Any run of characters outside this safe set collapses into one "_".
_SAFE_CHARS_RE = re.compile(r"[^A-Za-z0-9._-]+")


def sanitize_filename(name: str) -> str:
    """
    Reduce *name* to filesystem-safe characters.

    Path separators become underscores (preventing traversal), surrounding
    whitespace is stripped, every run of unsafe characters collapses into a
    single "_", and non-string or empty input degrades to the literal "file".
    """
    if not isinstance(name, str):
        return "file"
    flattened = name.replace("\\", "_").replace("/", "_").strip()
    cleaned = _SAFE_CHARS_RE.sub("_", flattened)
    return cleaned if cleaned else "file"
def ensure_dir(path: Path) -> Path:
    """Create *path* (including parents) if missing and return it unchanged."""
    os.makedirs(path, exist_ok=True)
    return path
def project_root(project_id: str) -> Path:
    """Return (creating it if needed) the namespaced temp root for a project.

    Lives under ``temp/projects/{project_id}`` so concurrent projects never
    share artifact paths.
    """
    safe_id = sanitize_filename(project_id if project_id else "UNKNOWN")
    root = config.TEMP_DIR / "projects" / safe_id
    return ensure_dir(root)
def project_upload_dir(project_id: str) -> Path:
    """Per-project directory for user-uploaded source files."""
    return ensure_dir(project_root(project_id) / "uploads")
def project_images_dir(project_id: str) -> Path:
    """Per-project directory for generated images."""
    return ensure_dir(project_root(project_id) / "images")
def project_videos_dir(project_id: str) -> Path:
    """Per-project directory for generated video clips."""
    return ensure_dir(project_root(project_id) / "videos")
def project_audio_dir(project_id: str) -> Path:
    """Per-project directory for audio tracks."""
    return ensure_dir(project_root(project_id) / "audio")
def project_compose_dir(project_id: str, output_name: str) -> Path:
    """Per-project, per-output scratch directory for composition steps.

    Falls back to a timestamped name when *output_name* is empty.
    """
    fallback = f"compose_{int(time.time())}"
    safe_out = sanitize_filename(output_name if output_name else fallback)
    return ensure_dir(project_root(project_id) / "compose" / safe_out)
def unique_filename(
    prefix: str,
    ext: str,
    project_id: Optional[str] = None,
    scene_id: Optional[int] = None,
    extra: Optional[str] = None,
) -> str:
    """
    Build a collision-resistant, path-safe filename.

    Combines the sanitized prefix with optional scene/project/extra tags, a
    millisecond timestamp and a random suffix, e.g.
    ``scene_video_s1_PROJ-xxx_1733..._a1b2c3d4.mp4``.

    Args:
        prefix: Human-readable stem (sanitized).
        ext: File extension, with or without the leading dot (sanitized —
            the original passed it through raw, so a caller-supplied
            extension could smuggle separators into the name).
        project_id: Optional project namespace tag.
        scene_id: Optional scene index, rendered as ``s{scene_id}``.
        extra: Optional free-form tag (sanitized).

    Returns:
        A single filename with no directory components.
    """
    pfx = sanitize_filename(prefix or "file")
    raw_ext = (ext or "").lstrip(".")
    # Sanitize the extension like every other component.
    e = sanitize_filename(raw_ext) if raw_ext else "bin"
    pid = sanitize_filename(project_id) if project_id else None
    sid = str(int(scene_id)) if scene_id is not None else None
    ex = sanitize_filename(extra) if extra else None
    ts = str(int(time.time() * 1000))
    rnd = uuid.uuid4().hex[:8]
    parts = [pfx]
    if sid:
        parts.append(f"s{sid}")
    if pid:
        parts.append(pid)
    if ex:
        parts.append(ex)
    parts.extend([ts, rnd])
    return f"{'_'.join(parts)}.{e}"

View File

@@ -12,6 +12,7 @@ from pathlib import Path
import config
from modules import storage
from modules.db_manager import db
from modules import path_utils
logger = logging.getLogger(__name__)
@@ -76,15 +77,7 @@ class VideoGenerator:
logger.info(f"Recovering task {task_id}: status={status}")
if status == "succeeded" and video_url:
downloaded_path = self._download_video(video_url, os.path.basename(output_path))
if downloaded_path:
# 如果下载的文件名和目标路径不一致 (download_video 使用 filename 参数拼接到 TEMP_DIR)
# 需要移动或确认。 _download_video 返回完整路径。
# 如果 output_path 是绝对路径且不同,则移动。
if os.path.abspath(downloaded_path) != os.path.abspath(output_path):
import shutil
shutil.move(downloaded_path, output_path)
return True
return self._download_video_to(video_url, output_path)
return False
except Exception as e:
logger.error(f"Failed to recover video task {task_id}: {e}")
@@ -144,7 +137,15 @@ class VideoGenerator:
if status == "succeeded":
logger.info(f"Scene {scene_id} video generated successfully")
# 下载视频
video_path = self._download_video(result_url, f"scene_{scene_id}_video.mp4")
out_dir = path_utils.project_videos_dir(project_id) if project_id else config.TEMP_DIR
fname = path_utils.unique_filename(
prefix="scene_video",
ext="mp4",
project_id=project_id,
scene_id=scene_id,
extra=(task_id[-8:] if isinstance(task_id, str) else None),
)
video_path = self._download_video(result_url, fname, output_dir=out_dir)
if video_path:
generated_videos[scene_id] = video_path
# Update DB
@@ -235,13 +236,26 @@ class VideoGenerator:
content_url = None
if status == "succeeded":
if "content" in result:
content = result["content"]
if isinstance(content, list) and len(content) > 0:
item = content[0]
content_url = item.get("video_url") or item.get("url")
elif isinstance(content, dict):
content_url = content.get("video_url") or content.get("url")
# Try multiple known shapes for volcengine response
content = result.get("content")
# sometimes nested: data.content or data.result.content, etc.
if not content and isinstance(result.get("result"), dict):
content = result["result"].get("content")
def _extract_url(obj):
if isinstance(obj, dict):
return obj.get("video_url") or obj.get("url")
return None
if isinstance(content, list) and content:
# pick the first item that has a usable url
for item in content:
u = _extract_url(item)
if u:
content_url = u
break
elif isinstance(content, dict):
content_url = _extract_url(content)
return status, content_url
@@ -249,8 +263,26 @@ class VideoGenerator:
logger.error(f"Check task failed: {e}")
return "unknown", None
def _download_video(self, url: str, filename: str) -> str:
"""下载视频到临时目录"""
def _download_video_to(self, url: str, output_path: str) -> bool:
    """Download a video to an explicit path (avoids fixed TEMP_DIR filenames
    overwriting each other across concurrent projects).

    Args:
        url: Remote video URL; falsy values short-circuit to False.
        output_path: Destination file path; parent dirs are created.

    Returns:
        True on success, False on any failure (error is logged, not raised).
    """
    if not url or not output_path:
        return False
    try:
        out_p = Path(output_path)
        out_p.parent.mkdir(parents=True, exist_ok=True)
        # `with` releases the streamed connection even if iteration stops
        # early — the original never closed the response.
        with requests.get(url, stream=True, timeout=60) as response:
            response.raise_for_status()
            with open(out_p, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip keep-alive chunks
                        f.write(chunk)
        return True
    except Exception as e:
        logger.error(f"Download video failed: {e}")
        return False
def _download_video(self, url: str, filename: str, output_dir: Optional[Path] = None) -> str:
"""下载视频到临时目录(默认使用 config.TEMP_DIR可指定 output_dir 避免覆盖)"""
if not url:
return None
@@ -258,10 +290,13 @@ class VideoGenerator:
response = requests.get(url, stream=True, timeout=60)
response.raise_for_status()
output_path = config.TEMP_DIR / filename
out_dir = output_dir or config.TEMP_DIR
out_dir.mkdir(parents=True, exist_ok=True)
output_path = out_dir / filename
with open(output_path, "wb") as f:
for chunk in response.iter_content(chunk_size=8192):
f.write(chunk)
if chunk:
f.write(chunk)
return str(output_path)
except Exception as e: