perf(8502): 并行生图(6并发)+超时重试；视频URL直连预览/下载；路径隔离

2025-12-17 12:21:22 +08:00
parent ebcf165c3f
commit 1e210ffccf
12 changed files with 1168 additions and 201 deletions
--- a/modules/composer.py
+++ b/modules/composer.py
@@ -11,6 +11,7 @@ from typing import Dict, Any, List, Optional, Union
 import config
 from modules import ffmpeg_utils, fancy_text, factory, storage
 from modules.text_renderer import renderer
+from modules import path_utils

 logger = logging.getLogger(__name__)

@@ -65,6 +66,7 @@ class VideoComposer:
        bgm_path: str = None,
        bgm_volume: float = 0.15,
        output_name: str = None,
+        project_id: Optional[str] = None,
        upload_to_r2: bool = False
    ) -> str:
        """
@@ -89,25 +91,27 @@ class VideoComposer:
        
        timestamp = int(time.time())
        output_name = output_name or f"composed_{timestamp}"
+        # Per-project temp dir to avoid cross-project overwrites; falls back to the shared config.TEMP_DIR when no project_id is given
+        temp_root = path_utils.project_compose_dir(project_id, output_name) if project_id else config.TEMP_DIR
        
        logger.info(f"Starting composition: {len(video_paths)} videos")
        
        try:
            # Step 1: 拼接视频
-            merged_path = str(config.TEMP_DIR / f"{output_name}_merged.mp4")
+            merged_path = str(Path(temp_root) / f"{output_name}_merged.mp4")
            ffmpeg_utils.concat_videos(video_paths, merged_path, self.target_size)
            self._add_temp(merged_path)
            current_video = merged_path

            # Step 1.1: 若无音轨，补一条静音底，避免后续滤镜找不到 0:a
-            silent_path = str(config.TEMP_DIR / f"{output_name}_silent.mp4")
+            silent_path = str(Path(temp_root) / f"{output_name}_silent.mp4")
            ffmpeg_utils.add_silence_audio(current_video, silent_path)
            self._add_temp(silent_path)
            current_video = silent_path
            
            # Step 2: 添加字幕 (白字黑边，无底框，水平居中)
            if subtitles:
-                subtitled_path = str(config.TEMP_DIR / f"{output_name}_subtitled.mp4")
+                subtitled_path = str(Path(temp_root) / f"{output_name}_subtitled.mp4")
                subtitle_style = {
                    "font": ffmpeg_utils._get_font_path(),
                    "fontsize": 60,
@@ -169,7 +173,7 @@ class VideoComposer:
                        "duration": ft.get("duration", 999)
                    })
                
-                fancy_path = str(config.TEMP_DIR / f"{output_name}_fancy.mp4")
+                fancy_path = str(Path(temp_root) / f"{output_name}_fancy.mp4")
                ffmpeg_utils.overlay_multiple_images(
                    current_video, overlay_configs, fancy_path
                )
@@ -178,13 +182,15 @@ class VideoComposer:
            
            # Step 4: 生成并混合旁白（火山 WS 优先，失败回退 Edge）
            if voiceover_text:
+                vo_out = str(Path(temp_root) / f"{output_name}_vo_full.mp3")
                vo_path = factory.generate_voiceover_volcengine(
                    text=voiceover_text,
-                    voice_type=self.voice_type
+                    voice_type=self.voice_type,
+                    output_path=vo_out,
                )
                self._add_temp(vo_path)
                
-                voiced_path = str(config.TEMP_DIR / f"{output_name}_voiced.mp4")
+                voiced_path = str(Path(temp_root) / f"{output_name}_voiced.mp4")
                ffmpeg_utils.mix_audio(
                    current_video, vo_path, voiced_path,
                    audio_volume=1.5,
@@ -195,12 +201,12 @@ class VideoComposer:
            
            elif voiceover_segments:
                current_video = self._add_segmented_voiceover(
-                    current_video, voiceover_segments, output_name
+                    current_video, voiceover_segments, output_name, Path(temp_root)
                )
            
            # Step 5: 添加BGM（淡入淡出，若 duck 失败会自动退回低音量混合）
            if bgm_path:
-                bgm_output = str(config.TEMP_DIR / f"{output_name}_bgm.mp4")
+                bgm_output = str(Path(temp_root) / f"{output_name}_bgm.mp4")
                ffmpeg_utils.add_bgm(
                    current_video, bgm_path, bgm_output,
                    bgm_volume=bgm_volume,
@@ -237,7 +243,8 @@ class VideoComposer:
        self,
        video_path: str,
        segments: List[Dict[str, Any]],
-        output_name: str
+        output_name: str,
+        temp_root: Path,
    ) -> str:
        """添加分段旁白"""
        if not segments:
@@ -254,7 +261,7 @@ class VideoComposer:
            audio_path = factory.generate_voiceover_volcengine(
                text=text,
                voice_type=voice,
-                output_path=str(config.TEMP_DIR / f"{output_name}_seg_{i}.mp3")
+                output_path=str(temp_root / f"{output_name}_seg_{i}.mp3")
            )
            
            if audio_path:
@@ -270,7 +277,7 @@ class VideoComposer:
        # 依次混入音频
        current = video_path
        for i, af in enumerate(audio_files):
-            output = str(config.TEMP_DIR / f"{output_name}_seg_mixed_{i}.mp4")
+            output = str(temp_root / f"{output_name}_seg_mixed_{i}.mp4")
            ffmpeg_utils.mix_audio(
                current, af["path"], output,
                audio_volume=1.0,
@@ -287,7 +294,8 @@ class VideoComposer:
        script: Dict[str, Any],
        video_map: Dict[int, str],
        bgm_path: str = None,
-        output_name: str = None
+        output_name: str = None,
+        project_id: Optional[str] = None,
    ) -> str:
        """
        基于生成脚本和视频映射进行合成
@@ -340,13 +348,30 @@ class VideoComposer:
                            # 无 background，不加底框
                        }

+                        # 让花字时长默认跟随镜头（不改 prompt，仅纠正过短/缺失 duration）
+                        start_in_scene = float(ft.get("start_time", 0) or 0.0)
+                        if start_in_scene < 0:
+                            start_in_scene = 0.0
+                        if start_in_scene >= duration:
+                            start_in_scene = 0.0
+                        ft_dur = ft.get("duration", None)
+                        try:
+                            ft_dur_val = float(ft_dur) if ft_dur is not None else None
+                        except Exception:
+                            ft_dur_val = None
+                        # If missing, unparseable, or shorter than 1.5s, extend to the scene end (at least 1.5s)
+                        if ft_dur_val is None or ft_dur_val < 1.5:
+                            ft_dur_val = max(duration - start_in_scene, 1.5)
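+                        # Clamp the length to [0.5s, scene duration]; start_in_scene is not subtracted, so a late-starting text may still run past the scene end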
+                        ft_dur_val = max(0.5, min(ft_dur_val, duration))
+
                        fancy_texts.append({
                            "text": text,
                            "style": fixed_style,
                            "x": "(W-w)/2",  # 居中
                            "y": "180",      # 上半区域
-                            "start": total_duration + float(ft.get("start_time", 0)),
-                            "duration": float(ft.get("duration", duration))
+                            "start": total_duration + start_in_scene,
+                            "duration": ft_dur_val
                        })
            
            total_duration += duration
@@ -354,15 +379,16 @@ class VideoComposer:
        # 2. 拼接视频
        timestamp = int(time.time())
        output_name = output_name or f"composed_{timestamp}"
+        temp_root = path_utils.project_compose_dir(project_id, output_name) if project_id else config.TEMP_DIR
        
-        merged_path = str(config.TEMP_DIR / f"{output_name}_merged.mp4")
+        merged_path = str(Path(temp_root) / f"{output_name}_merged.mp4")
        ffmpeg_utils.concat_videos(video_paths, merged_path, self.target_size)
        self._add_temp(merged_path)
        current_video = merged_path
        
        # 3. 处理整体旁白时间轴 (New Logic)
        voiceover_timeline = script.get("voiceover_timeline", [])
-        mixed_audio_path = str(config.TEMP_DIR / f"{output_name}_mixed_vo.mp3")
+        mixed_audio_path = str(Path(temp_root) / f"{output_name}_mixed_vo.mp3")
        
        # 初始化静音底轨 (长度为 total_duration)
        ffmpeg_utils._run_ffmpeg([
@@ -401,17 +427,17 @@ class VideoComposer:
                tts_path = factory.generate_voiceover_volcengine(
                    text=text,
                    voice_type=self.voice_type,
-                    output_path=str(config.TEMP_DIR / f"{output_name}_vo_{i}.mp3")
+                    output_path=str(Path(temp_root) / f"{output_name}_vo_{i}.mp3")
                )
                self._add_temp(tts_path)
                
                # 调整时长
-                adjusted_path = str(config.TEMP_DIR / f"{output_name}_vo_adj_{i}.mp3")
+                adjusted_path = str(Path(temp_root) / f"{output_name}_vo_adj_{i}.mp3")
                ffmpeg_utils.adjust_audio_duration(tts_path, target_duration, adjusted_path)
                self._add_temp(adjusted_path)
                
                # 混合到总音轨
-                new_mixed = str(config.TEMP_DIR / f"{output_name}_mixed_{i}.mp3")
+                new_mixed = str(Path(temp_root) / f"{output_name}_mixed_{i}.mp3")
                ffmpeg_utils.mix_audio_at_offset(mixed_audio_path, adjusted_path, target_start, new_mixed)
                mixed_audio_path = new_mixed # Update current mixed path
                self._add_temp(new_mixed)
@@ -425,7 +451,7 @@ class VideoComposer:
                })
        
        # 4. 将合成好的旁白混入视频
-        voiced_path = str(config.TEMP_DIR / f"{output_name}_voiced.mp4")
+        voiced_path = str(Path(temp_root) / f"{output_name}_voiced.mp4")
        ffmpeg_utils.mix_audio(
            current_video, mixed_audio_path, voiced_path,
            audio_volume=1.5,
@@ -436,7 +462,7 @@ class VideoComposer:
        
        # 5. 添加字幕 (使用新的 ffmpeg_utils.add_multiple_subtitles)
        if subtitles:
-            subtitled_path = str(config.TEMP_DIR / f"{output_name}_subtitled.mp4")
+            subtitled_path = str(Path(temp_root) / f"{output_name}_subtitled.mp4")
            subtitle_style = {
                "font": ffmpeg_utils._get_font_path(),
                "fontsize": 60,
@@ -455,7 +481,7 @@ class VideoComposer:
            
        # 6. 添加花字
        if fancy_texts:
-            fancy_path = str(config.TEMP_DIR / f"{output_name}_fancy.mp4")
+            fancy_path = str(Path(temp_root) / f"{output_name}_fancy.mp4")
            
            overlay_configs = []
            for ft in fancy_texts:
@@ -477,7 +503,7 @@ class VideoComposer:
            
        # 7. 添加 BGM
        if bgm_path:
-            bgm_output = str(config.TEMP_DIR / f"{output_name}_bgm.mp4")
+            bgm_output = str(Path(temp_root) / f"{output_name}_bgm.mp4")
            ffmpeg_utils.add_bgm(
                current_video, bgm_path, bgm_output,
                bgm_volume=0.15