fix: 字幕居中bug修复，BGM声音太小bug修复，默认system prompt微调 - 强化旁白字数控制

- composer.py: BGM音量调整为0.45，禁用ducking压缩
- ffmpeg_utils.py: 字幕居中修复，多行字幕每行单独居中
- script_gen.py: 清理调试代码
2025-12-15 16:18:00 +08:00
parent 33a165a615
commit 54fff30ee0
3 changed files with 67 additions and 25 deletions
--- a/modules/composer.py
+++ b/modules/composer.py
@@ -105,7 +105,7 @@ class VideoComposer:
            self._add_temp(silent_path)
            current_video = silent_path
            
-            # Step 2: 添加字幕 (白字黑边，无底框，下半区域居中)
+            # Step 2: 添加字幕 (白字黑边，无底框，水平居中)
            if subtitles:
                subtitled_path = str(config.TEMP_DIR / f"{output_name}_subtitled.mp4")
                subtitle_style = {
@@ -115,7 +115,8 @@ class VideoComposer:
                    "borderw": 5,
                    "bordercolor": "black",
                    "box": 0,  # 无底框
-                    "y": "h-200",  # 下半区域居中
+                    "x": "(w-text_w)/2",  # 水平居中
+                    "y": "h-200",  # 底部区域
                }
                ffmpeg_utils.add_multiple_subtitles(
                    current_video, subtitles, subtitled_path, default_style=subtitle_style
@@ -443,7 +444,8 @@ class VideoComposer:
                "borderw": 5,
                "bordercolor": "black",
                "box": 0,  # 无底框
-                "y": "h-200",  # 下半区域居中
+                "x": "(w-text_w)/2",  # 水平居中
+                "y": "h-200",  # 底部区域
            }
            ffmpeg_utils.add_multiple_subtitles(
                current_video, subtitles, subtitled_path, default_style=subtitle_style
--- a/modules/ffmpeg_utils.py
+++ b/modules/ffmpeg_utils.py
@@ -524,26 +524,41 @@ def add_multiple_subtitles(
        fontcolor = style.get("fontcolor", "white")
        borderw = style.get("borderw", 3)
        bordercolor = style.get("bordercolor", "black")
-        x = style.get("x", "(w-text_w)/2")
-        y = style.get("y", "h-200")
+        base_y = style.get("y", "h-200")
        
        # 默认启用背景框以提高可读性
        box = style.get("box", 1)
        boxcolor = style.get("boxcolor", "black@0.5")
        boxborderw = style.get("boxborderw", 10)
        
+        # 多行字幕：拆分成多个 drawtext 滤镜，每行单独居中
+        lines = text.split("\n") if "\n" in text else [text]
+        line_height = int(fontsize * 1.3)  # 行高
+        
+        for line_idx, line in enumerate(lines):
+            if not line.strip():
+                continue
+                
            # 转义：反斜杠、单引号、冒号、百分号
-        escaped_text = text.replace("\\", "\\\\").replace("'", "\\'").replace(":", "\\:").replace("%", "\\%")
+            escaped_line = line.replace("\\", "\\\\").replace("'", "\\'").replace(":", "\\:").replace("%", "\\%")
+            
+            # 计算每行的 y 位置（从底部往上排列）
+            # base_y 是最后一行的位置，往上依次排列
+            line_offset = (len(lines) - 1 - line_idx) * line_height
+            if isinstance(base_y, str) and base_y.startswith("h-"):
+                y_expr = f"({base_y})-{line_offset}"
+            else:
+                y_expr = f"({base_y})-{line_offset}"
            
            drawtext = (
-            f"drawtext=text='{escaped_text}':"
+                f"drawtext=text='{escaped_line}':"
                f"fontfile='{font}':"
                f"fontsize={fontsize}:"
                f"fontcolor={fontcolor}:"
                f"borderw={borderw}:"
                f"bordercolor={bordercolor}:"
                f"box={box}:boxcolor={boxcolor}:boxborderw={boxborderw}:"
-            f"x={x}:y={y}:"
+                f"x=(w-text_w)/2:y={y_expr}:"  # 每行都水平居中
                f"enable='between(t,{start},{start + duration})'"
            )
            filters.append(drawtext)
@@ -809,6 +824,16 @@ def add_bgm(
        bgm_volume: BGM音量
        loop: 是否循环BGM
    """
+    # 验证 BGM 文件存在
+    if not bgm_path or not os.path.exists(bgm_path):
+        logger.error(f"BGM file not found: {bgm_path}")
+        # 直接复制原视频，不添加 BGM
+        import shutil
+        shutil.copy(video_path, output_path)
+        return output_path
+    
+    logger.info(f"Adding BGM: {bgm_path} (volume={bgm_volume})")
+    
    info = get_video_info(video_path)
    video_duration = info["duration"]
    
--- a/modules/script_gen.py
+++ b/modules/script_gen.py
@@ -36,7 +36,7 @@ class ScriptGenerator:
 - 必须包含：目标人群分析、卖点提炼、分镜设计

 ## 分镜设计原则
-1. **单分镜单主体**：每个分镜聚焦一个视觉主体或动作，避免复杂运镜，因为 AI 生视频在长时间（>3秒）容易出现画面异常。
+1. **单分镜单主体**：每个分镜聚焦一个视觉主体或动作，避免复杂运镜。
 2. **旁白跨分镜**：一段完整的旁白/卖点可以跨越多个分镜。在 voiceover_timeline 中，通过 start_time 和 duration (秒) 控制旁白的绝对时间位置，无需与分镜一一对应。
 3. **节奏感**：分镜之间保持视觉连贯，通过景别变化（特写 -> 中景 -> 全景）制造节奏。
 4. **语速控制**：旁白语速约 4 字/秒，12字旁白约需 3 秒。
@@ -84,13 +84,28 @@ class ScriptGenerator:
 }

 ## 注意事项
-1. **visual_prompt**: 
+1. **visual_prompt (生图提示)**: 
   - 必须是英文。
-   - 描述要具体，例如 "Close-up shot of a hair clip, soft lighting, minimalist background".
-   - **CRITICAL**: 禁止 AI 额外生成装饰性文字、标语、水印。但必须保留商品包装自带的文字和 Logo（这是商品真实外观的一部分）。
-   - 正确写法: "Product front view, keep original packaging design --no added text --no watermarks"
+   - 描述要具体，例如 "Close-up shot of a hair clip on a young woman's hair, soft lighting, minimalist background".
+   - **人物出镜规则 (重要)**:
+     - 对于穿戴类商品（服装、饰品、发饰、鞋包、眼镜、手表等）：**必须包含人物模特**，展示穿戴效果。
+       - 例如：发饰 → "A young Asian woman with the hair clip styling her ponytail"
+       - 例如：衣服 → "A stylish young woman wearing the dress, full body shot"
+       - 例如：包包 → "A fashionable woman carrying the handbag on her shoulder"
+     - 对于使用类商品（护肤品、化妆品、电子产品等）：**建议包含人物使用场景**。
+       - 例如：护肤品 → "Close-up of a woman's hand applying the cream to her face"
+     - 对于纯展示类商品（食品、家居摆件等）：可以纯产品展示，不强制人物。
+   - **禁止 AI 额外生成装饰性文字、标语、水印**。但必须保留商品包装自带的文字和 Logo。
   - **EMPHASIS**: Strictly follow the appearance of the product in the reference images.
-2. **video_prompt**: 必须是英文，描述动作，例如 "Slow zoom in, the hair clip rotates slightly"。注意保持动作简单，避免复杂运镜和人体动作。
+2. **video_prompt (视频动效提示)**: 
+   - 必须是英文。
+   - **动作简单化 (重要)**：AI 生视频容易在复杂动作上出现瑕疵，因此：
+     - ✅ 推荐动作：slow zoom in/out, subtle camera pan, gentle rotation, soft light flickering, particles floating
+     - ✅ 人物推荐：slight head turn, gentle smile, hair flowing softly, holding still with minimal movement
+     - ❌ 避免动作：fast motion, walking, running, dancing, hand gestures, complex body movements, drastic camera movements
+   - 示例：
+     - Good: "Slow zoom in on the hair clip, the woman's hair gently flows in soft breeze"
+     - Bad: "The woman shakes her head dramatically to show the clip stays in place"
 3. **voiceover_timeline**:
   - 这是整个视频的旁白和字幕时间轴，独立于分镜。
   - `start_time` 是旁白开始的绝对时间 (秒)，`duration` 是旁白持续时长 (秒)。