chore: sync code and project files

2026-01-09 14:09:16 +08:00
parent 3d1fb37769
commit 30d7eb4b35
94 changed files with 12706 additions and 255 deletions
--- a/modules/ffmpeg_utils.py
+++ b/modules/ffmpeg_utils.py
@@ -172,7 +172,8 @@ def get_video_info(video_path: str) -> Dict[str, Any]:
 def concat_videos(
    video_paths: List[str],
    output_path: str,
-    target_size: Tuple[int, int] = (1080, 1920)
+    target_size: Tuple[int, int] = (1080, 1920),
+    fades: Optional[List[Dict[str, float]]] = None
 ) -> str:
    """
    使用 FFmpeg concat demuxer 拼接多段视频
@@ -197,10 +198,72 @@ def concat_videos(
    filter_parts = []
    for i in range(len(video_paths)):
        # scale 保持宽高比，pad 填充黑边居中
-        filter_parts.append(
+        chain = (
            f"[{i}:v]scale={width}:{height}:force_original_aspect_ratio=decrease,"
-            f"pad={width}:{height}:(ow-iw)/2:(oh-ih)/2:black,setsar=1[v{i}]"
+            f"pad={width}:{height}:(ow-iw)/2:(oh-ih)/2:black,setsar=1"
        )
+        # 可选：片段末尾“火山式转场”（不改时长、不重叠）
+        if fades and i < len(fades):
+            fx = fades[i] or {}
+            fi = float(fx.get("in", 0) or 0.0)
+            fo = float(fx.get("out", 0) or 0.0)
+            t_type = str(fx.get("type") or "")
+            t_dur = float(fx.get("dur") or 0.0)
+
+            try:
+                dur = float(get_video_info(video_paths[i]).get("duration") or 0.0)
+            except Exception:
+                dur = 0.0
+
+            # 基础淡入/淡出
+            if fi > 0:
+                chain += f",fade=t=in:st=0:d={fi}"
+            if fo > 0 and dur > 0:
+                st = max(dur - fo, 0.0)
+                chain += f",fade=t=out:st={st}:d={fo}"
+
+            # 末尾动效（WYSIWYG：前端预览必须与此一致）
+            if t_type and t_dur > 0 and dur > 0:
+                st = max(dur - t_dur, 0.0)
+                td = max(t_dur, 0.001)
+                p = f"if(between(t\\,{st}\\,{dur})\\,(t-{st})/{td}\\,0)"
+                if t_type == "fade":
+                    chain += f",fade=t=out:st={st}:d={t_dur}"
+                elif t_type == "fadeWhite":
+                    chain += f",fade=t=out:st={st}:d={t_dur}:color=white"
+                elif t_type == "blurOut":
+                    chain += f",gblur=sigma='10*{p}':steps=1"
+                elif t_type == "blurFade":
+                    chain += f",gblur=sigma='8*{p}':steps=1,fade=t=out:st={st}:d={t_dur}"
+                elif t_type == "flash":
+                    chain += f",eq=brightness='0.7*(1-abs(0.5-{p})*2)'"
+                elif t_type == "desaturate":
+                    chain += f",hue=s='1-0.9*{p}'"
+                elif t_type == "colorPop":
+                    chain += f",hue=s='1+0.8*{p}',eq=contrast='1+0.3*{p}'"
+                elif t_type == "hueShift":
+                    chain += f",hue=h='60*{p}'"
+                elif t_type == "darken":
+                    chain += f",eq=brightness='-0.4*{p}'"
+                elif t_type in ("slideLeft", "slideRight", "slideUp", "slideDown"):
+                    off = 80
+                    if t_type == "slideLeft":
+                        chain += f",pad={width+off}:{height}:{off/2}-{off}*{p}:0:black,crop={width}:{height}:{off/2}:0"
+                    if t_type == "slideRight":
+                        chain += f",pad={width+off}:{height}:{off/2}+{off}*{p}:0:black,crop={width}:{height}:{off/2}:0"
+                    if t_type == "slideUp":
+                        chain += f",pad={width}:{height+off}:0:{off/2}-{off}*{p}:black,crop={width}:{height}:0:{off/2}"
+                    if t_type == "slideDown":
+                        chain += f",pad={width}:{height+off}:0:{off/2}+{off}*{p}:black,crop={width}:{height}:0:{off/2}"
+                elif t_type in ("zoomOut", "zoomIn"):
+                    if t_type == "zoomOut":
+                        chain += f",scale=w='{width}*(1-0.10*{p})':h='{height}*(1-0.10*{p})':eval=frame,pad={width}:{height}:(ow-iw)/2:(oh-ih)/2:black"
+                    else:
+                        chain += f",scale=w='{width}*(1+0.10*{p})':h='{height}*(1+0.10*{p})':eval=frame,crop={width}:{height}"
+                elif t_type == "rotateOut":
+                    chain += f",rotate=a='0.12*{p}':c=black@1:ow={width}:oh={height}"
+        chain += f"[v{i}]"
+        filter_parts.append(chain)
    
    # 拼接所有视频流
    concat_inputs = "".join([f"[v{i}]" for i in range(len(video_paths))])
@@ -232,7 +295,8 @@ def concat_videos(
 def concat_videos_with_audio(
    video_paths: List[str],
    output_path: str,
-    target_size: Tuple[int, int] = (1080, 1920)
+    target_size: Tuple[int, int] = (1080, 1920),
+    fades: Optional[List[Dict[str, float]]] = None
 ) -> str:
    """
    拼接视频并保留音频轨道
@@ -250,10 +314,70 @@ def concat_videos_with_audio(
    
    # 视频处理
    for i in range(n):
-        filter_parts.append(
+        chain = (
            f"[{i}:v]scale={width}:{height}:force_original_aspect_ratio=decrease,"
-            f"pad={width}:{height}:(ow-iw)/2:(oh-ih)/2:black,setsar=1[v{i}]"
+            f"pad={width}:{height}:(ow-iw)/2:(oh-ih)/2:black,setsar=1"
        )
+        # 可选：片段末尾“火山式转场”（不改时长、不重叠）
+        if fades and i < len(fades):
+            fx = fades[i] or {}
+            fi = float(fx.get("in", 0) or 0.0)
+            fo = float(fx.get("out", 0) or 0.0)
+            t_type = str(fx.get("type") or "")
+            t_dur = float(fx.get("dur") or 0.0)
+
+            try:
+                dur = float(get_video_info(video_paths[i]).get("duration") or 0.0)
+            except Exception:
+                dur = 0.0
+
+            if fi > 0:
+                chain += f",fade=t=in:st=0:d={fi}"
+            if fo > 0 and dur > 0:
+                st = max(dur - fo, 0.0)
+                chain += f",fade=t=out:st={st}:d={fo}"
+
+            if t_type and t_dur > 0 and dur > 0:
+                st = max(dur - t_dur, 0.0)
+                td = max(t_dur, 0.001)
+                p = f"if(between(t\\,{st}\\,{dur})\\,(t-{st})/{td}\\,0)"
+                if t_type == "fade":
+                    chain += f",fade=t=out:st={st}:d={t_dur}"
+                elif t_type == "fadeWhite":
+                    chain += f",fade=t=out:st={st}:d={t_dur}:color=white"
+                elif t_type == "blurOut":
+                    chain += f",gblur=sigma='10*{p}':steps=1"
+                elif t_type == "blurFade":
+                    chain += f",gblur=sigma='8*{p}':steps=1,fade=t=out:st={st}:d={t_dur}"
+                elif t_type == "flash":
+                    chain += f",eq=brightness='0.7*(1-abs(0.5-{p})*2)'"
+                elif t_type == "desaturate":
+                    chain += f",hue=s='1-0.9*{p}'"
+                elif t_type == "colorPop":
+                    chain += f",hue=s='1+0.8*{p}',eq=contrast='1+0.3*{p}'"
+                elif t_type == "hueShift":
+                    chain += f",hue=h='60*{p}'"
+                elif t_type == "darken":
+                    chain += f",eq=brightness='-0.4*{p}'"
+                elif t_type in ("slideLeft", "slideRight", "slideUp", "slideDown"):
+                    off = 80
+                    if t_type == "slideLeft":
+                        chain += f",pad={width+off}:{height}:{off/2}-{off}*{p}:0:black,crop={width}:{height}:{off/2}:0"
+                    if t_type == "slideRight":
+                        chain += f",pad={width+off}:{height}:{off/2}+{off}*{p}:0:black,crop={width}:{height}:{off/2}:0"
+                    if t_type == "slideUp":
+                        chain += f",pad={width}:{height+off}:0:{off/2}-{off}*{p}:black,crop={width}:{height}:0:{off/2}"
+                    if t_type == "slideDown":
+                        chain += f",pad={width}:{height+off}:0:{off/2}+{off}*{p}:black,crop={width}:{height}:0:{off/2}"
+                elif t_type in ("zoomOut", "zoomIn"):
+                    if t_type == "zoomOut":
+                        chain += f",scale=w='{width}*(1-0.10*{p})':h='{height}*(1-0.10*{p})':eval=frame,pad={width}:{height}:(ow-iw)/2:(oh-ih)/2:black"
+                    else:
+                        chain += f",scale=w='{width}*(1+0.10*{p})':h='{height}*(1+0.10*{p})':eval=frame,crop={width}:{height}"
+                elif t_type == "rotateOut":
+                    chain += f",rotate=a='0.12*{p}':c=black@1:ow={width}:oh={height}"
+        chain += f"[v{i}]"
+        filter_parts.append(chain)
    
    # 音频处理（静音填充如果没有音频）
    for i in range(n):
@@ -469,6 +593,127 @@ def adjust_audio_duration(
    return output_path


+def _atempo_chain(speed: float) -> str:
+    """Build a comma-joined ``atempo`` filter chain for ``speed``.
+
+    Every stage emitted here stays within 0.5-2.0, so a factor outside that
+    range is spread over several chained stages whose product equals the
+    requested speed.
+
+    Args:
+        speed: Playback-rate multiplier (2.0 = twice as fast).
+
+    Returns:
+        A filter string such as ``"atempo=2.0,atempo=1.5"``. Input that cannot
+        be converted to a float, is not positive, or is NaN/infinite yields
+        ``"atempo=1.0"``.
+    """
+    import math
+
+    try:
+        s = float(speed)
+    except (TypeError, ValueError, OverflowError):
+        s = 1.0
+    # Infinity would never leave the halving loop below (inf / 2 == inf) and
+    # NaN would be emitted verbatim as "atempo=nan", so both fall back to 1.0.
+    if not math.isfinite(s) or s <= 0:
+        s = 1.0
+
+    parts = []
+    while s > 2.0:
+        parts.append("atempo=2.0")
+        s /= 2.0
+    while s < 0.5:
+        parts.append("atempo=0.5")
+        s /= 0.5
+    # The remainder is now within [0.5, 2.0].
+    parts.append(f"atempo={s}")
+    return ",".join(parts)
+
+
+def change_audio_speed(input_path: str, speed: float, output_path: str) -> Optional[str]:
+    """Write ``input_path`` to ``output_path`` with its playback speed changed.
+
+    Only the audio filter is set (an ``atempo`` chain built from ``speed``);
+    no trimming or padding is applied.
+
+    Args:
+        input_path: Source audio file.
+        speed: Playback-rate multiplier passed to ``_atempo_chain``.
+        output_path: Destination file; overwritten if present (``-y``).
+
+    Returns:
+        ``output_path``, or ``None`` when ``input_path`` is empty, ``None`` or
+        does not exist.
+    """
+    # Guard falsy paths first: os.path.exists(None) raises TypeError.
+    if not input_path or not os.path.exists(input_path):
+        return None
+    af = _atempo_chain(speed)
+    cmd = [FFMPEG_PATH, "-y", "-i", input_path, "-filter:a", af, output_path]
+    _run_ffmpeg(cmd)
+    return output_path
+
+
+def fit_audio_to_duration_by_speed(input_path: str, target_duration: float, output_path: str) -> Optional[str]:
+    """Retime audio so that it lasts ``target_duration`` seconds.
+
+    The tempo is changed by ``current_duration / target_duration`` (faster or
+    slower), then the result is padded with silence up to the target and
+    trimmed at the target, so rounding in the tempo stage cannot leave it
+    short or long. Intended for narration, where stretching a clip is expected
+    to change the speaking rate instead of appending silence.
+
+    Args:
+        input_path: Source audio file.
+        target_duration: Desired length in seconds.
+        output_path: Destination file; overwritten if present.
+
+    Returns:
+        ``output_path``, or ``None`` when ``input_path`` is empty, ``None`` or
+        does not exist. When the target is not a positive number, or the
+        probed duration is not positive, the input is copied unchanged.
+    """
+    import shutil
+
+    # Guard falsy paths first: os.path.exists(None) raises TypeError.
+    if not input_path or not os.path.exists(input_path):
+        return None
+    try:
+        td = float(target_duration or 0)
+    except (TypeError, ValueError, OverflowError):
+        td = 0.0
+    if td <= 0:
+        shutil.copy(input_path, output_path)
+        return output_path
+
+    cur = float(get_audio_info(input_path).get("duration") or 0.0)
+    if cur <= 0:
+        shutil.copy(input_path, output_path)
+        return output_path
+
+    af_speed = _atempo_chain(cur / td)
+    # apad=whole_dur pads with silence until the stream is td long
+    # (pad_dur=0 would append zero silence); atrim then cuts any excess.
+    af = f"{af_speed},apad=whole_dur={td},atrim=0:{td}"
+    cmd = [FFMPEG_PATH, "-y", "-i", input_path, "-filter:a", af, output_path]
+    _run_ffmpeg(cmd)
+    return output_path
+
+
+def force_audio_duration(input_path: str, target_duration: float, output_path: str) -> Optional[str]:
+    """Pad with silence or trim audio to ``target_duration`` seconds.
+
+    The tempo is left untouched; use this when any speed change has already
+    been applied upstream.
+
+    Args:
+        input_path: Source audio file.
+        target_duration: Desired length in seconds.
+        output_path: Destination file; overwritten if present.
+
+    Returns:
+        ``output_path``, or ``None`` when ``input_path`` is empty, ``None`` or
+        does not exist. A target that is not a positive number copies the
+        input unchanged.
+    """
+    # Guard falsy paths first: os.path.exists(None) raises TypeError.
+    if not input_path or not os.path.exists(input_path):
+        return None
+    try:
+        td = float(target_duration or 0)
+    except (TypeError, ValueError, OverflowError):
+        td = 0.0
+    if td <= 0:
+        import shutil
+        shutil.copy(input_path, output_path)
+        return output_path
+    # apad=whole_dur pads with silence until the stream is td long
+    # (pad_dur=0 would append zero silence); atrim then cuts any excess.
+    af = f"apad=whole_dur={td},atrim=0:{td}"
+    cmd = [FFMPEG_PATH, "-y", "-i", input_path, "-filter:a", af, output_path]
+    _run_ffmpeg(cmd)
+    return output_path
+
+
+def _which(cmd: str) -> Optional[str]:
+    """Look up ``cmd`` with ``shutil.which``.
+
+    Returns the value ``shutil.which`` gives, or ``None`` if the import or
+    the lookup raises.
+    """
+    located = None
+    try:
+        from shutil import which as _lookup
+        located = _lookup(cmd)
+    except Exception:
+        located = None
+    return located
+
+
+def normalize_sticker_to_png(input_path: str, output_path: str) -> Optional[str]:
+    """Return the path of a sticker image normalised for use in an ffmpeg overlay.
+
+    - ``.png``: the input path is returned as-is; ``output_path`` is not written.
+    - ``.svg``: rendered with ``rsvg-convert`` when it is found on PATH,
+      otherwise handed to ffmpeg (works only if the ffmpeg build can decode
+      SVG).
+    - anything else, ``.webp`` included: converted by ffmpeg, because WEBP
+      support differs between ffmpeg builds.
+
+    The extension check is case-insensitive. ffmpeg is given no explicit
+    output format, so ``output_path`` is presumably expected to end in
+    ``.png`` -- confirm against callers.
+
+    Args:
+        input_path: Source sticker image.
+        output_path: Destination written by the conversion paths.
+
+    Returns:
+        ``input_path`` for PNG input, ``output_path`` after a conversion, or
+        ``None`` when ``input_path`` is empty or does not exist.
+
+    Raises:
+        subprocess.CalledProcessError: if ``rsvg-convert`` exits non-zero.
+    """
+    if not input_path or not os.path.exists(input_path):
+        return None
+    ext = Path(input_path).suffix.lower()
+    if ext == ".png":
+        return input_path
+    if ext == ".svg":
+        rsvg = _which("rsvg-convert")
+        if rsvg:
+            import subprocess
+            subprocess.check_call([rsvg, "-o", output_path, input_path])
+            return output_path
+    # One ffmpeg invocation serves WEBP, SVG without rsvg-convert, and every
+    # other format; the command is identical in all three cases.
+    cmd = [FFMPEG_PATH, "-y", "-i", input_path, output_path]
+    _run_ffmpeg(cmd)
+    return output_path
+
+
 def get_audio_info(file_path: str) -> Dict[str, Any]:
    """Return media information for an audio file by delegating to ``get_video_info``."""
    audio_info = get_video_info(file_path)
    return audio_info
@@ -830,6 +1075,12 @@ def add_bgm(
    loop: bool = True,
    ducking: bool = True,
    duck_gain_db: float = -6.0,
+    # 新增：按时间段闪避（更可控，和旁白时间轴严格对齐）
+    duck_volume: float = 0.25,
+    duck_ranges: Optional[List[Tuple[float, float]]] = None,
+    # 新增：BGM 片段可有起点/时长（不强制从 0 覆盖整段视频）
+    start_time: float = 0.0,
+    clip_duration: Optional[float] = None,
    fade_in: float = 1.0,
    fade_out: float = 1.0
 ) -> str:
@@ -856,30 +1107,46 @@ def add_bgm(
    info = get_video_info(video_path)
    video_duration = info["duration"]
    
-    if loop:
-        bgm_chain = (
-            f"[1:a]aloop=-1:size=2e+09,asetpts=N/SR/TB,"
-            f"atrim=0:{video_duration},"
-            f"afade=t=in:st=0:d={fade_in},"
-            f"afade=t=out:st={max(video_duration - fade_out, 0)}:d={fade_out},"
-            f"volume={bgm_volume}[bgm]"
-        )
-    else:
-        bgm_chain = (
-            f"[1:a]"
-            f"afade=t=in:st=0:d={fade_in},"
-            f"afade=t=out:st={max(video_duration - fade_out, 0)}:d={fade_out},"
-            f"volume={bgm_volume}[bgm]"
-        )
+    # 片段时长：默认覆盖整段视频
+    dur = float(clip_duration) if (clip_duration is not None and float(clip_duration) > 0) else float(video_duration)
+    st = max(0.0, float(start_time or 0.0))
+    end_for_fade = max(dur - float(fade_out or 0.0), 0.0)

-    if ducking:
-        # 使用安全参数的 sidechaincompress，避免 unsupported 参数
+    # 基础链：loop/trim -> fades -> base volume
+    if loop:
+        bgm_chain = f"[1:a]aloop=-1:size=2e+09,asetpts=N/SR/TB,atrim=0:{dur}"
+    else:
+        bgm_chain = f"[1:a]atrim=0:{dur}"
+
+    bgm_chain += f",afade=t=in:st=0:d={float(fade_in or 0.0)},afade=t=out:st={end_for_fade}:d={float(fade_out or 0.0)},volume={bgm_volume}"
+
+    # 延迟到 start_time
+    if st > 1e-6:
+        ms = int(st * 1000)
+        bgm_chain += f",adelay={ms}|{ms}"
+
+    # 闪避（按时间段）
+    # 注意：使用 enable 让 filter 只在区间内生效（外部直接 passthrough）
+    if ducking and duck_ranges:
+        dv = max(0.05, min(1.0, float(duck_volume or 0.25)))
+        for (rs, re) in duck_ranges:
+            rsf = max(0.0, float(rs))
+            ref = max(rsf, float(re))
+            bgm_chain += f",volume={dv}:enable='between(t,{rsf},{ref})'"
+
+    bgm_chain += "[bgm]"
+
+    # 如果提供了 duck_ranges，就用确定性的 amix（ducking 已在 bgm_chain 内完成）
+    if ducking and duck_ranges:
+        filter_complex = f"{bgm_chain};[0:a][bgm]amix=inputs=2:duration=first:dropout_transition=0:normalize=0[outa]"
+    elif ducking:
+        # 否则退回 sidechaincompress（对原视频音频进行侧链压缩）
        filter_complex = (
            f"{bgm_chain};"
            f"[0:a][bgm]sidechaincompress=threshold=0.1:ratio=4:attack=5:release=250:makeup=1:mix=1:level_in=1:level_sc=1[outa]"
        )
    else:
-        filter_complex = f"{bgm_chain};[0:a][bgm]amix=inputs=2:duration=first[outa]"
+        filter_complex = f"{bgm_chain};[0:a][bgm]amix=inputs=2:duration=first:dropout_transition=0:normalize=0[outa]"
    
    cmd = [
        FFMPEG_PATH, "-y",