feat: video-flow initial commit
- app.py: Streamlit UI for video generation workflow - main_flow.py: CLI tool with argparse support - modules/: Business logic modules (script_gen, image_gen, video_gen, composer, etc.) - config.py: Configuration with API keys and paths - requirements.txt: Python dependencies - docs/: System prompt documentation
This commit is contained in:
960
modules/ffmpeg_utils.py
Normal file
960
modules/ffmpeg_utils.py
Normal file
@@ -0,0 +1,960 @@
|
||||
"""
|
||||
FFmpeg 视频处理工具模块
|
||||
支持规模化批量视频处理:拼接、字幕、叠加、混音
|
||||
"""
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import tempfile
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Any, Optional, Tuple
|
||||
|
||||
import config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def _bundled_or_system(binary_name: str) -> str:
    """Return the project-bundled binary path if present, else the bare name."""
    bundled = config.BASE_DIR / "bin" / binary_name
    return str(bundled) if bundled.exists() else binary_name


# FFmpeg/FFprobe locations: a binary shipped under <BASE_DIR>/bin is preferred;
# otherwise the bare command name is used.
FFMPEG_PATH = _bundled_or_system("ffmpeg")
FFPROBE_PATH = _bundled_or_system("ffprobe")

# Candidate CJK-capable fonts, in lookup order: Linux system fonts first,
# then fonts bundled with the project, then macOS fonts.
DEFAULT_FONT_PATHS = [
    # Linux system-wide Chinese fonts (the most reliable choice on servers).
    "/usr/share/fonts/truetype/droid/DroidSansFallbackFull.ttf",
    "/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc",
    # Project-bundled fonts (note: make sure the files are real fonts and
    # not Git LFS pointer stubs).
    *(
        str(config.FONTS_DIR / font_file)
        for font_file in (
            "HarmonyOS-Sans-SC-Regular.ttf",
            "AlibabaPuHuiTi-Regular.ttf",
        )
    ),
    # macOS fonts (only useful for local debugging).
    "/System/Library/Fonts/PingFang.ttc",
    "/System/Library/Fonts/STHeiti Medium.ttc",
    "/System/Library/Fonts/Supplemental/Arial Unicode.ttf",
]
|
||||
|
||||
|
||||
def _get_font_path() -> str:
    """Return the first usable font file listed in DEFAULT_FONT_PATHS.

    A candidate is usable when it exists and is larger than 1000 bytes
    (presumably to skip placeholder files such as Git LFS pointers).

    Returns:
        The path of the first usable font, or the literal "Arial" when no
        candidate qualifies, so callers always receive a value.
    """
    usable_fonts = (
        candidate
        for candidate in DEFAULT_FONT_PATHS
        if os.path.exists(candidate) and os.path.getsize(candidate) > 1000
    )
    # Last resort: fall back to an English font name instead of failing.
    return next(usable_fonts, "Arial")
|
||||
|
||||
|
||||
def _sanitize_text(text: str) -> str:
|
||||
"""
|
||||
去除可能导致 ffmpeg 命令行错误的特殊控制字符,但保留 Emoji、数字、标点和各国语言。
|
||||
"""
|
||||
if not text:
|
||||
return ""
|
||||
|
||||
# 不再过滤任何字符,只确保不是 None
|
||||
return text
|
||||
|
||||
|
||||
def add_silence_audio(video_path: str, output_path: str) -> str:
    """Mux a silent stereo 44.1 kHz track into a video.

    Intended for videos without an audio track, so that later filter graphs
    referencing ``0:a`` can find an audio stream.

    Args:
        video_path: Input video file.
        output_path: Destination file.

    Returns:
        ``output_path``.
    """
    silent_source = "anullsrc=channel_layout=stereo:sample_rate=44100"

    cmd = [FFMPEG_PATH, "-y"]
    cmd += ["-i", video_path]
    # Second input: a generated silent source read through lavfi.
    cmd += ["-f", "lavfi", "-i", silent_source]
    # The silent source never ends, so stop at the shortest input (the video).
    cmd += ["-shortest", "-c:v", "copy", "-c:a", "aac"]
    cmd.append(output_path)

    _run_ffmpeg(cmd)
    return output_path
|
||||
|
||||
|
||||
def _run_ffmpeg(cmd: List[str], check: bool = True) -> subprocess.CompletedProcess:
    """Run an FFmpeg command and surface its stderr output.

    Args:
        cmd: Full argument list, starting with the executable.
        check: Passed to ``subprocess.run``; when True a non-zero exit
            status raises ``subprocess.CalledProcessError``.

    Returns:
        The ``CompletedProcess`` with captured text stdout/stderr.

    Raises:
        subprocess.CalledProcessError: If ``check`` is True and the command
            exits with a non-zero status (logged before re-raising).
    """
    command_line = " ".join(cmd)
    logger.debug(f"FFmpeg command: {command_line}")

    try:
        completed = subprocess.run(
            cmd, capture_output=True, text=True, check=check
        )
    except subprocess.CalledProcessError as exc:
        logger.error(f"FFmpeg failed: {exc.stderr}")
        raise

    stderr_text = completed.stderr
    # Echo stderr whenever the process returned, to help diagnose warnings
    # (e.g. font problems) that do not fail the command.
    if stderr_text:
        print(f"[FFmpeg stderr] {stderr_text}", flush=True)
    if completed.returncode != 0:
        logger.error(f"FFmpeg stderr: {stderr_text}")
    return completed
|
||||
|
||||
|
||||
def get_video_info(video_path: str) -> Dict[str, Any]:
    """Probe a media file with ffprobe and return its basic properties.

    Args:
        video_path: File to probe.

    Returns:
        A dict with ``duration`` (seconds, from the container format),
        ``width``, ``height`` and ``fps`` taken from the first video stream.
        Without a video stream the defaults are width 0, height 0, fps 30.

    Raises:
        ValueError: If ffprobe exits with a non-zero status.
    """
    probe_cmd = [
        FFPROBE_PATH,
        "-v", "quiet",
        "-print_format", "json",
        "-show_format",
        "-show_streams",
        video_path,
    ]
    probe = subprocess.run(probe_cmd, capture_output=True, text=True)
    if probe.returncode != 0:
        raise ValueError(f"Failed to probe video: {video_path}")

    import json
    payload = json.loads(probe.stdout)

    container = payload.get("format", {})
    info = {
        "duration": float(container.get("duration", 0)),
        "width": 0,
        "height": 0,
        "fps": 30,
    }

    # Only the first video stream is considered.
    first_video = next(
        (
            stream
            for stream in payload.get("streams", [])
            if stream.get("codec_type") == "video"
        ),
        None,
    )
    if first_video is None:
        return info

    info["width"] = first_video.get("width", 0)
    info["height"] = first_video.get("height", 0)

    # The frame rate is either a fraction such as "30/1" or a plain number
    # such as "29.97".
    rate = first_video.get("r_frame_rate", "30/1")
    if "/" in rate:
        numerator, denominator = rate.split("/")
        divisor = float(denominator)
        info["fps"] = float(numerator) / divisor if divisor != 0 else 30
    else:
        info["fps"] = float(rate)

    return info
|
||||
|
||||
|
||||
def concat_videos(
    video_paths: List[str],
    output_path: str,
    target_size: Tuple[int, int] = (1080, 1920)
) -> str:
    """Concatenate several videos (video stream only) with the concat filter.

    Every input is scaled to fit ``target_size`` while keeping its aspect
    ratio, padded with centred black bars, and then joined with FFmpeg's
    ``concat`` filter. The result contains no audio stream (``a=0``).

    The previous implementation also wrote a concat-demuxer list file that
    no command ever read; it was left behind when FFmpeg failed and made
    the function depend on the temp directory existing. It has been removed.

    Args:
        video_paths: Paths of the videos to join, in order.
        output_path: Destination file.
        target_size: Output resolution as (width, height); defaults to
            portrait 1080x1920.

    Returns:
        ``output_path``.

    Raises:
        ValueError: If ``video_paths`` is empty.
        subprocess.CalledProcessError: If FFmpeg fails.
    """
    if not video_paths:
        raise ValueError("No video paths provided")

    clip_count = len(video_paths)
    logger.info(f"Concatenating {clip_count} videos...")

    width, height = target_size

    # One normalisation chain per input: scale preserving aspect ratio, then
    # pad to the target size with the picture centred on black.
    filter_parts = [
        f"[{i}:v]scale={width}:{height}:force_original_aspect_ratio=decrease,"
        f"pad={width}:{height}:(ow-iw)/2:(oh-ih)/2:black,setsar=1[v{i}]"
        for i in range(clip_count)
    ]

    # Join all normalised streams into a single video output.
    concat_inputs = "".join(f"[v{i}]" for i in range(clip_count))
    filter_parts.append(f"{concat_inputs}concat=n={clip_count}:v=1:a=0[outv]")
    filter_complex = ";".join(filter_parts)

    cmd = [FFMPEG_PATH, "-y"]
    for path in video_paths:
        cmd.extend(["-i", path])
    cmd.extend([
        "-filter_complex", filter_complex,
        "-map", "[outv]",
        "-c:v", "libx264",
        "-preset", "fast",
        "-crf", "23",
        "-pix_fmt", "yuv420p",
        output_path,
    ])

    _run_ffmpeg(cmd)

    logger.info(f"Concatenated video saved: {output_path}")
    return output_path
|
||||
|
||||
|
||||
def concat_videos_with_audio(
    video_paths: List[str],
    output_path: str,
    target_size: Tuple[int, int] = (1080, 1920)
) -> str:
    """Concatenate videos while keeping their audio tracks.

    Video streams are scaled/padded to ``target_size``; audio streams are
    converted to 44.1 kHz stereo. If the FFmpeg run fails, the function
    falls back to ``concat_videos`` (video only).

    Args:
        video_paths: Paths of the videos to join, in order.
        output_path: Destination file.
        target_size: Output resolution as (width, height).

    Returns:
        ``output_path``.

    Raises:
        ValueError: If ``video_paths`` is empty.
    """
    if not video_paths:
        raise ValueError("No video paths provided")

    n = len(video_paths)
    logger.info(f"Concatenating {n} videos with audio...")

    width, height = target_size
    indices = range(n)

    # Per-input video normalisation: scale keeping aspect ratio, pad centred.
    video_chains = [
        f"[{i}:v]scale={width}:{height}:force_original_aspect_ratio=decrease,"
        f"pad={width}:{height}:(ow-iw)/2:(oh-ih)/2:black,setsar=1[v{i}]"
        for i in indices
    ]
    # Per-input audio normalisation. Note: this references [i:a] directly,
    # so an input without an audio stream makes the graph fail and triggers
    # the video-only fallback below.
    audio_chains = [
        f"[{i}:a]aformat=sample_rates=44100:channel_layouts=stereo[a{i}]"
        for i in indices
    ]

    video_labels = "".join(f"[v{i}]" for i in indices)
    audio_labels = "".join(f"[a{i}]" for i in indices)
    joiners = [
        f"{video_labels}concat=n={n}:v=1:a=0[outv]",
        f"{audio_labels}concat=n={n}:v=0:a=1[outa]",
    ]
    filter_complex = ";".join(video_chains + audio_chains + joiners)

    input_args = [arg for path in video_paths for arg in ("-i", path)]
    encode_args = [
        "-filter_complex", filter_complex,
        "-map", "[outv]",
        "-map", "[outa]",
        "-c:v", "libx264",
        "-preset", "fast",
        "-crf", "23",
        "-c:a", "aac",
        "-b:a", "128k",
        "-pix_fmt", "yuv420p",
        output_path,
    ]
    cmd = [FFMPEG_PATH, "-y", *input_args, *encode_args]

    try:
        _run_ffmpeg(cmd)
    except subprocess.CalledProcessError:
        # Audio concat failed: fall back to the video-only variant.
        logger.warning("Audio concat failed, falling back to video only")
        return concat_videos(video_paths, output_path, target_size)

    logger.info(f"Concatenated video with audio saved: {output_path}")
    return output_path
|
||||
|
||||
|
||||
def add_subtitle(
    video_path: str,
    text: str,
    start: float,
    duration: float,
    output_path: str,
    style: Optional[Dict[str, Any]] = None
) -> str:
    """Burn a single subtitle into a video with the ``drawtext`` filter.

    Args:
        video_path: Input video path.
        text: Subtitle text.
        start: Time (seconds) at which the subtitle appears.
        duration: How long (seconds) the subtitle stays visible.
        output_path: Destination file.
        style: Optional style overrides. Recognised keys:
            ``fontsize``, ``fontcolor``, ``borderw`` (outline width),
            ``bordercolor``, ``x`` (expression, e.g. "(w-text_w)/2"),
            ``y``, and ``font`` (passed to drawtext's ``fontfile``).

    Returns:
        ``output_path``.

    Raises:
        subprocess.CalledProcessError: If FFmpeg fails.
    """
    style = style or {}

    # Defaults: white 48px text with a black outline, horizontally centred,
    # slightly above the bottom edge.
    fontsize = style.get("fontsize", 48)
    fontcolor = style.get("fontcolor", "white")
    borderw = style.get("borderw", 3)
    bordercolor = style.get("bordercolor", "black")
    x = style.get("x", "(w-text_w)/2")
    y = style.get("y", "h-200")

    # Default to a font that was actually found on disk rather than a
    # hard-coded path that may be missing or broken.
    default_font_path = _get_font_path()
    font = style.get("font", default_font_path)

    # Escape backslash, single quote, colon and percent sign. The backslash
    # must be handled first so the escapes added afterwards are not doubled.
    # This is the same sequence add_multiple_subtitles applies.
    escaped_text = (
        text.replace("\\", "\\\\")
        .replace("'", "\\'")
        .replace(":", "\\:")
        .replace("%", "\\%")
    )

    drawtext = (
        f"drawtext=text='{escaped_text}':"
        f"fontfile='{font}':"
        f"fontsize={fontsize}:"
        f"fontcolor={fontcolor}:"
        f"borderw={borderw}:"
        f"bordercolor={bordercolor}:"
        f"x={x}:y={y}:"
        f"enable='between(t,{start},{start + duration})'"
    )

    cmd = [
        FFMPEG_PATH, "-y",
        "-i", video_path,
        "-vf", drawtext,
        "-c:v", "libx264",
        "-preset", "fast",
        "-crf", "23",
        "-c:a", "copy",
        "-pix_fmt", "yuv420p",
        output_path,
    ]

    _run_ffmpeg(cmd)
    logger.info(f"Added subtitle: '{text[:20]}...' at {start}s")
    return output_path
|
||||
|
||||
|
||||
def wrap_text(text: str, max_chars: int = 18) -> str:
    """Hard-wrap text into lines of at most ``max_chars`` characters.

    Every character counts as one unit regardless of script, which is a
    simplification for mixed CJK/Latin text.

    Args:
        text: Text to wrap; falsy input yields "".
        max_chars: Maximum characters per line. Values <= 0 disable
            wrapping (previously they put a newline before every character,
            including the first).

    Returns:
        The wrapped text. Text that already contains a newline is assumed
        to be wrapped by hand and is returned unchanged.
    """
    if not text:
        return ""

    # Existing line breaks mean the caller already laid the text out.
    if "\n" in text or max_chars <= 0:
        return text

    return "\n".join(
        text[offset:offset + max_chars]
        for offset in range(0, len(text), max_chars)
    )
|
||||
|
||||
|
||||
def mix_audio_at_offset(
    base_audio: str,
    overlay_audio: str,
    offset: float,
    output_path: str,
    base_volume: float = 1.0,
    overlay_volume: float = 1.0
) -> str:
    """Mix an overlay audio file into a base audio file at a time offset.

    Args:
        base_audio: Base audio file; the output length follows this input.
        overlay_audio: Audio mixed on top of the base.
        offset: Delay (seconds) before the overlay starts.
        output_path: Destination file.
        base_volume: Volume factor applied to the base audio.
        overlay_volume: Volume factor applied to the overlay audio.

    Returns:
        ``output_path``, or ``overlay_audio`` unchanged when the base file
        does not exist (nothing is written in that case).
    """
    # Without a base file there is nothing to mix into; hand back the
    # overlay as-is.
    if not os.path.exists(base_audio):
        logger.warning(f"Base audio not found: {base_audio}")
        return overlay_audio

    delay_ms = int(offset * 1000)
    filter_graph = ";".join((
        f"[0:a]volume={base_volume}[a0]",
        # adelay takes one delay per channel, hence the value twice.
        f"[1:a]volume={overlay_volume},adelay={delay_ms}|{delay_ms}[a1]",
        "[a0][a1]amix=inputs=2:duration=first:dropout_transition=0:normalize=0[out]",
    ))

    cmd = [FFMPEG_PATH, "-y", "-i", base_audio, "-i", overlay_audio]
    cmd += ["-filter_complex", filter_graph, "-map", "[out]"]
    # MP3 is used for audio-only mixing.
    cmd += ["-c:a", "mp3", output_path]

    _run_ffmpeg(cmd)
    return output_path
|
||||
|
||||
|
||||
def adjust_audio_duration(
    input_path: str,
    target_duration: float,
    output_path: str
) -> Optional[str]:
    """Speed audio up so it fits a target duration; never slow it down.

    Behaviour:
      - audio longer than the target: sped up with ``atempo``, capped at 2x;
      - audio no longer than the target: copied unchanged.

    Args:
        input_path: Source audio file.
        target_duration: Desired maximum duration in seconds. A value <= 0
            cannot be reached, so the maximum speed-up is applied instead
            of dividing by zero or producing a negative tempo.
        output_path: Destination file.

    Returns:
        ``output_path`` on success, ``input_path`` when the source duration
        cannot be determined (<= 0), or None when the input does not exist.
    """
    if not os.path.exists(input_path):
        return None

    current_duration = float(get_audio_info(input_path).get("duration", 0))
    if current_duration <= 0:
        return input_path

    if current_duration <= target_duration:
        # Short enough already: keep the original speed, just copy.
        import shutil
        shutil.copy(input_path, output_path)
        logger.info(f"Audio ({current_duration:.2f}s) <= target ({target_duration:.2f}s), keeping original speed")
        return output_path

    # Cap the speed-up at 2x so the result stays intelligible.
    max_speed = 2.0
    if target_duration > 0:
        speed_ratio = min(current_duration / target_duration, max_speed)
    else:
        speed_ratio = max_speed

    logger.info(f"Audio ({current_duration:.2f}s) > target ({target_duration:.2f}s), speeding up {speed_ratio:.2f}x")

    cmd = [
        FFMPEG_PATH, "-y",
        "-i", input_path,
        "-filter:a", f"atempo={speed_ratio}",
        output_path,
    ]
    _run_ffmpeg(cmd)
    return output_path
|
||||
|
||||
|
||||
def get_audio_info(file_path: str) -> Dict[str, Any]:
    """Return probe information for an audio file.

    Delegates to ``get_video_info``, so the result has the same keys.
    """
    audio_info = get_video_info(file_path)
    return audio_info
|
||||
|
||||
|
||||
def wrap_text_smart(text: str, max_chars: int = 15) -> str:
|
||||
"""
|
||||
智能字幕换行(上短下长策略)
|
||||
"""
|
||||
if not text or len(text) <= max_chars:
|
||||
return text
|
||||
|
||||
# 优先在标点或空格处换行
|
||||
split_chars = [",", "。", "!", "?", " ", ",", ".", "!", "?"]
|
||||
best_split = -1
|
||||
|
||||
# 寻找中间附近的分割点
|
||||
mid = len(text) // 2
|
||||
|
||||
for i in range(len(text)):
|
||||
if text[i] in split_chars:
|
||||
# 偏好后半部分(上短下长)
|
||||
if abs(i - mid) < abs(best_split - mid):
|
||||
best_split = i
|
||||
|
||||
if best_split != -1 and best_split < len(text) - 1:
|
||||
return text[:best_split+1] + "\n" + text[best_split+1:]
|
||||
|
||||
# 强制换行(上短下长)
|
||||
split_idx = int(len(text) * 0.4) # 上面 40%
|
||||
return text[:split_idx] + "\n" + text[split_idx:]
|
||||
|
||||
|
||||
def add_multiple_subtitles(
    video_path: str,
    subtitles: List[Dict[str, Any]],
    output_path: str,
    default_style: Dict[str, Any] = None
) -> str:
    """Burn several subtitles into a video in a single FFmpeg pass.

    Args:
        video_path: Input video path.
        subtitles: One dict per subtitle with keys ``text``, ``start``
            (default 0), ``duration`` (default 3) and optional ``style``.
        output_path: Destination file.
        default_style: Style values applied to every subtitle; a
            subtitle's own ``style`` overrides them key by key.

    Returns:
        ``output_path``. With no subtitles the input is simply copied.
    """
    if not subtitles:
        # Nothing to draw: copy the video through.
        import shutil
        shutil.copy(video_path, output_path)
        return output_path

    default_style = default_style or {}

    # Force a full CJK font: the project's NotoSansSC first, then Droid,
    # then whatever _get_font_path() finds. A file must exceed 100 KB to be
    # considered valid.
    min_font_bytes = 1024 * 100
    font = None
    for candidate in (
        "/root/video-flow/assets/fonts/NotoSansSC-Regular.otf",
        "/usr/share/fonts/truetype/droid/DroidSansFallbackFull.ttf",
    ):
        if os.path.exists(candidate) and os.path.getsize(candidate) > min_font_bytes:
            font = candidate
            break
    if font is None:
        font = _get_font_path()

    print(f"[SubDebug] Using font for subtitles: {font}", flush=True)

    # Escapes applied in order: backslash, single quote, colon, percent sign.
    escapes = (("\\", "\\\\"), ("'", "\\'"), (":", "\\:"), ("%", "\\%"))

    filters = []
    for entry in subtitles:
        raw_text = entry.get("text", "")
        # Dump repr and code points to help track down odd characters.
        print(f"[SubDebug] Subtitle text repr: {repr(raw_text)}", flush=True)
        print(f"[SubDebug] Subtitle text hex: {' '.join(hex(ord(c)) for c in raw_text)}", flush=True)

        # Normalise, then hard-wrap long lines.
        escaped_text = wrap_text(_sanitize_text(raw_text))

        start = entry.get("start", 0)
        duration = entry.get("duration", 3)
        style = {**default_style, **entry.get("style", {})}

        for plain, escaped in escapes:
            escaped_text = escaped_text.replace(plain, escaped)

        # A background box is enabled by default to improve readability.
        options = [
            f"text='{escaped_text}'",
            f"fontfile='{font}'",
            f"fontsize={style.get('fontsize', 48)}",
            f"fontcolor={style.get('fontcolor', 'white')}",
            f"borderw={style.get('borderw', 3)}",
            f"bordercolor={style.get('bordercolor', 'black')}",
            f"box={style.get('box', 1)}",
            f"boxcolor={style.get('boxcolor', 'black@0.5')}",
            f"boxborderw={style.get('boxborderw', 10)}",
            f"x={style.get('x', '(w-text_w)/2')}",
            f"y={style.get('y', 'h-200')}",
            f"enable='between(t,{start},{start + duration})'",
        ]
        filters.append("drawtext=" + ":".join(options))

    # Chain the drawtext filters with commas.
    vf = ",".join(filters)

    cmd = [
        FFMPEG_PATH, "-y",
        "-i", video_path,
        "-vf", vf,
        "-c:v", "libx264",
        "-preset", "fast",
        "-crf", "23",
        "-c:a", "copy",
        "-pix_fmt", "yuv420p",
        output_path,
    ]

    _run_ffmpeg(cmd)
    logger.info(f"Added {len(subtitles)} subtitles")
    return output_path
|
||||
|
||||
|
||||
def overlay_image(
    video_path: str,
    image_path: str,
    output_path: str,
    position: Tuple[int, int] = None,
    start: float = 0,
    duration: float = None,
    fade_in: float = 0,
    fade_out: float = 0
) -> str:
    """Overlay a transparent PNG (caption art, watermark, ...) on a video.

    Args:
        video_path: Input video path.
        image_path: PNG image path (alpha channel supported).
        output_path: Destination file.
        position: (x, y) of the image; None centres it.
        start: Time (seconds) at which the image appears.
        duration: Visible time in seconds; None means until the video ends.
        fade_in: Fade-in length in seconds.
        fade_out: Fade-out length in seconds.

    Returns:
        ``output_path``.
    """
    video_duration = get_video_info(video_path)["duration"]
    if duration is None:
        duration = video_duration - start

    if position:
        x, y = position
        placement = f"x={x}:y={y}"
    else:
        placement = "x=(W-w)/2:y=(H-h)/2"  # centred

    # Restrict the overlay to the requested time window.
    time_window = f"enable='between(t,{start},{start + duration})'"
    overlay_filter = f"overlay={placement}:{time_window}"

    fades = []
    if fade_in > 0:
        fades.append(f"fade=t=in:st={start}:d={fade_in}:alpha=1")
    if fade_out > 0:
        fade_out_start = start + duration - fade_out
        fades.append(f"fade=t=out:st={fade_out_start}:d={fade_out}:alpha=1")

    if fades:
        # NOTE(review): the image input is not looped, so these fade
        # timestamps may not line up with a single-frame PNG - confirm the
        # fades render as intended.
        image_chain = ",".join(fades)
        filter_complex = f"[1:v]{image_chain}[img];[0:v][img]{overlay_filter}[outv]"
    else:
        filter_complex = f"[0:v][1:v]{overlay_filter}[outv]"

    cmd = [FFMPEG_PATH, "-y", "-i", video_path, "-i", image_path]
    cmd += ["-filter_complex", filter_complex]
    # "0:a?" keeps the source audio when there is one, without failing
    # when there is none.
    cmd += ["-map", "[outv]", "-map", "0:a?"]
    cmd += ["-c:v", "libx264", "-preset", "fast", "-crf", "23"]
    cmd += ["-c:a", "copy", "-pix_fmt", "yuv420p", output_path]

    _run_ffmpeg(cmd)
    logger.info(f"Overlaid image at {position or 'center'}, {start}s-{start+duration}s")
    return output_path
|
||||
|
||||
|
||||
def overlay_multiple_images(
    video_path: str,
    images: List[Dict[str, Any]],
    output_path: str
) -> str:
    """Overlay several transparent PNGs on a video in one FFmpeg pass.

    Args:
        video_path: Input video path.
        images: One dict per image with ``path`` (required) and optional
            ``x``, ``y`` (default centred), ``start`` (default 0) and
            ``duration`` (default 999 seconds).
        output_path: Destination file.

    Returns:
        ``output_path``. With no images the input is simply copied.
    """
    if not images:
        import shutil
        shutil.copy(video_path, output_path)
        return output_path

    # Input 0 is the video; inputs 1..n are the images, in order.
    input_args = ["-i", video_path]
    for spec in images:
        input_args += ["-i", spec["path"]]

    # Chain the overlays: each step draws on the previous step's output and
    # the final step is labelled "outv".
    final_index = len(images) - 1
    overlay_steps = []
    source_label = "0:v"
    for idx, spec in enumerate(images):
        x_expr = spec.get("x", "(W-w)/2")
        y_expr = spec.get("y", "(H-h)/2")
        begin = spec.get("start", 0)
        span = spec.get("duration", 999)

        time_window = f"enable='between(t,{begin},{begin + span})'"
        target_label = "outv" if idx == final_index else f"tmp{idx}"

        overlay_steps.append(
            f"[{source_label}][{idx + 1}:v]overlay=x={x_expr}:y={y_expr}:{time_window}[{target_label}]"
        )
        source_label = target_label

    filter_complex = ";".join(overlay_steps)

    encode_args = [
        "-filter_complex", filter_complex,
        "-map", "[outv]",
        "-map", "0:a?",
        "-c:v", "libx264",
        "-preset", "fast",
        "-crf", "23",
        "-c:a", "copy",
        "-pix_fmt", "yuv420p",
        output_path,
    ]
    cmd = [FFMPEG_PATH, "-y", *input_args, *encode_args]

    _run_ffmpeg(cmd)
    logger.info(f"Overlaid {len(images)} images")
    return output_path
|
||||
|
||||
|
||||
def mix_audio(
    video_path: str,
    audio_path: str,
    output_path: str,
    audio_volume: float = 1.0,
    video_volume: float = 0.1,
    audio_start: float = 0
) -> str:
    """Mix an extra audio file (narration, BGM, ...) into a video.

    Args:
        video_path: Input video path.
        audio_path: Audio file to mix in.
        output_path: Destination file.
        audio_volume: Volume factor for the new audio (0-1).
        video_volume: Volume factor for the video's own audio (0-1).
        audio_start: Time (seconds) at which the new audio starts.

    Returns:
        ``output_path``.
    """
    logger.info(f"Mixing audio: {audio_path}")

    # The probe result is not used below; the call still raises ValueError
    # when the video cannot be probed.
    get_video_info(video_path)

    # adelay expects milliseconds, one value per channel.
    delay_ms = int(audio_start * 1000)
    filter_complex = ";".join((
        f"[0:a]volume={video_volume}[va]",
        f"[1:a]adelay={delay_ms}|{delay_ms},volume={audio_volume}[aa]",
        "[va][aa]amix=inputs=2:duration=longest:dropout_transition=0:normalize=0[outa]",
    ))

    input_args = [FFMPEG_PATH, "-y", "-i", video_path, "-i", audio_path]
    output_args = ["-c:v", "copy", "-c:a", "aac", "-b:a", "192k", output_path]

    mix_cmd = input_args + [
        "-filter_complex", filter_complex,
        "-map", "0:v",
        "-map", "[outa]",
    ] + output_args

    try:
        _run_ffmpeg(mix_cmd)
    except subprocess.CalledProcessError:
        # Treated as "the video has no audio track": attach the new audio
        # directly (volume and start offset are not applied on this path).
        logger.warning("Video has no audio track, adding audio directly")
        direct_cmd = input_args + ["-map", "0:v", "-map", "1:a"] + output_args
        _run_ffmpeg(direct_cmd)

    logger.info(f"Audio mixed: {output_path}")
    return output_path
|
||||
|
||||
|
||||
def add_bgm(
    video_path: str,
    bgm_path: str,
    output_path: str,
    bgm_volume: float = 0.06,
    loop: bool = True,
    ducking: bool = True,
    duck_gain_db: float = -6.0,
    fade_in: float = 1.0,
    fade_out: float = 1.0
) -> str:
    """Add background music to a video, optionally ducked under its audio.

    The BGM is faded in and out, scaled by ``bgm_volume`` and, when
    ``loop`` is set, looped and trimmed to the video length.

    With ``ducking`` enabled the BGM is compressed using the video's own
    audio as the sidechain signal and then mixed with that audio. The
    previous graph fed ``[0:a][bgm]`` into ``sidechaincompress``, which
    outputs only the (compressed) first input, so the BGM never reached
    the result.

    Args:
        video_path: Input video path; must contain an audio stream.
        bgm_path: Background music file.
        output_path: Destination file.
        bgm_volume: Volume factor applied to the BGM.
        loop: Loop the BGM to cover the whole video.
        ducking: Lower the BGM while the video's audio is loud.
        duck_gain_db: Currently unused; kept for interface compatibility.
        fade_in: BGM fade-in length in seconds.
        fade_out: BGM fade-out length in seconds.

    Returns:
        ``output_path``.

    Raises:
        subprocess.CalledProcessError: If FFmpeg fails (after the plain-mix
            fallback when ducking was requested).
    """
    info = get_video_info(video_path)
    video_duration = info["duration"]
    fade_out_start = max(video_duration - fade_out, 0)

    # BGM preparation chain: [loop + trim,] fade in, fade out, volume.
    bgm_steps = []
    if loop:
        bgm_steps += [
            "aloop=-1:size=2e+09",
            "asetpts=N/SR/TB",
            f"atrim=0:{video_duration}",
        ]
    bgm_steps += [
        f"afade=t=in:st=0:d={fade_in}",
        f"afade=t=out:st={fade_out_start}:d={fade_out}",
        f"volume={bgm_volume}",
    ]
    bgm_chain = "[1:a]" + ",".join(bgm_steps) + "[bgm]"

    # Plain mix: original audio plus the low-volume BGM.
    mix_graph = f"{bgm_chain};[0:a][bgm]amix=inputs=2:duration=first[outa]"

    # Ducking: the original audio is split so one copy drives the compressor
    # (sidechain) while the other is mixed with the ducked BGM.
    duck_graph = (
        "[0:a]asplit=2[main][sc];"
        f"{bgm_chain};"
        "[bgm][sc]sidechaincompress=threshold=0.1:ratio=4:attack=5:release=250:makeup=1:mix=1:level_in=1:level_sc=1[ducked];"
        "[main][ducked]amix=inputs=2:duration=first[outa]"
    )

    def build_command(filter_complex: str) -> List[str]:
        """Assemble the FFmpeg argument list for the given filter graph."""
        return [
            FFMPEG_PATH, "-y",
            "-i", video_path,
            "-stream_loop", "-1" if loop else "0",
            "-i", bgm_path,
            "-filter_complex", filter_complex,
            "-map", "0:v",
            "-map", "[outa]",
            "-c:v", "copy",
            "-c:a", "aac",
            "-b:a", "192k",
            # The looped BGM is endless; cap the output at the video length.
            "-t", str(video_duration),
            output_path,
        ]

    try:
        _run_ffmpeg(build_command(duck_graph if ducking else mix_graph))
    except subprocess.CalledProcessError:
        if not ducking:
            # The plain mix already failed; retrying the same command
            # cannot succeed.
            raise
        # Sidechain unavailable or failed: fall back to the plain mix.
        logger.warning("Sidechain failed, fallback to simple amix for BGM")
        _run_ffmpeg(build_command(mix_graph))

    logger.info(f"BGM added: {output_path}")
    return output_path
|
||||
|
||||
|
||||
def trim_video(
    video_path: str,
    output_path: str,
    start: float = 0,
    duration: Optional[float] = None,
    end: Optional[float] = None
) -> str:
    """Cut a segment out of a video.

    Args:
        video_path: Input video path.
        output_path: Destination file.
        start: Start time in seconds.
        duration: Segment length in seconds. Takes precedence over ``end``.
        end: End time in seconds; used only when ``duration`` is not given.
            When neither is given (or both are 0/None) the segment runs to
            the end of the video.

    Returns:
        ``output_path``.
    """
    cmd = [
        FFMPEG_PATH, "-y",
        "-i", video_path,
        "-ss", str(start),
    ]

    # Track the limit that was actually applied so the log line is accurate
    # and never adds None to a number (which used to raise TypeError after
    # a successful run when neither duration nor end was supplied).
    if duration:
        cmd.extend(["-t", str(duration)])
        end_label = f"{start + duration}s"
    elif end:
        cmd.extend(["-to", str(end)])
        end_label = f"{end}s"
    else:
        end_label = "end"

    cmd.extend([
        "-c:v", "libx264",
        "-preset", "fast",
        "-crf", "23",
        "-c:a", "copy",
        output_path,
    ])

    _run_ffmpeg(cmd)
    logger.info(f"Trimmed video: {start}s - {end_label}")
    return output_path
|
||||
|
||||
|
||||
def speed_up_video(
    video_path: str,
    output_path: str,
    speed: float = 1.5
) -> str:
    """Change the playback speed of a video and its audio.

    Video timing is changed with ``setpts`` and audio with ``atempo``.
    Each ``atempo`` stage is kept within 0.5-2.0 and stages are chained as
    needed, so any positive speed is supported. The previous two-stage
    chain was only valid between 0.25x and 4x.

    Args:
        video_path: Input video path.
        output_path: Destination file.
        speed: Speed factor (>1 faster, <1 slower); must be positive.

    Returns:
        ``output_path``.

    Raises:
        ValueError: If ``speed`` is not greater than zero.
    """
    if speed <= 0:
        raise ValueError(f"speed must be positive, got {speed}")

    video_filter = f"setpts={1/speed}*PTS"

    # Decompose the factor into stages that each stay within [0.5, 2.0].
    tempo_stages = []
    remaining = speed
    while remaining > 2.0:
        tempo_stages.append("atempo=2.0")
        remaining = remaining / 2.0
    while remaining < 0.5:
        tempo_stages.append("atempo=0.5")
        remaining = remaining / 0.5
    tempo_stages.append(f"atempo={remaining}")
    audio_filter = ",".join(tempo_stages)

    cmd = [
        FFMPEG_PATH, "-y",
        "-i", video_path,
        "-vf", video_filter,
        "-af", audio_filter,
        "-c:v", "libx264",
        "-preset", "fast",
        "-crf", "23",
        "-c:a", "aac",
        output_path,
    ]

    _run_ffmpeg(cmd)
    logger.info(f"Speed changed to {speed}x: {output_path}")
    return output_path
|
||||
Reference in New Issue
Block a user