Files
video-flow/modules/utils.py
Tony Zhang 33a165a615 feat: video-flow initial commit
- app.py: Streamlit UI for video generation workflow
- main_flow.py: CLI tool with argparse support
- modules/: Business logic modules (script_gen, image_gen, video_gen, composer, etc.)
- config.py: Configuration with API keys and paths
- requirements.txt: Python dependencies
- docs/: System prompt documentation
2025-12-12 19:18:27 +08:00

178 lines
5.9 KiB
Python

"""
Gloda Video Factory - Utility Functions
Handles font management, Auto-QC, and helper effects.
"""
import os
import logging
from pathlib import Path
from typing import Optional, Tuple
import urllib.request
import math
import numpy as np
from PIL import Image
from moviepy.editor import ImageClip, VideoFileClip, AudioFileClip
import config
logger = logging.getLogger(__name__)
# Google Fonts CDN URL
ROBOTO_BOLD_URL = "https://github.com/googlefonts/roboto/raw/main/src/hinted/Roboto-Bold.ttf"
NOTO_SC_BOLD_URL = "https://raw.githubusercontent.com/google/fonts/main/ofl/notosanssc/NotoSansSC-Bold.ttf"
FONT_PATH_EN = config.FONTS_DIR / "Roboto-Bold.ttf"
FONT_PATH_CN = config.FONTS_DIR / "NotoSansSC-Bold.ttf"
def ensure_fonts() -> Path:
    """Ensure required fonts (EN & CN) are available locally.

    Downloads Roboto-Bold and NotoSansSC-Bold into ``config.FONTS_DIR``
    if they are missing. Download failures are logged and non-fatal.

    Returns:
        Path to the CN font if present (covers mixed CJK/Latin text),
        otherwise the EN font path (which may itself not exist if both
        downloads failed).
    """
    config.FONTS_DIR.mkdir(parents=True, exist_ok=True)
    downloads = (
        ("EN", ROBOTO_BOLD_URL, FONT_PATH_EN),
        ("CN", NOTO_SC_BOLD_URL, FONT_PATH_CN),
    )
    for label, url, dest in downloads:
        if dest.exists():
            continue
        # Lazy %-style args: don't format the message unless it is emitted.
        logger.info("Downloading %s font (%s)...", label, dest.name)
        try:
            urllib.request.urlretrieve(url, dest)
        except Exception as e:
            logger.error("Failed to download %s font: %s", label, e)
            # urlretrieve can leave a partial file behind; remove it so a
            # later run retries instead of treating the corrupt file as valid.
            dest.unlink(missing_ok=True)
    # Prefer the CN font as default for mixed text.
    return FONT_PATH_CN if FONT_PATH_CN.exists() else FONT_PATH_EN
def check_imagemagick() -> bool:
    """Check whether an ImageMagick CLI binary is available on PATH.

    Looks for both the ImageMagick 7 entry point (``magick``) and the
    legacy v6 binary (``convert``); checking only ``convert`` falsely
    reports v7 installs as missing.

    Returns:
        True if ImageMagick appears installed, False otherwise
        (a warning is logged in that case).
    """
    import shutil
    if shutil.which("magick") or shutil.which("convert"):
        return True
    logger.warning("ImageMagick not found. Text overlays may fail.")
    return False
def verify_assets(video_path: str, audio_path: str) -> Tuple[bool, str]:
    """
    Auto-QC: Verify generated assets quality.
    Checks:
    1. File size sanity check
    2. Duration matching (+/- 2s tolerance)
    3. Audio silence check (RMS over the first 2 seconds)
    Args:
        video_path: Path to the rendered video file.
        audio_path: Path to the narration audio file.
    Returns:
        (Passed: bool, Reason: str) — reason is "QC Passed" on success.
    """
    logger.info(f"Running Auto-QC on:\nVideo: {video_path}\nAudio: {audio_path}")
    try:
        # 1. File Size Check — tiny files indicate encoder failure / black output.
        vid_size = os.path.getsize(video_path)
        if vid_size < 50 * 1024:  # < 50KB
            return False, f"Video file too small ({vid_size/1024:.1f}KB). Likely error/black screen."
        aud_size = os.path.getsize(audio_path)
        if aud_size < 5 * 1024:  # < 5KB
            return False, f"Audio file too small ({aud_size/1024:.1f}KB)."
        # 2/3. Duration + silence checks on the decoded media.
        v_clip = None
        a_clip = None
        try:
            v_clip = VideoFileClip(video_path)
            a_clip = AudioFileClip(audio_path)
            v_dur = v_clip.duration
            a_dur = a_clip.duration
            # Silence check (RMS): decode only the first 2 seconds instead of
            # the whole track (the original decoded the entire audio).
            probe = a_clip.subclip(0, min(2.0, a_dur))
            chunk = probe.to_soundarray(fps=44100, nbytes=2, buffersize=1000)
            if chunk is not None:
                rms = np.sqrt(np.mean(chunk**2))
                if rms < 0.001:
                    return False, "Audio appears to be silent (RMS < 0.001)"
            # Tolerance check
            if abs(v_dur - a_dur) > 2.0:
                return False, f"Duration mismatch: Video={v_dur:.1f}s, Audio={a_dur:.1f}s"
        except Exception as e:
            return False, f"Media analysis failed: {str(e)}"
        finally:
            # Always release the ffmpeg reader handles — the original leaked
            # them on the silent-audio early return and on exceptions.
            for clip in (v_clip, a_clip):
                if clip is not None:
                    try:
                        clip.close()
                    except Exception:
                        pass
        return True, "QC Passed"
    except Exception as e:
        logger.error(f"Auto-QC Error: {e}")
        return False, f"QC System Error: {e}"
def apply_ken_burns(
    image_path: str,
    duration: float = 5.0,
    zoom_ratio: float = 1.2,
    output_path: Optional[str] = None
) -> str:
    """Apply Ken Burns effect (slow zoom in) to a static image.

    Args:
        image_path: Source still image.
        duration: Output clip length in seconds (must be > 0).
        zoom_ratio: Final zoom factor relative to the initial framing.
        output_path: Target mp4 path; defaults to
            ``config.OUTPUT_DIR/<stem>_ken_burns.mp4``.

    Returns:
        Path of the written video file.
    """
    if output_path is None:
        base_name = Path(image_path).stem
        output_path = str(config.OUTPUT_DIR / f"{base_name}_ken_burns.mp4")
    logger.info(f"Applying Ken Burns effect to {image_path}")
    target_width = config.VIDEO_SETTINGS["width"]
    target_height = config.VIDEO_SETTINGS["height"]
    fps = config.VIDEO_SETTINGS["fps"]
    # Use a context manager so the file handle is released promptly (the
    # original leaked it), and force RGB so RGBA/palette sources don't break
    # the mp4 encoder.
    with Image.open(image_path) as img:
        img = img.convert("RGB")
        img_width, img_height = img.size
        # Oversample the source so the crop window never exceeds it at max zoom.
        scale_w = (target_width * zoom_ratio) / img_width
        scale_h = (target_height * zoom_ratio) / img_height
        base_scale = max(scale_w, scale_h)
        new_width = int(img_width * base_scale)
        new_height = int(img_height * base_scale)
        img_resized = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
    img_array = np.array(img_resized)

    def make_frame(t):
        # Cosine ease-in/out: progress 0 -> 1 across the clip.
        progress = t / duration
        eased_progress = 0.5 - 0.5 * np.cos(np.pi * progress)
        current_zoom = 1 + (zoom_ratio - 1) * eased_progress
        # A shrinking, centered crop window simulates zooming in.
        crop_width = int(target_width / current_zoom * (new_width / target_width))
        crop_height = int(target_height / current_zoom * (new_height / target_height))
        crop_width = min(crop_width, new_width)
        crop_height = min(crop_height, new_height)
        x_start = (new_width - crop_width) // 2
        y_start = (new_height - crop_height) // 2
        cropped = img_array[y_start:y_start + crop_height, x_start:x_start + crop_width]
        cropped_pil = Image.fromarray(cropped)
        resized = cropped_pil.resize((target_width, target_height), Image.Resampling.LANCZOS)
        return np.array(resized)

    # NOTE(review): moviepy's ImageClip normally takes a frame array; a frame
    # function is usually passed to VideoClip — confirm this renders animated
    # frames rather than a static first frame.
    clip = ImageClip(make_frame, duration=duration)
    clip = clip.set_fps(fps)
    try:
        clip.write_videofile(output_path, fps=fps, codec=config.VIDEO_SETTINGS["codec"], audio=False, logger=None)
    finally:
        # Release the clip even if encoding fails (original leaked it on error).
        clip.close()
    return output_path