- app.py: Streamlit UI for video generation workflow
- main_flow.py: CLI tool with argparse support
- modules/: Business logic modules (script_gen, image_gen, video_gen, composer, etc.)
- config.py: Configuration with API keys and paths
- requirements.txt: Python dependencies
- docs/: System prompt documentation
178 lines
5.9 KiB
Python
178 lines
5.9 KiB
Python
"""
|
|
Gloda Video Factory - Utility Functions
|
|
Handles font management, Auto-QC, and helper effects.
|
|
"""
|
|
|
|
import os
|
|
import logging
|
|
from pathlib import Path
|
|
from typing import Optional, Tuple
|
|
import urllib.request
|
|
import math
|
|
|
|
import numpy as np
|
|
from PIL import Image
|
|
from moviepy.editor import ImageClip, VideoFileClip, AudioFileClip
|
|
|
|
import config
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Download locations of the two bundled typefaces (both hosted on GitHub).
ROBOTO_BOLD_URL = "https://github.com/googlefonts/roboto/raw/main/src/hinted/Roboto-Bold.ttf"
NOTO_SC_BOLD_URL = "https://raw.githubusercontent.com/google/fonts/main/ofl/notosanssc/NotoSansSC-Bold.ttf"

# Where each typeface is stored locally, inside the configured fonts directory.
FONT_PATH_EN = config.FONTS_DIR.joinpath("Roboto-Bold.ttf")
FONT_PATH_CN = config.FONTS_DIR.joinpath("NotoSansSC-Bold.ttf")
|
|
|
|
|
|
def _download_font(url: str, dest: Path, label: str) -> bool:
    """Fetch one font file to ``dest`` unless it is already present.

    The download is written to a sibling ``.part`` file and only renamed onto
    ``dest`` once it has completed. An interrupted or truncated transfer can
    therefore never leave a broken file at ``dest`` that later calls would
    mistake for a usable font.

    Args:
        url: Location to download the font from.
        dest: Final path of the font file.
        label: Short tag ("EN" / "CN") used in the failure log message.

    Returns:
        True if ``dest`` exists when the function returns, False otherwise.
    """
    if dest.exists():
        return True

    logger.info(f"Downloading {dest.stem} font...")
    partial = dest.with_name(dest.name + ".part")
    try:
        urllib.request.urlretrieve(url, partial)
        if partial.stat().st_size == 0:
            raise ValueError("downloaded file is empty")
        partial.replace(dest)
    except Exception as e:
        # Best effort: a missing font is reported, never fatal.
        logger.error(f"Failed to download {label} font: {e}")
        try:
            partial.unlink()
        except OSError:
            # Nothing was written (or it is already gone); nothing to clean up.
            pass
        return False
    return True


def ensure_fonts() -> Path:
    """Ensure required fonts (EN & CN) are available.

    Creates the fonts directory if needed and downloads whichever of the two
    fonts is missing. Download failures are logged and do not raise.

    Returns:
        The Chinese font path when that file exists (it is the default for
        mixed text), otherwise the English font path. If both downloads
        failed the returned path does not exist; a warning is logged.
    """
    config.FONTS_DIR.mkdir(parents=True, exist_ok=True)

    _download_font(ROBOTO_BOLD_URL, FONT_PATH_EN, "EN")
    _download_font(NOTO_SC_BOLD_URL, FONT_PATH_CN, "CN")

    # Return CN font as default for mixed text
    if FONT_PATH_CN.exists():
        return FONT_PATH_CN

    if not FONT_PATH_EN.exists():
        logger.warning("No font file is available; text rendering may fail.")
    return FONT_PATH_EN
|
|
|
|
|
|
def check_imagemagick() -> bool:
    """Check if ImageMagick is installed.

    ImageMagick is considered available when either:

    * the ``IMAGEMAGICK_BINARY`` environment variable names an existing file
      or a command resolvable on ``PATH`` (MoviePy 1.x reads this variable as
      its binary override; its ``"auto-detect"`` placeholder is ignored), or
    * a ``convert`` executable is found on ``PATH``.

    Returns:
        True if a binary was found. Otherwise a warning is logged and False
        is returned.
    """
    import shutil

    configured = os.environ.get("IMAGEMAGICK_BINARY", "").strip()
    if configured and configured != "auto-detect":
        if os.path.isfile(configured) or shutil.which(configured):
            return True

    if shutil.which("convert"):
        return True

    logger.warning("ImageMagick not found. Text overlays may fail.")
    return False
|
|
|
|
|
|
def verify_assets(video_path: str, audio_path: str) -> Tuple[bool, str]:
    """
    Auto-QC: Verify generated assets quality.

    Checks, in order (the first failing check decides the result):
        1. File size sanity check (video >= 50KB, audio >= 5KB)
        2. Audio silence check (RMS of the decoded samples)
        3. Duration matching (+/- 2s tolerance)

    Args:
        video_path: Path of the generated video file.
        audio_path: Path of the generated audio file.

    Returns:
        (Passed: bool, Reason: str). Errors are never raised to the caller;
        they are reported as a failed check with the error text as reason.
    """
    logger.info(f"Running Auto-QC on:\nVideo: {video_path}\nAudio: {audio_path}")

    try:
        # 1. File Size Check
        vid_size = os.path.getsize(video_path)
        if vid_size < 50 * 1024:  # < 50KB
            return False, f"Video file too small ({vid_size/1024:.1f}KB). Likely error/black screen."

        aud_size = os.path.getsize(audio_path)
        if aud_size < 5 * 1024:  # < 5KB
            return False, f"Audio file too small ({aud_size/1024:.1f}KB)."

        # 2. Media analysis (silence + duration)
        v_clip = None
        a_clip = None
        try:
            v_clip = VideoFileClip(video_path)
            a_clip = AudioFileClip(audio_path)

            v_dur = v_clip.duration
            a_dur = a_clip.duration

            # Decode the audio track to samples for the silence check.
            # nbytes=2 is the sample width (16-bit), not a time limit: no
            # time range is passed, so the whole track is decoded.
            chunk = a_clip.to_soundarray(fps=44100, nbytes=2, buffersize=1000)
            if chunk is not None:
                samples = np.asarray(chunk, dtype=float)
                # No samples at all is treated as silence; the mean of an
                # empty array would be NaN and slip past the threshold test.
                if samples.size == 0 or np.sqrt(np.mean(samples**2)) < 0.001:
                    return False, "Audio appears to be silent (RMS < 0.001)"

            # Tolerance check
            if abs(v_dur - a_dur) > 2.0:
                return False, f"Duration mismatch: Video={v_dur:.1f}s, Audio={a_dur:.1f}s"

        except Exception as e:
            return False, f"Media analysis failed: {str(e)}"

        finally:
            # Always release the readers, including when opening the second
            # clip or decoding raised part-way through.
            for clip in (v_clip, a_clip):
                if clip is None:
                    continue
                try:
                    clip.close()
                except Exception as close_err:
                    logger.debug(f"Ignoring error while closing clip: {close_err}")

        return True, "QC Passed"

    except Exception as e:
        logger.error(f"Auto-QC Error: {e}")
        return False, f"QC System Error: {e}"
|
|
|
|
|
|
def apply_ken_burns(
    image_path: str,
    duration: float = 5.0,
    zoom_ratio: float = 1.2,
    output_path: Optional[str] = None
) -> str:
    """Apply Ken Burns effect (slow zoom in) to a static image.

    The image is scaled once so that it covers the target frame enlarged by
    ``zoom_ratio``. For every frame a centred window with the target aspect
    ratio is cropped and resized to the target size. The window shrinks from
    ``target * zoom_ratio`` to ``target`` along a cosine ease-in/ease-out
    curve, so the picture is never stretched whatever the source aspect is.

    Args:
        image_path: Path of the source image.
        duration: Length of the generated clip in seconds; must be > 0.
        zoom_ratio: Final magnification relative to the first frame; must
            be >= 1.
        output_path: Destination video file. Defaults to
            ``<config.OUTPUT_DIR>/<image stem>_ken_burns.mp4``.

    Returns:
        The path of the written video file.

    Raises:
        ValueError: If ``duration`` is not positive or ``zoom_ratio`` < 1.
    """
    # A frame function must be wrapped in VideoClip; ImageClip only accepts a
    # still image (file name or array) and fails when handed a callable.
    from moviepy.editor import VideoClip

    if duration <= 0:
        raise ValueError(f"duration must be positive, got {duration}")
    if zoom_ratio < 1:
        raise ValueError(f"zoom_ratio must be >= 1 for a zoom-in, got {zoom_ratio}")

    if output_path is None:
        base_name = Path(image_path).stem
        output_path = str(config.OUTPUT_DIR / f"{base_name}_ken_burns.mp4")
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    logger.info(f"Applying Ken Burns effect to {image_path}")

    target_width = config.VIDEO_SETTINGS["width"]
    target_height = config.VIDEO_SETTINGS["height"]
    fps = config.VIDEO_SETTINGS["fps"]

    with Image.open(image_path) as img:
        img_width, img_height = img.size

        # Scale so the image covers the largest crop window in both axes.
        base_scale = max(
            (target_width * zoom_ratio) / img_width,
            (target_height * zoom_ratio) / img_height,
        )
        # Round up so float truncation cannot leave the image one pixel short.
        new_width = max(math.ceil(img_width * base_scale), 1)
        new_height = max(math.ceil(img_height * base_scale), 1)

        # Force 3-channel RGB: palette, greyscale and alpha images would
        # otherwise produce frames the video writer cannot encode.
        img_resized = img.convert("RGB").resize(
            (new_width, new_height), Image.Resampling.LANCZOS
        )
        img_array = np.array(img_resized)

    def make_frame(t):
        # Clamp so a timestamp at or just past the end cannot overshoot.
        progress = min(max(t / duration, 0.0), 1.0)
        eased_progress = 0.5 - 0.5 * np.cos(np.pi * progress)
        current_zoom = 1 + (zoom_ratio - 1) * eased_progress

        # Window keeps the target aspect ratio and shrinks as zoom grows.
        crop_width = int(round(target_width * zoom_ratio / current_zoom))
        crop_height = int(round(target_height * zoom_ratio / current_zoom))
        crop_width = min(max(crop_width, 1), new_width)
        crop_height = min(max(crop_height, 1), new_height)

        x_start = (new_width - crop_width) // 2
        y_start = (new_height - crop_height) // 2

        cropped = img_array[y_start:y_start + crop_height, x_start:x_start + crop_width]
        resized = Image.fromarray(cropped).resize(
            (target_width, target_height), Image.Resampling.LANCZOS
        )
        return np.array(resized)

    clip = VideoClip(make_frame, duration=duration)
    try:
        clip = clip.set_fps(fps)
        clip.write_videofile(
            output_path,
            fps=fps,
            codec=config.VIDEO_SETTINGS["codec"],
            audio=False,
            logger=None,
        )
    finally:
        clip.close()

    return output_path
|