feat: video-flow initial commit
- app.py: Streamlit UI for video generation workflow - main_flow.py: CLI tool with argparse support - modules/: Business logic modules (script_gen, image_gen, video_gen, composer, etc.) - config.py: Configuration with API keys and paths - requirements.txt: Python dependencies - docs/: System prompt documentation
This commit is contained in:
177
modules/utils.py
Normal file
177
modules/utils.py
Normal file
@@ -0,0 +1,177 @@
|
||||
"""
|
||||
Gloda Video Factory - Utility Functions
|
||||
Handles font management, Auto-QC, and helper effects.
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Optional, Tuple
|
||||
import urllib.request
|
||||
import math
|
||||
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
from moviepy.editor import ImageClip, VideoFileClip, AudioFileClip
|
||||
|
||||
import config
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Download URLs for the fonts. Both point at raw files hosted on GitHub
# (they are not Google Fonts CDN endpoints).
ROBOTO_BOLD_URL = "https://github.com/googlefonts/roboto/raw/main/src/hinted/Roboto-Bold.ttf"
NOTO_SC_BOLD_URL = "https://raw.githubusercontent.com/google/fonts/main/ofl/notosanssc/NotoSansSC-Bold.ttf"

# Local destinations of the downloaded fonts inside the configured fonts directory.
FONT_PATH_EN = config.FONTS_DIR.joinpath("Roboto-Bold.ttf")
FONT_PATH_CN = config.FONTS_DIR.joinpath("NotoSansSC-Bold.ttf")
|
||||
|
||||
|
||||
def _download_font(url: str, dest: Path) -> None:
    """Download *url* to *dest* without ever leaving a partial file at *dest*.

    The payload is first written to a sibling ``.part`` file and only moved
    into place once the transfer has finished, so an interrupted download
    cannot later be mistaken for an installed font.

    Raises:
        Exception: whatever the download or the final rename raised; the
            temporary file is removed (best effort) before re-raising.
    """
    tmp_path = dest.with_name(dest.name + ".part")
    try:
        urllib.request.urlretrieve(url, tmp_path)
        os.replace(tmp_path, dest)
    except Exception:
        try:
            tmp_path.unlink()
        except OSError:
            # Nothing was written or it cannot be removed; the original
            # error is the one worth reporting.
            pass
        raise


def ensure_fonts() -> Path:
    """Ensure required fonts (EN & CN) are available.

    Creates the fonts directory if needed and downloads each missing font.
    Download failures are logged and do not raise, so the function is
    best-effort.

    Returns:
        The CN font path if that file exists (it is used as the default for
        mixed text), otherwise the EN font path. The returned path may not
        exist if both downloads failed; a warning is logged in that case.
    """
    config.FONTS_DIR.mkdir(parents=True, exist_ok=True)

    # (label used in error logs, name used in info logs, source URL, destination)
    fonts = (
        ("EN", "Roboto-Bold", ROBOTO_BOLD_URL, FONT_PATH_EN),
        ("CN", "NotoSansSC-Bold", NOTO_SC_BOLD_URL, FONT_PATH_CN),
    )
    for label, family, url, dest in fonts:
        if dest.exists():
            continue
        logger.info(f"Downloading {family} font...")
        try:
            _download_font(url, dest)
        except Exception as e:
            logger.error(f"Failed to download {label} font: {e}")

    # Return CN font as default for mixed text
    if FONT_PATH_CN.exists():
        return FONT_PATH_CN
    if not FONT_PATH_EN.exists():
        logger.warning(f"No font file available; returning missing path {FONT_PATH_EN}")
    return FONT_PATH_EN
|
||||
|
||||
|
||||
def check_imagemagick() -> bool:
    """Report whether an ImageMagick ``convert`` executable is on PATH.

    Returns:
        True when ``convert`` can be located, False otherwise (a warning is
        logged in the latter case).
    """
    from shutil import which

    found = bool(which("convert"))
    if not found:
        logger.warning("ImageMagick not found. Text overlays may fail.")
    return found
|
||||
|
||||
|
||||
def verify_assets(video_path: str, audio_path: str) -> Tuple[bool, str]:
    """
    Auto-QC: Verify generated assets quality.

    Checks, in the order they are performed:
    1. File size sanity check (video >= 50KB, audio >= 5KB)
    2. Audio silence check (RMS of the decoded audio samples)
    3. Duration matching (+/- 2s tolerance)

    Errors are not raised to the caller: any exception is converted into a
    failed result whose reason contains the error text. Both media clips are
    closed on every path, including failures.

    Returns:
        (Passed: bool, Reason: str)
    """
    logger.info(f"Running Auto-QC on:\nVideo: {video_path}\nAudio: {audio_path}")

    try:
        # 1. File Size Check
        vid_size = os.path.getsize(video_path)
        if vid_size < 50 * 1024:  # < 50KB
            return False, f"Video file too small ({vid_size/1024:.1f}KB). Likely error/black screen."

        aud_size = os.path.getsize(audio_path)
        if aud_size < 5 * 1024:  # < 5KB
            return False, f"Audio file too small ({aud_size/1024:.1f}KB)."

        # 2./3. Media analysis
        v_clip = None
        a_clip = None
        try:
            v_clip = VideoFileClip(video_path)
            a_clip = AudioFileClip(audio_path)

            v_dur = v_clip.duration
            a_dur = a_clip.duration

            # Check for silence (RMS).
            # NOTE: nbytes=2 is the sample width (16-bit), not a length in
            # seconds; the samples of the whole clip are decoded here.
            chunk = a_clip.to_soundarray(fps=44100, nbytes=2, buffersize=1000)
            if chunk is not None:
                rms = np.sqrt(np.mean(chunk**2))
                if rms < 0.001:
                    return False, "Audio appears to be silent (RMS < 0.001)"

            # Tolerance check
            if abs(v_dur - a_dur) > 2.0:
                return False, f"Duration mismatch: Video={v_dur:.1f}s, Audio={a_dur:.1f}s"

        except Exception as e:
            return False, f"Media analysis failed: {str(e)}"
        finally:
            # Close whatever was opened, even when analysis raised or
            # returned early, so no reader is left open.
            for clip in (v_clip, a_clip):
                if clip is None:
                    continue
                try:
                    clip.close()
                except Exception as close_err:
                    logger.debug(f"Ignoring error while closing clip: {close_err}")

        return True, "QC Passed"

    except Exception as e:
        logger.error(f"Auto-QC Error: {e}")
        return False, f"QC System Error: {e}"
|
||||
|
||||
|
||||
def apply_ken_burns(
    image_path: str,
    duration: float = 5.0,
    zoom_ratio: float = 1.2,
    output_path: Optional[str] = None
) -> str:
    """Apply Ken Burns effect (slow zoom in) to a static image.

    The image is scaled so that it covers the target frame at the widest
    crop, then a centred crop window with the target aspect ratio shrinks
    over time (cosine ease-in/ease-out) and is resized to the target frame.

    Args:
        image_path: Path of the source image.
        duration: Length of the generated video in seconds; must be > 0.
        zoom_ratio: Ratio between the final and the initial magnification.
            Values above 1 zoom in; values between 0 and 1 zoom out.
        output_path: Destination video file. Defaults to
            ``<config.OUTPUT_DIR>/<image stem>_ken_burns.mp4``.

    Returns:
        The path of the written video file.

    Raises:
        ValueError: If ``duration`` or ``zoom_ratio`` is not positive.
    """
    # Imported locally: frames come from a function, which needs VideoClip
    # (ImageClip only accepts a static image or array).
    from moviepy.editor import VideoClip

    if duration <= 0:
        raise ValueError(f"duration must be positive, got {duration}")
    if zoom_ratio <= 0:
        raise ValueError(f"zoom_ratio must be positive, got {zoom_ratio}")

    if output_path is None:
        base_name = Path(image_path).stem
        output_path = str(config.OUTPUT_DIR / f"{base_name}_ken_burns.mp4")
    # Make sure the destination directory exists before the writer opens the file.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    logger.info(f"Applying Ken Burns effect to {image_path}")

    target_width = config.VIDEO_SETTINGS["width"]
    target_height = config.VIDEO_SETTINGS["height"]
    fps = config.VIDEO_SETTINGS["fps"]

    # The widest crop used during the animation is target * max(zoom_ratio, 1),
    # so the scaled image must cover at least that area in both dimensions.
    cover = max(zoom_ratio, 1.0)

    with Image.open(image_path) as img:
        img_width, img_height = img.size
        base_scale = max(
            (target_width * cover) / img_width,
            (target_height * cover) / img_height,
        )
        new_width = max(1, math.ceil(img_width * base_scale))
        new_height = max(1, math.ceil(img_height * base_scale))
        # Force 3-channel RGB so RGBA / palette / greyscale sources yield
        # frames of a consistent shape.
        img_resized = img.convert("RGB").resize(
            (new_width, new_height), Image.Resampling.LANCZOS
        )
        img_array = np.array(img_resized)

    def make_frame(t):
        # Clamp so a timestamp marginally outside [0, duration] cannot overshoot.
        progress = min(max(t / duration, 0.0), 1.0)
        eased_progress = 0.5 - 0.5 * np.cos(np.pi * progress)
        current_zoom = 1 + (zoom_ratio - 1) * eased_progress

        # Crop window keeps the target aspect ratio so the final resize
        # never stretches the picture.
        crop_width = int(target_width * zoom_ratio / current_zoom)
        crop_height = int(target_height * zoom_ratio / current_zoom)

        crop_width = min(max(crop_width, 1), new_width)
        crop_height = min(max(crop_height, 1), new_height)

        x_start = (new_width - crop_width) // 2
        y_start = (new_height - crop_height) // 2

        cropped = img_array[y_start:y_start + crop_height, x_start:x_start + crop_width]
        cropped_pil = Image.fromarray(cropped)
        resized = cropped_pil.resize((target_width, target_height), Image.Resampling.LANCZOS)
        return np.array(resized)

    clip = VideoClip(make_frame, duration=duration)
    clip = clip.set_fps(fps)
    try:
        clip.write_videofile(output_path, fps=fps, codec=config.VIDEO_SETTINGS["codec"], audio=False, logger=None)
    finally:
        # Release the clip even if encoding fails.
        clip.close()

    return output_path
|
||||
Reference in New Issue
Block a user