feat: video-flow initial commit

- app.py: Streamlit UI for video generation workflow
- main_flow.py: CLI tool with argparse support
- modules/: Business logic modules (script_gen, image_gen, video_gen, composer, etc.)
- config.py: Configuration with API keys and paths
- requirements.txt: Python dependencies
- docs/: System prompt documentation
2025-12-12 19:18:27 +08:00
commit 33a165a615
34 changed files with 12012 additions and 0 deletions
--- a/modules/utils.py
+++ b/modules/utils.py
@@ -0,0 +1,177 @@
+"""
+Gloda Video Factory - Utility Functions
+Handles font management, Auto-QC, and helper effects.
+"""
+
+import os
+import logging
+from pathlib import Path
+from typing import Optional, Tuple
+import urllib.request
+import math
+
+import numpy as np
+from PIL import Image
+from moviepy.editor import ImageClip, VideoFileClip, AudioFileClip
+
+import config
+
+logger = logging.getLogger(__name__)
+
+# Font download URLs. Both point at raw files hosted on GitHub
+# (github.com / raw.githubusercontent.com), not at the Google Fonts CDN.
+ROBOTO_BOLD_URL = "https://github.com/googlefonts/roboto/raw/main/src/hinted/Roboto-Bold.ttf"
+NOTO_SC_BOLD_URL = "https://raw.githubusercontent.com/google/fonts/main/ofl/notosanssc/NotoSansSC-Bold.ttf"
+
+# Local paths of the fonts inside config.FONTS_DIR (EN = Roboto Bold, CN = Noto Sans SC Bold).
+FONT_PATH_EN = config.FONTS_DIR / "Roboto-Bold.ttf"
+FONT_PATH_CN = config.FONTS_DIR / "NotoSansSC-Bold.ttf"
+
+
+def ensure_fonts() -> Path:
+    """Ensure required fonts (EN & CN) are available."""
+    config.FONTS_DIR.mkdir(parents=True, exist_ok=True)
+    
+    # English Font
+    if not FONT_PATH_EN.exists():
+        logger.info(f"Downloading Roboto-Bold font...")
+        try:
+            urllib.request.urlretrieve(ROBOTO_BOLD_URL, FONT_PATH_EN)
+        except Exception as e:
+            logger.error(f"Failed to download EN font: {e}")
+
+    # Chinese Font
+    if not FONT_PATH_CN.exists():
+        logger.info(f"Downloading NotoSansSC-Bold font...")
+        try:
+            # Using a reliable mirror or source if Github raw is flaky, but trying Github first
+            urllib.request.urlretrieve(NOTO_SC_BOLD_URL, FONT_PATH_CN)
+        except Exception as e:
+            logger.error(f"Failed to download CN font: {e}")
+    
+    # Return CN font as default for mixed text
+    if FONT_PATH_CN.exists():
+        return FONT_PATH_CN
+    return FONT_PATH_EN
+
+
+def check_imagemagick() -> bool:
+    """Check if ImageMagick is installed."""
+    import shutil
+    if shutil.which("convert"):
+        return True
+    else:
+        logger.warning("ImageMagick not found. Text overlays may fail.")
+        return False
+
+
+def verify_assets(video_path: str, audio_path: str) -> Tuple[bool, str]:
+    """
+    Auto-QC: Verify generated assets quality.
+    
+    Checks:
+    1. File size sanity check
+    2. Duration matching (+/- 2s tolerance)
+    3. Audio silence check
+    
+    Returns:
+        (Passed: bool, Reason: str)
+    """
+    logger.info(f"Running Auto-QC on:\nVideo: {video_path}\nAudio: {audio_path}")
+    
+    try:
+        # 1. File Size Check
+        vid_size = os.path.getsize(video_path)
+        if vid_size < 50 * 1024:  # < 50KB
+            return False, f"Video file too small ({vid_size/1024:.1f}KB). Likely error/black screen."
+            
+        aud_size = os.path.getsize(audio_path)
+        if aud_size < 5 * 1024:  # < 5KB
+            return False, f"Audio file too small ({aud_size/1024:.1f}KB)."
+
+        # 2. Duration Check
+        try:
+            v_clip = VideoFileClip(video_path)
+            a_clip = AudioFileClip(audio_path)
+            
+            v_dur = v_clip.duration
+            a_dur = a_clip.duration
+            
+            # Check for silence (RMS)
+            # Read first 2 seconds of audio
+            chunk = a_clip.to_soundarray(fps=44100, nbytes=2, buffersize=1000)
+            if chunk is not None:
+                rms = np.sqrt(np.mean(chunk**2))
+                if rms < 0.001:
+                    v_clip.close()
+                    a_clip.close()
+                    return False, "Audio appears to be silent (RMS < 0.001)"
+            
+            v_clip.close()
+            a_clip.close()
+            
+            # Tolerance check
+            if abs(v_dur - a_dur) > 2.0:
+                return False, f"Duration mismatch: Video={v_dur:.1f}s, Audio={a_dur:.1f}s"
+                
+        except Exception as e:
+            return False, f"Media analysis failed: {str(e)}"
+
+        return True, "QC Passed"
+        
+    except Exception as e:
+        logger.error(f"Auto-QC Error: {e}")
+        return False, f"QC System Error: {e}"
+
+
+def apply_ken_burns(
+    image_path: str,
+    duration: float = 5.0,
+    zoom_ratio: float = 1.2,
+    output_path: Optional[str] = None
+) -> str:
+    """Apply Ken Burns effect (slow zoom in) to a static image."""
+    if output_path is None:
+        base_name = Path(image_path).stem
+        output_path = str(config.OUTPUT_DIR / f"{base_name}_ken_burns.mp4")
+    
+    logger.info(f"Applying Ken Burns effect to {image_path}")
+    
+    img = Image.open(image_path)
+    img_width, img_height = img.size
+    target_width = config.VIDEO_SETTINGS["width"]
+    target_height = config.VIDEO_SETTINGS["height"]
+    fps = config.VIDEO_SETTINGS["fps"]
+    
+    scale_w = (target_width * zoom_ratio) / img_width
+    scale_h = (target_height * zoom_ratio) / img_height
+    base_scale = max(scale_w, scale_h)
+    
+    new_width = int(img_width * base_scale)
+    new_height = int(img_height * base_scale)
+    img_resized = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
+    img_array = np.array(img_resized)
+    
+    def make_frame(t):
+        progress = t / duration
+        eased_progress = 0.5 - 0.5 * np.cos(np.pi * progress)
+        current_zoom = 1 + (zoom_ratio - 1) * eased_progress
+        
+        crop_width = int(target_width / current_zoom * (new_width / target_width))
+        crop_height = int(target_height / current_zoom * (new_height / target_height))
+        
+        crop_width = min(crop_width, new_width)
+        crop_height = min(crop_height, new_height)
+        
+        x_start = (new_width - crop_width) // 2
+        y_start = (new_height - crop_height) // 2
+        
+        cropped = img_array[y_start:y_start + crop_height, x_start:x_start + crop_width]
+        cropped_pil = Image.fromarray(cropped)
+        resized = cropped_pil.resize((target_width, target_height), Image.Resampling.LANCZOS)
+        return np.array(resized)
+    
+    clip = ImageClip(make_frame, duration=duration)
+    clip = clip.set_fps(fps)
+    clip.write_videofile(output_path, fps=fps, codec=config.VIDEO_SETTINGS["codec"], audio=False, logger=None)
+    clip.close()
+    
+    return output_path