Files
video-flow/modules/utils.py
Tony Zhang 33a165a615 feat: video-flow initial commit
- app.py: Streamlit UI for video generation workflow
- main_flow.py: CLI tool with argparse support
- modules/: Business logic modules (script_gen, image_gen, video_gen, composer, etc.)
- config.py: Configuration with API keys and paths
- requirements.txt: Python dependencies
- docs/: System prompt documentation
2025-12-12 19:18:27 +08:00

178 lines
5.9 KiB
Python

"""
Gloda Video Factory - Utility Functions
Handles font management, Auto-QC, and helper effects.
"""
import os
import logging
from pathlib import Path
from typing import Optional, Tuple
import urllib.request
import math
import numpy as np
from PIL import Image
from moviepy.editor import ImageClip, VideoFileClip, AudioFileClip
import config
logger = logging.getLogger(__name__)
# Google Fonts CDN URL
ROBOTO_BOLD_URL = "https://github.com/googlefonts/roboto/raw/main/src/hinted/Roboto-Bold.ttf"
NOTO_SC_BOLD_URL = "https://raw.githubusercontent.com/google/fonts/main/ofl/notosanssc/NotoSansSC-Bold.ttf"
FONT_PATH_EN = config.FONTS_DIR / "Roboto-Bold.ttf"
FONT_PATH_CN = config.FONTS_DIR / "NotoSansSC-Bold.ttf"
def ensure_fonts() -> Path:
    """Ensure required fonts (EN & CN) are available locally.

    Downloads Roboto-Bold and NotoSansSC-Bold into ``config.FONTS_DIR``
    if they are missing. Download failures are logged and non-fatal.

    Returns:
        Path to the CN font if present (covers mixed CJK/Latin text),
        otherwise the EN font path (which may itself not exist if both
        downloads failed).
    """
    config.FONTS_DIR.mkdir(parents=True, exist_ok=True)
    downloads = (
        ("EN", ROBOTO_BOLD_URL, FONT_PATH_EN),
        ("CN", NOTO_SC_BOLD_URL, FONT_PATH_CN),
    )
    for label, url, dest in downloads:
        if dest.exists():
            continue
        # Lazy %-style args: don't format the message unless it is emitted.
        logger.info("Downloading %s font (%s)...", label, dest.name)
        try:
            urllib.request.urlretrieve(url, dest)
        except Exception as e:
            logger.error("Failed to download %s font: %s", label, e)
            # urlretrieve can leave a partial file behind; remove it so a
            # later run retries instead of treating the corrupt file as valid.
            dest.unlink(missing_ok=True)
    # Prefer the CN font as default for mixed text.
    return FONT_PATH_CN if FONT_PATH_CN.exists() else FONT_PATH_EN
def check_imagemagick() -> bool:
    """Check whether an ImageMagick CLI binary is available on PATH.

    Looks for both the ImageMagick 7 entry point (``magick``) and the
    legacy v6 binary (``convert``); checking only ``convert`` falsely
    reports v7 installs as missing.

    Returns:
        True if ImageMagick appears installed, False otherwise
        (a warning is logged in that case).
    """
    import shutil
    if shutil.which("magick") or shutil.which("convert"):
        return True
    logger.warning("ImageMagick not found. Text overlays may fail.")
    return False
def verify_assets(video_path: str, audio_path: str) -> Tuple[bool, str]:
    """
    Auto-QC: Verify generated assets quality.
    Checks:
    1. File size sanity check
    2. Duration matching (+/- 2s tolerance)
    3. Audio silence check (RMS over the first 2 seconds)
    Args:
        video_path: Path to the rendered video file.
        audio_path: Path to the narration audio file.
    Returns:
        (Passed: bool, Reason: str) — reason is "QC Passed" on success.
    """
    logger.info(f"Running Auto-QC on:\nVideo: {video_path}\nAudio: {audio_path}")
    try:
        # 1. File Size Check — tiny files indicate encoder failure / black output.
        vid_size = os.path.getsize(video_path)
        if vid_size < 50 * 1024:  # < 50KB
            return False, f"Video file too small ({vid_size/1024:.1f}KB). Likely error/black screen."
        aud_size = os.path.getsize(audio_path)
        if aud_size < 5 * 1024:  # < 5KB
            return False, f"Audio file too small ({aud_size/1024:.1f}KB)."
        # 2/3. Duration + silence checks on the decoded media.
        v_clip = None
        a_clip = None
        try:
            v_clip = VideoFileClip(video_path)
            a_clip = AudioFileClip(audio_path)
            v_dur = v_clip.duration
            a_dur = a_clip.duration
            # Silence check (RMS): decode only the first 2 seconds instead of
            # the whole track (the original decoded the entire audio).
            probe = a_clip.subclip(0, min(2.0, a_dur))
            chunk = probe.to_soundarray(fps=44100, nbytes=2, buffersize=1000)
            if chunk is not None:
                rms = np.sqrt(np.mean(chunk**2))
                if rms < 0.001:
                    return False, "Audio appears to be silent (RMS < 0.001)"
            # Tolerance check
            if abs(v_dur - a_dur) > 2.0:
                return False, f"Duration mismatch: Video={v_dur:.1f}s, Audio={a_dur:.1f}s"
        except Exception as e:
            return False, f"Media analysis failed: {str(e)}"
        finally:
            # Always release the ffmpeg reader handles — the original leaked
            # them on the silent-audio early return and on exceptions.
            for clip in (v_clip, a_clip):
                if clip is not None:
                    try:
                        clip.close()
                    except Exception:
                        pass
        return True, "QC Passed"
    except Exception as e:
        logger.error(f"Auto-QC Error: {e}")
        return False, f"QC System Error: {e}"
def apply_ken_burns(
    image_path: str,
    duration: float = 5.0,
    zoom_ratio: float = 1.2,
    output_path: Optional[str] = None
) -> str:
    """Apply Ken Burns effect (slow zoom in) to a static image.

    Args:
        image_path: Source still image.
        duration: Output clip length in seconds (must be > 0).
        zoom_ratio: Final zoom factor relative to the initial framing.
        output_path: Target mp4 path; defaults to
            ``config.OUTPUT_DIR/<stem>_ken_burns.mp4``.

    Returns:
        Path of the written video file.
    """
    if output_path is None:
        base_name = Path(image_path).stem
        output_path = str(config.OUTPUT_DIR / f"{base_name}_ken_burns.mp4")
    logger.info(f"Applying Ken Burns effect to {image_path}")
    target_width = config.VIDEO_SETTINGS["width"]
    target_height = config.VIDEO_SETTINGS["height"]
    fps = config.VIDEO_SETTINGS["fps"]
    # Use a context manager so the file handle is released promptly (the
    # original leaked it), and force RGB so RGBA/palette sources don't break
    # the mp4 encoder.
    with Image.open(image_path) as img:
        img = img.convert("RGB")
        img_width, img_height = img.size
        # Oversample the source so the crop window never exceeds it at max zoom.
        scale_w = (target_width * zoom_ratio) / img_width
        scale_h = (target_height * zoom_ratio) / img_height
        base_scale = max(scale_w, scale_h)
        new_width = int(img_width * base_scale)
        new_height = int(img_height * base_scale)
        img_resized = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
    img_array = np.array(img_resized)

    def make_frame(t):
        # Cosine ease-in/out: progress 0 -> 1 across the clip.
        progress = t / duration
        eased_progress = 0.5 - 0.5 * np.cos(np.pi * progress)
        current_zoom = 1 + (zoom_ratio - 1) * eased_progress
        # A shrinking, centered crop window simulates zooming in.
        crop_width = int(target_width / current_zoom * (new_width / target_width))
        crop_height = int(target_height / current_zoom * (new_height / target_height))
        crop_width = min(crop_width, new_width)
        crop_height = min(crop_height, new_height)
        x_start = (new_width - crop_width) // 2
        y_start = (new_height - crop_height) // 2
        cropped = img_array[y_start:y_start + crop_height, x_start:x_start + crop_width]
        cropped_pil = Image.fromarray(cropped)
        resized = cropped_pil.resize((target_width, target_height), Image.Resampling.LANCZOS)
        return np.array(resized)

    # NOTE(review): moviepy's ImageClip normally takes a frame array; a frame
    # function is usually passed to VideoClip — confirm this renders animated
    # frames rather than a static first frame.
    clip = ImageClip(make_frame, duration=duration)
    clip = clip.set_fps(fps)
    try:
        clip.write_videofile(output_path, fps=fps, codec=config.VIDEO_SETTINGS["codec"], audio=False, logger=None)
    finally:
        # Release the clip even if encoding fails (original leaked it on error).
        clip.close()
    return output_path