feat: video-flow initial commit

- app.py: Streamlit UI for video generation workflow
- main_flow.py: CLI tool with argparse support
- modules/: Business logic modules (script_gen, image_gen, video_gen, composer, etc.)
- config.py: Configuration with API keys and paths
- requirements.txt: Python dependencies
- docs/: System prompt documentation
2025-12-12 19:18:27 +08:00
commit 33a165a615
34 changed files with 12012 additions and 0 deletions
--- a/modules/asr.py
+++ b/modules/asr.py
@@ -0,0 +1,81 @@
+"""
+MatchMe Studio - ASR Module (Whisper via ShuBiaoBiao)
+"""
+import logging
+import subprocess
+from pathlib import Path
+from typing import Optional
+from openai import OpenAI
+
+import config
+
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+
+# Shared OpenAI SDK client used by transcribe(). It is constructed once, at
+# import time, from the SHUBIAOBIAO_KEY / SHUBIAOBIAO_BASE_URL values in config.
+client = OpenAI(
+    api_key=config.SHUBIAOBIAO_KEY,
+    base_url=config.SHUBIAOBIAO_BASE_URL
+)
+
+
+def extract_audio_from_video(video_path: str) -> str:
+    """Extract the audio track of a video into a mono 16 kHz MP3 via ffmpeg.
+
+    The output is written to ``config.TEMP_DIR`` as ``<video stem>_audio.mp3``;
+    an existing file with that name is overwritten (``ffmpeg -y``).
+
+    Args:
+        video_path: Path to the source video file.
+
+    Returns:
+        The path of the extracted MP3 file, as a string.
+
+    Raises:
+        RuntimeError: If ffmpeg exits with a non-zero status. The original
+            ``CalledProcessError`` is attached as ``__cause__``.
+    """
+    source = Path(video_path)
+    temp_dir = Path(config.TEMP_DIR)
+    # ffmpeg does not create missing output directories, so make sure the
+    # temp directory exists before invoking it.
+    temp_dir.mkdir(parents=True, exist_ok=True)
+    audio_path = temp_dir / f"{source.stem}_audio.mp3"
+
+    cmd = [
+        "ffmpeg", "-y",
+        "-i", str(source),
+        "-vn",  # No video
+        "-acodec", "libmp3lame",
+        "-ar", "16000",  # 16kHz for Whisper
+        "-ac", "1",  # Mono
+        str(audio_path),
+    ]
+
+    try:
+        subprocess.run(cmd, check=True, capture_output=True)
+    except subprocess.CalledProcessError as e:
+        # stderr is raw bytes and is not guaranteed to be valid UTF-8; decode
+        # leniently so a UnicodeDecodeError cannot mask the real failure.
+        stderr_text = (e.stderr or b"").decode("utf-8", errors="replace")
+        logger.error(f"FFmpeg error: {stderr_text}")
+        raise RuntimeError("Failed to extract audio from video") from e
+
+    logger.info(f"Audio extracted to {audio_path}")
+    return str(audio_path)
+
+
+def transcribe(audio_path: str, language: Optional[str] = "zh") -> str:
+    """Transcribe an audio file to text using the Whisper API.
+
+    Args:
+        audio_path: Path to the audio file to upload.
+        language: Language code sent to the API as a hint. Defaults to
+            ``"zh"`` (Chinese); pass ``None` `to send no language hint.
+
+    Returns:
+        The transcribed text.
+
+    Raises:
+        OSError: If the audio file cannot be opened.
+        Exception: Any error raised by the API client is logged (with
+            traceback) and re-raised unchanged.
+    """
+    logger.info(f"Transcribing {audio_path}...")
+
+    request_args = {"model": "whisper-1", "response_format": "text"}
+    if language is not None:
+        request_args["language"] = language
+
+    # Opening the file is kept outside the try block so that a missing or
+    # unreadable file is not misreported as a Whisper API error.
+    with open(audio_path, "rb") as audio_file:
+        try:
+            response = client.audio.transcriptions.create(
+                file=audio_file,
+                **request_args
+            )
+        except Exception as e:
+            logger.exception(f"Whisper API error: {e}")
+            raise
+
+    # The result is either a plain string or an object exposing ``.text``;
+    # accept both.
+    text = response if isinstance(response, str) else response.text
+    logger.info(f"Transcription complete: {len(text)} chars")
+    return text
+
+
+def transcribe_video(video_path: str) -> str:
+    """Extract the audio track from a video and transcribe it.
+
+    The intermediate audio file produced by ``extract_audio_from_video`` is
+    deleted once transcription finishes, whether it succeeded or failed, so
+    repeated calls do not accumulate files in the temp directory.
+
+    Args:
+        video_path: Path to the source video file.
+
+    Returns:
+        The transcribed text.
+
+    Raises:
+        RuntimeError: If audio extraction fails.
+        Exception: Any error raised by ``transcribe`` propagates unchanged.
+    """
+    audio_path = extract_audio_from_video(video_path)
+    try:
+        return transcribe(audio_path)
+    finally:
+        # Best-effort cleanup: a failure to delete must never replace the
+        # transcription result or the original exception.
+        try:
+            Path(audio_path).unlink()
+        except OSError as cleanup_error:
+            logger.warning(f"Could not remove temp audio {audio_path}: {cleanup_error}")
+
+
+
+
+
+
+
+
+
+
+
+