- app.py: Streamlit UI for video generation workflow
- main_flow.py: CLI tool with argparse support
- modules/: Business logic modules (script_gen, image_gen, video_gen, composer, etc.)
- config.py: Configuration with API keys and paths
- requirements.txt: Python dependencies
- docs/: System prompt documentation
82 lines
2.0 KiB
Python
82 lines
2.0 KiB
Python
"""
|
|
MatchMe Studio - ASR Module (Whisper via ShuBiaoBiao)
|
|
"""
|
|
import logging
|
|
import subprocess
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
from openai import OpenAI
|
|
|
|
import config
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Shared OpenAI-compatible client used for transcription requests.
# Both the API key and the endpoint base URL are read from config.
client = OpenAI(
    base_url=config.SHUBIAOBIAO_BASE_URL,
    api_key=config.SHUBIAOBIAO_KEY,
)
|
|
|
|
|
|
def extract_audio_from_video(video_path: str) -> str:
    """Extract the audio track of a video into an MP3 file using ffmpeg.

    The output is written inside ``config.TEMP_DIR`` as
    ``<video stem>_audio.mp3``, encoded with libmp3lame as 16 kHz mono.
    An existing file with the same name is overwritten (ffmpeg ``-y``).

    Args:
        video_path: Path to the input video file.

    Returns:
        The path of the extracted audio file, as a string.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status. The original
            ``CalledProcessError`` is attached as ``__cause__``.
    """
    source = Path(video_path)
    audio_path = config.TEMP_DIR / f"{source.stem}_audio.mp3"

    # ffmpeg does not create missing output directories, so make sure the
    # target directory exists before invoking it.
    audio_path.parent.mkdir(parents=True, exist_ok=True)

    cmd = [
        "ffmpeg", "-y",
        "-i", str(source),
        "-vn",  # No video
        "-acodec", "libmp3lame",
        "-ar", "16000",  # 16kHz for Whisper
        "-ac", "1",  # Mono
        str(audio_path),
    ]

    try:
        subprocess.run(cmd, check=True, capture_output=True)
    except subprocess.CalledProcessError as e:
        # stderr is raw bytes and is not guaranteed to be valid UTF-8;
        # decode leniently so a decoding error never hides the real failure.
        stderr_text = (e.stderr or b"").decode(errors="replace")
        logger.error(f"FFmpeg error: {stderr_text}")
        raise RuntimeError("Failed to extract audio from video") from e

    logger.info(f"Audio extracted to {audio_path}")
    return str(audio_path)
|
|
|
|
|
|
def transcribe(audio_path: str) -> str:
    """Send an audio file to the Whisper API and return the transcript.

    The request uses model ``whisper-1``, language ``zh`` and the ``text``
    response format. Any exception raised while opening the file or calling
    the API is logged and then re-raised unchanged.

    Args:
        audio_path: Path to the audio file to transcribe.

    Returns:
        The transcribed text.
    """
    logger.info(f"Transcribing {audio_path}...")

    try:
        with open(audio_path, "rb") as fh:
            result = client.audio.transcriptions.create(
                model="whisper-1",
                file=fh,
                language="zh",  # Chinese
                response_format="text",
            )

        # The result is either a plain string or an object exposing `.text`.
        if isinstance(result, str):
            transcript = result
        else:
            transcript = result.text

        logger.info(f"Transcription complete: {len(transcript)} chars")
        return transcript
    except Exception as err:
        logger.error(f"Whisper API error: {err}")
        raise
|
|
|
|
|
|
def transcribe_video(video_path: str) -> str:
    """Transcribe the speech in a video file.

    Runs ``extract_audio_from_video`` on ``video_path`` and passes the
    resulting audio file path to ``transcribe``.

    Args:
        video_path: Path to the input video file.

    Returns:
        The text returned by ``transcribe``.
    """
    return transcribe(extract_audio_from_video(video_path))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|