Files
video-flow/modules/asr.py
Tony Zhang 33a165a615 feat: video-flow initial commit
- app.py: Streamlit UI for video generation workflow
- main_flow.py: CLI tool with argparse support
- modules/: Business logic modules (script_gen, image_gen, video_gen, composer, etc.)
- config.py: Configuration with API keys and paths
- requirements.txt: Python dependencies
- docs/: System prompt documentation
2025-12-12 19:18:27 +08:00

82 lines
2.0 KiB
Python

"""
MatchMe Studio - ASR Module (Whisper via ShuBiaoBiao)
"""
import logging
import subprocess
from pathlib import Path
from typing import Optional
from openai import OpenAI
import config
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Shared OpenAI SDK client used by the transcription functions below. It is
# pointed at the endpoint in config.SHUBIAOBIAO_BASE_URL and authenticated
# with config.SHUBIAOBIAO_KEY.
client = OpenAI(
    base_url=config.SHUBIAOBIAO_BASE_URL,
    api_key=config.SHUBIAOBIAO_KEY,
)
def extract_audio_from_video(video_path: str) -> str:
    """Extract the audio track of a video into an MP3 file using ffmpeg.

    The audio is written under ``config.TEMP_DIR`` as
    ``<video stem>_audio.mp3``, re-encoded as 16 kHz mono MP3. An existing
    file with that name is overwritten (ffmpeg ``-y``).

    Args:
        video_path: Path to the input video file.

    Returns:
        The path of the extracted audio file, as a string.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status. The original
            ``subprocess.CalledProcessError`` is attached as ``__cause__``.
    """
    source = Path(video_path)
    audio_path = config.TEMP_DIR / f"{source.stem}_audio.mp3"
    # ffmpeg does not create missing output directories, so make sure the
    # target directory exists before invoking it.
    audio_path.parent.mkdir(parents=True, exist_ok=True)

    cmd = [
        "ffmpeg", "-y",
        "-i", str(source),
        "-vn",  # No video
        "-acodec", "libmp3lame",
        "-ar", "16000",  # 16kHz for Whisper
        "-ac", "1",  # Mono
        str(audio_path),
    ]

    try:
        subprocess.run(cmd, check=True, capture_output=True)
    except subprocess.CalledProcessError as e:
        # ffmpeg's stderr is not guaranteed to be valid UTF-8; decode
        # leniently so a decoding failure never hides the real error.
        logger.error(f"FFmpeg error: {e.stderr.decode(errors='replace')}")
        raise RuntimeError("Failed to extract audio from video") from e

    logger.info(f"Audio extracted to {audio_path}")
    return str(audio_path)
def transcribe(audio_path: str, language: Optional[str] = "zh") -> str:
    """Transcribe an audio file to text using the Whisper API.

    Args:
        audio_path: Path to the audio file to upload.
        language: Language code sent to the API as the ``language`` hint.
            Defaults to ``"zh"`` (Chinese). Pass ``None`` to leave the hint
            out of the request.

    Returns:
        The transcribed text.

    Raises:
        OSError: If the audio file cannot be opened.
        Exception: Any error raised by the API call is logged and re-raised
            unchanged.
    """
    logger.info(f"Transcribing {audio_path}...")

    request_args = {"model": "whisper-1", "response_format": "text"}
    if language is not None:
        request_args["language"] = language

    # Open the file separately from the API call so that a missing or
    # unreadable file is not reported as a Whisper API failure.
    try:
        audio_file = open(audio_path, "rb")
    except OSError as e:
        logger.error(f"Cannot open audio file {audio_path}: {e}")
        raise

    with audio_file:
        try:
            response = client.audio.transcriptions.create(
                file=audio_file,
                **request_args,
            )
        except Exception as e:
            logger.error(f"Whisper API error: {e}")
            raise

    # Use the response directly when it is already a string; otherwise read
    # its ``text`` attribute.
    text = response if isinstance(response, str) else response.text
    logger.info(f"Transcription complete: {len(text)} chars")
    return text
def transcribe_video(video_path: str) -> str:
    """Transcribe the speech contained in a video file.

    The video is first passed to ``extract_audio_from_video``; the audio
    file path it returns is then handed to ``transcribe``.

    Args:
        video_path: Path to the input video file.

    Returns:
        The text returned by ``transcribe`` for the extracted audio.
    """
    return transcribe(extract_audio_from_video(video_path))