""" MatchMe Studio - ASR Module (Whisper via ShuBiaoBiao) """ import logging import subprocess from pathlib import Path from typing import Optional from openai import OpenAI import config logger = logging.getLogger(__name__) client = OpenAI( api_key=config.SHUBIAOBIAO_KEY, base_url=config.SHUBIAOBIAO_BASE_URL ) def extract_audio_from_video(video_path: str) -> str: """Extract audio track from video using ffmpeg.""" video_path = Path(video_path) audio_path = config.TEMP_DIR / f"{video_path.stem}_audio.mp3" cmd = [ "ffmpeg", "-y", "-i", str(video_path), "-vn", # No video "-acodec", "libmp3lame", "-ar", "16000", # 16kHz for Whisper "-ac", "1", # Mono str(audio_path) ] try: subprocess.run(cmd, check=True, capture_output=True) logger.info(f"Audio extracted to {audio_path}") return str(audio_path) except subprocess.CalledProcessError as e: logger.error(f"FFmpeg error: {e.stderr.decode()}") raise RuntimeError("Failed to extract audio from video") def transcribe(audio_path: str) -> str: """Transcribe audio to text using Whisper API.""" logger.info(f"Transcribing {audio_path}...") try: with open(audio_path, "rb") as audio_file: response = client.audio.transcriptions.create( model="whisper-1", file=audio_file, language="zh", # Chinese response_format="text" ) text = response if isinstance(response, str) else response.text logger.info(f"Transcription complete: {len(text)} chars") return text except Exception as e: logger.error(f"Whisper API error: {e}") raise def transcribe_video(video_path: str) -> str: """Extract audio from video and transcribe.""" audio_path = extract_audio_from_video(video_path) return transcribe(audio_path)