feat: video-flow initial commit
- app.py: Streamlit UI for video generation workflow
- main_flow.py: CLI tool with argparse support
- modules/: Business logic modules (script_gen, image_gen, video_gen, composer, etc.)
- config.py: Configuration with API keys and paths
- requirements.txt: Python dependencies
- docs/: System prompt documentation
This commit is contained in:
81
modules/asr.py
Normal file
81
modules/asr.py
Normal file
@@ -0,0 +1,81 @@
|
||||
"""
|
||||
MatchMe Studio - ASR Module (Whisper via ShuBiaoBiao)
|
||||
"""
|
||||
import logging
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from openai import OpenAI
|
||||
|
||||
import config
|
||||
|
||||
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Shared OpenAI-SDK client, built once at import time. The key and base URL
# both come from the project config (the SHUBIAOBIAO_* settings), so requests
# go to whatever endpoint that base URL designates.
_client_options = {
    "api_key": config.SHUBIAOBIAO_KEY,
    "base_url": config.SHUBIAOBIAO_BASE_URL,
}
client = OpenAI(**_client_options)
|
||||
|
||||
|
||||
def extract_audio_from_video(video_path: str) -> str:
    """Extract the audio track of a video into a mono 16 kHz MP3 via ffmpeg.

    The output is written to ``config.TEMP_DIR`` as ``<video stem>_audio.mp3``.
    ffmpeg is invoked with ``-y``, so an existing file of that name is
    overwritten.

    Args:
        video_path: Path to the source video file.

    Returns:
        The path of the extracted audio file, as a string.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status. The underlying
            ``subprocess.CalledProcessError`` is attached as ``__cause__``.
    """
    # Keep the parameter as-is and use a separate Path for stem extraction.
    source = Path(video_path)
    audio_path = config.TEMP_DIR / f"{source.stem}_audio.mp3"

    cmd = [
        "ffmpeg", "-y",
        "-i", str(source),
        "-vn",  # No video
        "-acodec", "libmp3lame",
        "-ar", "16000",  # 16kHz for Whisper
        "-ac", "1",  # Mono
        str(audio_path),
    ]

    try:
        subprocess.run(cmd, check=True, capture_output=True)
    except subprocess.CalledProcessError as e:
        # ffmpeg's stderr may contain bytes that are not valid UTF-8 (for
        # example file names echoed in a local code page). Decode leniently so
        # a UnicodeDecodeError here cannot mask the real ffmpeg failure.
        stderr_text = (e.stderr or b"").decode("utf-8", errors="replace")
        logger.error(f"FFmpeg error: {stderr_text}")
        # Chain the original error so the exit code and command stay visible
        # in the traceback.
        raise RuntimeError("Failed to extract audio from video") from e

    logger.info(f"Audio extracted to {audio_path}")
    return str(audio_path)
|
||||
|
||||
|
||||
def transcribe(audio_path: str, language: Optional[str] = "zh") -> str:
    """Transcribe an audio file to text through the Whisper transcription API.

    The file is opened in binary mode and uploaded with the module-level
    ``client`` using the ``whisper-1`` model and the plain-text response
    format.

    Args:
        audio_path: Path to the audio file to transcribe.
        language: Language hint forwarded to the API. Defaults to ``"zh"``
            (Chinese), which was previously hard-coded. Pass ``None`` to send
            no language hint at all.

    Returns:
        The transcribed text.

    Raises:
        Exception: Any error raised while opening the file, calling the API,
            or reading the response is logged and re-raised unchanged.
    """
    logger.info(f"Transcribing {audio_path}...")

    request_args = {
        "model": "whisper-1",
        "response_format": "text",
    }
    # Only include the hint when one is given, so ``None`` means "omit the
    # parameter" rather than sending an explicit null to the API.
    if language is not None:
        request_args["language"] = language

    try:
        with open(audio_path, "rb") as audio_file:
            response = client.audio.transcriptions.create(
                file=audio_file,
                **request_args,
            )

        # The response may be a bare string or an object exposing ``.text``;
        # accept both.
        text = response if isinstance(response, str) else response.text
        logger.info(f"Transcription complete: {len(text)} chars")
        return text

    except Exception as e:
        logger.error(f"Whisper API error: {e}")
        raise
|
||||
|
||||
|
||||
def transcribe_video(video_path: str) -> str:
    """Transcribe the speech contained in a video file.

    Runs the two-step pipeline in one go: the audio track is first extracted
    with ``extract_audio_from_video``, and the resulting audio file is then
    passed to ``transcribe``.

    Args:
        video_path: Path to the video file.

    Returns:
        The text returned by ``transcribe`` for the extracted audio.
    """
    return transcribe(extract_audio_from_video(video_path))
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user