feat: video-flow initial commit

- app.py: Streamlit UI for video generation workflow
- main_flow.py: CLI tool with argparse support
- modules/: Business logic modules (script_gen, image_gen, video_gen, composer, etc.)
- config.py: Configuration with API keys and paths
- requirements.txt: Python dependencies
- docs/: System prompt documentation
This commit is contained in:
Tony Zhang
2025-12-12 19:18:27 +08:00
commit 33a165a615
34 changed files with 12012 additions and 0 deletions

157
modules/export_utils.py Normal file
View File

@@ -0,0 +1,157 @@
import os
import zipfile
import logging
import shutil
import math
from pathlib import Path
from typing import List, Dict, Any
import config
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
def format_timestamp(seconds: float) -> str:
    """Convert a time offset in seconds to an SRT timestamp (``HH:MM:SS,mmm``).

    The value is rounded to the nearest whole millisecond once, and every
    field is then derived with integer arithmetic. Extracting the fractional
    part with float subtraction and truncating it loses a millisecond on
    common inputs (e.g. ``1.001`` would come out as ``00:00:01,000``).

    Args:
        seconds: Offset from the start of the timeline, in seconds. Negative
            values are clamped to zero because SRT has no negative timestamps.

    Returns:
        The timestamp string, e.g. ``"01:01:01,500"`` for ``3661.5``.
    """
    total_millis = max(0, round(seconds * 1000))
    total_secs, millis = divmod(total_millis, 1000)
    total_minutes, secs = divmod(total_secs, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
def generate_srt(script_data: Dict[str, Any], video_map: Dict[int, str]) -> str:
    """Generate SRT subtitle content from script data.

    Scenes are laid out back to back on a single timeline. Each scene's length
    is read from its rendered video when one exists on disk; otherwise (or if
    probing fails or yields a non-positive value) a 5-second estimate is used.
    Scenes without a ``subtitle`` still advance the timeline but emit no cue.

    Args:
        script_data: Script dictionary; its ``"scenes"`` list is read, and
            each scene's ``"id"`` and ``"subtitle"`` keys are used.
        video_map: Mapping from scene id to the path of that scene's video.

    Returns:
        The SRT document as a string (empty when no scene has a subtitle).
        Cues are numbered sequentially from 1, as the SubRip format expects,
        even when some scenes are skipped.
    """
    default_duration = 5.0
    video_map = video_map or {}
    cues = []
    current_time = 0.0

    for scene in script_data.get("scenes", []):
        duration = default_duration
        video_path = video_map.get(scene.get("id"))
        if video_path and os.path.exists(video_path):
            # Imported lazily so the probing dependency is only needed when
            # there is actually a video to inspect.
            from modules import ffmpeg_utils
            try:
                info = ffmpeg_utils.get_video_info(video_path)
                probed = float(info.get("duration", default_duration))
                if probed > 0:
                    duration = probed
            except Exception as exc:
                # Best-effort: a failed probe must not abort the export.
                logger.warning(
                    "Could not probe duration of %s, assuming %.1fs: %s",
                    video_path, default_duration, exc,
                )

        start_time = current_time
        end_time = start_time + duration
        current_time = end_time

        text = scene.get("subtitle", "")
        if text:
            cue_number = len(cues) + 1
            cues.append(
                f"{cue_number}\n"
                f"{format_timestamp(start_time)} --> {format_timestamp(end_time)}\n"
                f"{text}\n\n"
            )

    return "".join(cues)
def create_capcut_package(project_id: str, script_data: Dict[str, Any], assets: Dict[str, str]) -> str:
    """
    Create a ZIP package for CapCut (JianYing) import.

    The archive contains:
    - videos/ (scene videos, prefixed with a two-digit sequence number)
    - audios/ (full_voiceover.mp3, regenerated from the script; best-effort)
    - images/ (fancy text pngs, re-rendered with a default style; best-effort)
    - subtitles.srt

    BGM is not exported: this function has no record of which track was used.

    Args:
        project_id: Identifier used in the staging directory and ZIP names.
        script_data: Script dictionary; its ``"scenes"`` list is read, and
            each scene's ``"id"``, ``"voiceover"`` and ``"fancy_text"`` keys
            are used.
        assets: Asset dictionary; ``assets["scene_videos"]`` is expected to
            map scene ids to video file paths.

    Returns:
        Path of the created ZIP file (under ``config.TEMP_DIR``) as a string.

    Raises:
        OSError: If staging files cannot be written/copied or the ZIP cannot
            be created. The staging directory is removed in every case.
    """
    from modules import factory
    from modules.text_renderer import renderer

    scenes = script_data.get("scenes", [])
    scene_videos = assets.get("scene_videos") or {}

    # The PID keeps concurrent exports of the same project from colliding.
    package_dir = config.TEMP_DIR / f"capcut_pkg_{project_id}_{os.getpid()}"
    if package_dir.exists():
        shutil.rmtree(package_dir)

    try:
        for subdir in ("videos", "audios", "images"):
            # parents=True also creates TEMP_DIR itself if it is missing.
            (package_dir / subdir).mkdir(parents=True)

        # 1. Generate SRT
        srt_content = generate_srt(script_data, scene_videos)
        with open(package_dir / "subtitles.srt", "w", encoding="utf-8") as f:
            f.write(srt_content)

        # 2. Copy videos, renamed with a sequence number for easy sorting
        #    (e.g. 01_scene_<id>.mp4).
        for i, scene in enumerate(scenes):
            sid = scene.get("id")
            video_path = scene_videos.get(sid)
            if video_path and os.path.exists(video_path):
                ext = Path(video_path).suffix
                dest_name = f"{i+1:02d}_scene_{sid}{ext}"
                shutil.copy(video_path, package_dir / "videos" / dest_name)

        # 3. Voiceover. The intermediate audio is not kept as an asset, so the
        #    full track is regenerated here with the default voice.
        full_vo_text = " ".join(s["voiceover"] for s in scenes if s.get("voiceover"))
        if full_vo_text:
            try:
                voice_type = config.VOLC_TTS_DEFAULT_VOICE
                vo_path = factory.generate_voiceover_volcengine(full_vo_text, voice_type)
                shutil.copy(vo_path, package_dir / "audios" / "full_voiceover.mp3")
            except Exception as exc:
                logger.warning("Failed to generate export voiceover: %s", exc)

        # 4. Fancy text images. The composer renders these to temp files that
        #    may be gone by now, so they are re-rendered. The scene's own
        #    style is intentionally ignored: style resolution lives in the
        #    composer, and the export uses one simple default style instead.
        for i, scene in enumerate(scenes):
            ft = scene.get("fancy_text")
            text = ft.get("text", "") if isinstance(ft, dict) else ""
            if not text:
                continue
            try:
                img_path = renderer.render(text, {"font_size": 60, "font_color": "#FFFFFF"}, cache=False)
                shutil.copy(img_path, package_dir / "images" / f"{i+1:02d}_text_{scene['id']}.png")
            except Exception as exc:
                # Best-effort: one failed image must not abort the export.
                logger.warning(
                    "Failed to render fancy text for scene %s: %s",
                    scene.get("id"), exc,
                )

        # 5. Zip it, storing paths relative to the staging directory.
        zip_path = config.TEMP_DIR / f"capcut_export_{project_id}.zip"
        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for root, _dirs, files in os.walk(package_dir):
                for file in files:
                    file_path = os.path.join(root, file)
                    arcname = os.path.relpath(file_path, package_dir)
                    zipf.write(file_path, arcname)
    finally:
        # Always remove the staging directory, including when a step failed.
        shutil.rmtree(package_dir, ignore_errors=True)

    return str(zip_path)