feat: video-flow initial commit

- app.py: Streamlit UI for video generation workflow
- main_flow.py: CLI tool with argparse support
- modules/: Business logic modules (script_gen, image_gen, video_gen, composer, etc.)
- config.py: Configuration with API keys and paths
- requirements.txt: Python dependencies
- docs/: System prompt documentation
2025-12-12 19:18:27 +08:00
commit 33a165a615
34 changed files with 12012 additions and 0 deletions
--- a/modules/export_utils.py
+++ b/modules/export_utils.py
@@ -0,0 +1,157 @@
+import os
+import zipfile
+import logging
+import shutil
+import math
+from pathlib import Path
+from typing import List, Dict, Any
+import config
+
+logger = logging.getLogger(__name__)
+
+def format_timestamp(seconds: float) -> str:
+    """Convert a time offset in seconds to an SRT timestamp (HH:MM:SS,mmm).
+
+    The value is rounded to the nearest millisecond *once*, as an integer,
+    and only then split into fields. Truncating the float remainder instead
+    loses a millisecond on values such as 2.3 (stored as 2.2999...), and
+    rounding the fields separately could produce an out-of-range ",1000".
+
+    Args:
+        seconds: Offset from the start of the timeline, in seconds.
+            Negative values are clamped to zero because SRT has no
+            representation for them.
+
+    Returns:
+        The timestamp string, e.g. "00:01:02,500". Hours are zero-padded to
+        two digits and simply grow wider past 99 hours.
+    """
+    total_millis = max(0, round(seconds * 1000))
+    total_secs, millis = divmod(total_millis, 1000)
+    total_minutes, secs = divmod(total_secs, 60)
+    hours, minutes = divmod(total_minutes, 60)
+    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"
+
+def generate_srt(script_data: Dict[str, Any], video_map: Dict[int, str]) -> str:
+    """Generate SRT subtitle text from script data.
+
+    Scenes are placed back to back on a single timeline. Each scene's length
+    is read from its rendered video when that file exists and can be probed;
+    otherwise a 5-second default is used. Scenes without a "subtitle" still
+    advance the timeline but emit no cue.
+
+    Args:
+        script_data: Script dict; its "scenes" list holds dicts that are read
+            for the keys "id" and "subtitle".
+        video_map: Maps a scene id to the path of that scene's video file.
+
+    Returns:
+        The SRT document as a string (empty when no scene has a subtitle).
+        Cues are numbered consecutively from 1, so scenes without subtitles
+        do not leave gaps in the numbering.
+    """
+    # Function-scope import kept from the original layout; presumably this
+    # avoids a circular import at module load time -- TODO confirm.
+    from modules import ffmpeg_utils
+
+    default_duration = 5.0
+    cues = []
+    current_time = 0.0
+
+    for scene in script_data.get("scenes", []):
+        duration = default_duration
+        video_path = video_map.get(scene.get("id"))
+        if video_path and os.path.exists(video_path):
+            try:
+                info = ffmpeg_utils.get_video_info(video_path)
+                probed = float(info.get("duration", default_duration))
+            except Exception as e:
+                # Best effort: a failed probe must not abort the export, but
+                # it should be visible in the logs instead of vanishing.
+                logger.warning(f"Could not read duration of {video_path}: {e}")
+            else:
+                # A non-finite or non-positive duration would corrupt every
+                # later timestamp, so fall back to the default for it.
+                if math.isfinite(probed) and probed > 0:
+                    duration = probed
+
+        start_time = current_time
+        end_time = start_time + duration
+        current_time = end_time
+
+        text = scene.get("subtitle", "")
+        if text:
+            cues.append(
+                f"{len(cues) + 1}\n"
+                f"{format_timestamp(start_time)} --> {format_timestamp(end_time)}\n"
+                f"{text}\n\n"
+            )
+
+    return "".join(cues)
+
+def _populate_capcut_package(package_dir: Path, script_data: Dict[str, Any], assets: Dict[str, str]) -> None:
+    """Write subtitles, scene videos, voiceover and text images into package_dir."""
+    for sub_dir in ("videos", "audios", "images"):
+        (package_dir / sub_dir).mkdir()
+
+    scenes = script_data.get("scenes", [])
+    # 'assets' is expected to carry a 'scene_videos' map of scene id -> path.
+    scene_videos = assets.get("scene_videos", {})
+
+    # 1. Subtitles
+    srt_content = generate_srt(script_data, scene_videos)
+    with open(package_dir / "subtitles.srt", "w", encoding="utf-8") as f:
+        f.write(srt_content)
+
+    # 2. Scene videos, prefixed with a sequence number so they sort in
+    #    timeline order inside the editor: 01_scene_<id>.mp4
+    for i, scene in enumerate(scenes):
+        sid = scene["id"]
+        src = scene_videos.get(sid)
+        if src and os.path.exists(src):
+            dest_name = f"{i+1:02d}_scene_{sid}{Path(src).suffix}"
+            shutil.copy(src, package_dir / "videos" / dest_name)
+
+    # 3. Voiceover. The mixed audio is not kept as a standalone asset, so the
+    #    full narration is synthesized again here with the default voice.
+    from modules import factory
+    full_vo_text = " ".join(s.get("voiceover", "") for s in scenes if s.get("voiceover"))
+    if full_vo_text:
+        try:
+            voice_type = config.VOLC_TTS_DEFAULT_VOICE
+            vo_path = factory.generate_voiceover_volcengine(full_vo_text, voice_type)
+            shutil.copy(vo_path, package_dir / "audios" / "full_voiceover.mp3")
+        except Exception as e:
+            logger.warning(f"Failed to generate export voiceover: {e}")
+
+    # BGM is not exported: the selection used by the composer is not
+    # available here, so the user adds their own track in the editor.
+
+    # 4. Fancy-text images. The composer renders these to temp files that may
+    #    already be gone, so they are rendered again with a plain default
+    #    style (the composer's per-scene style resolution is not replicated).
+    from modules.text_renderer import renderer
+    for i, scene in enumerate(scenes):
+        ft = scene.get("fancy_text")
+        text = ft.get("text", "") if isinstance(ft, dict) else ""
+        if not text:
+            continue
+        try:
+            img_path = renderer.render(text, {"font_size": 60, "font_color": "#FFFFFF"}, cache=False)
+            shutil.copy(img_path, package_dir / "images" / f"{i+1:02d}_text_{scene['id']}.png")
+        except Exception as e:
+            # Best effort: one failed image must not abort the export, but
+            # the failure should be visible in the logs.
+            logger.warning(f"Failed to render fancy text for scene {i+1}: {e}")
+
+
+def create_capcut_package(project_id: str, script_data: Dict[str, Any], assets: Dict[str, str]) -> str:
+    """
+    Create a ZIP package for CapCut (JianYing) import
+    Contains:
+    - videos/ (scene videos)
+    - audios/ (voiceover; BGM is not exported)
+    - images/ (fancy text pngs, re-rendered with a default style)
+    - subtitles.srt
+
+    Args:
+        project_id: Used in the staging directory and ZIP file names.
+        script_data: Script dict; its "scenes" entries are read for "id",
+            "subtitle", "voiceover" and "fancy_text".
+        assets: Asset dict; "scene_videos" is read as a map of scene id to
+            video path (so its value is a dict, despite the annotation).
+
+    Returns:
+        Path of the created ZIP file, as a string. The file is written to
+        config.TEMP_DIR as capcut_export_<project_id>.zip.
+
+    Voiceover and fancy-text failures are logged and skipped. Any other
+    error propagates, and the staging directory is removed either way.
+    """
+    package_dir = config.TEMP_DIR / f"capcut_pkg_{project_id}_{os.getpid()}"
+    if package_dir.exists():
+        shutil.rmtree(package_dir)
+    # parents=True so a fresh install without TEMP_DIR does not fail here.
+    package_dir.mkdir(parents=True)
+
+    zip_path = config.TEMP_DIR / f"capcut_export_{project_id}.zip"
+    try:
+        _populate_capcut_package(package_dir, script_data, assets)
+
+        # 5. Zip it, storing paths relative to the package root.
+        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
+            for root, _dirs, files in os.walk(package_dir):
+                for file in files:
+                    file_path = os.path.join(root, file)
+                    arcname = os.path.relpath(file_path, package_dir)
+                    zipf.write(file_path, arcname)
+    finally:
+        # Always drop the staging directory, including when a step above
+        # raised; a cleanup failure must not mask the original error.
+        shutil.rmtree(package_dir, ignore_errors=True)
+
+    return str(zip_path)
+
+
+
+
+