feat: video-flow initial commit

- app.py: Streamlit UI for video generation workflow
- main_flow.py: CLI tool with argparse support
- modules/: Business logic modules (script_gen, image_gen, video_gen, composer, etc.)
- config.py: Configuration with API keys and paths
- requirements.txt: Python dependencies
- docs/: System prompt documentation
2025-12-12 19:18:27 +08:00
commit 33a165a615
34 changed files with 12012 additions and 0 deletions
--- a/web_app.py
+++ b/web_app.py
@@ -0,0 +1,593 @@
+"""
+MatchMe Studio - 6-Step Video Creation Wizard (v2)
+"""
+import streamlit as st
+import logging
+from pathlib import Path
+
+import config
+from modules import brain, factory, editor, storage, ingest, asr, project
+
+# Configure the root logger to emit records at INFO level and above.
+logging.basicConfig(level=logging.INFO)
+
+# Page-level settings: browser tab title, tab icon, and the wide layout.
+st.set_page_config(
+    page_title="MatchMe 视频工场",
+    page_icon="🎬",
+    layout="wide"
+)
+
+# Custom CSS, injected as raw HTML (hence unsafe_allow_html=True).
+# It styles the file uploader width, the gradient ".step-header" banner used
+# by every step, the rounded gradient buttons, and the ".scene-card" /
+# ".question-card" containers.
+st.markdown("""
+<style>
+    /* Fix for file uploader */
+    section[data-testid="stFileUploader"] {
+        width: 100%;
+    }
+    
+    .step-header {
+        background: linear-gradient(90deg, #FF4B4B, #FF914D);
+        padding: 10px 20px;
+        border-radius: 10px;
+        color: white;
+        font-weight: bold;
+        margin-bottom: 20px;
+    }
+    .stButton>button {
+        border-radius: 20px;
+        background: linear-gradient(45deg, #FF4B4B, #FF914D);
+        color: white;
+        border: none;
+        padding: 10px 30px;
+    }
+    .scene-card {
+        background: #262730;
+        padding: 15px;
+        border-radius: 10px;
+        margin: 10px 0;
+    }
+    .question-card {
+        background: #1e1e2e;
+        padding: 15px;
+        border-radius: 8px;
+        margin: 10px 0;
+        border-left: 3px solid #FF4B4B;
+    }
+</style>
+""", unsafe_allow_html=True)
+
+
+def init_session():
+    """Initialize session state."""
+    if "proj" not in st.session_state:
+        st.session_state.proj = project.create_project()
+    if "step" not in st.session_state:
+        st.session_state.step = 0
+    if "brief" not in st.session_state:
+        st.session_state.brief = {}
+
+
+def render_sidebar():
+    """Render sidebar with project info."""
+    with st.sidebar:
+        st.header("项目控制台")
+        
+        proj = st.session_state.proj
+        st.text(f"项目 ID: {proj.id}")
+        st.text(f"状态: {proj.status}")
+        
+        st.divider()
+        
+        load_id = st.text_input("恢复项目 (输入ID)")
+        if st.button("加载"):
+            loaded = project.load_project(load_id)
+            if loaded:
+                st.session_state.proj = loaded
+                st.success(f"已加载项目 {load_id}")
+                st.rerun()
+            else:
+                st.error("项目不存在")
+        
+        st.divider()
+        
+        if st.button("重置项目"):
+            st.session_state.proj = project.create_project()
+            st.session_state.step = 0
+            st.session_state.brief = {}
+            st.rerun()
+        
+        st.divider()
+        steps = ["素材提交", "AI分析", "脚本生成", "画面生成", "视频生成", "最终合成"]
+        for i, name in enumerate(steps):
+            if i == st.session_state.step:
+                st.markdown(f"**→ {i}. {name}**")
+            elif i < st.session_state.step:
+                st.markdown(f"✅ {i}. {name}")
+            else:
+                st.markdown(f"○ {i}. {name}")
+
+
+def step0_ingest():
+    """Step 0: Material Submission."""
+    st.markdown('<div class="step-header">Step 0: 素材提交</div>', unsafe_allow_html=True)
+    
+    proj = st.session_state.proj
+    
+    mode = st.radio(
+        "选择输入方式",
+        ["纯文本创意", "图片 + 描述", "视频 + 描述"],
+        horizontal=True
+    )
+    
+    prompt = st.text_area("创意描述 / 产品卖点", height=100, placeholder="描述你想要的视频内容...")
+    
+    if mode == "纯文本创意":
+        proj.input_mode = "text"
+        
+    elif mode == "图片 + 描述":
+        proj.input_mode = "images"
+        uploaded = st.file_uploader("上传图片 (支持多张)", type=["jpg", "png", "jpeg"], accept_multiple_files=True)
+        
+        if uploaded:
+            urls = []
+            with st.spinner("上传图片中..."):
+                for f in uploaded:
+                    temp_path = config.TEMP_DIR / f.name
+                    with open(temp_path, "wb") as fp:
+                        fp.write(f.getbuffer())
+                    url = storage.upload_file(str(temp_path))
+                    if url:
+                        urls.append(url)
+                    else:
+                        st.error(f"上传失败: {f.name}")
+            
+            if urls:
+                proj.image_urls = urls
+                st.image(urls, width=150)
+                st.success(f"成功上传 {len(urls)} 张图片")
+            
+    elif mode == "视频 + 描述":
+        proj.input_mode = "video"
+        uploaded = st.file_uploader("上传视频", type=["mp4"])
+        
+        if uploaded:
+            with st.spinner("处理视频中..."):
+                temp_path = config.TEMP_DIR / uploaded.name
+                with open(temp_path, "wb") as f:
+                    f.write(uploaded.getbuffer())
+                
+                try:
+                    frame_urls, video_url = ingest.process_uploaded_video(str(temp_path))
+                    proj.image_urls = frame_urls
+                    proj.video_url = video_url
+                    st.image(frame_urls, width=150, caption=["帧1", "帧2", "帧3"])
+                except Exception as e:
+                    st.error(f"视频处理失败: {e}")
+                
+                try:
+                    asr_text = asr.transcribe_video(str(temp_path))
+                    proj.asr_text = asr_text
+                    st.info(f"语音识别: {asr_text[:100]}...")
+                except Exception as e:
+                    st.warning(f"语音识别失败: {e}")
+    
+    proj.prompt = prompt
+    
+    if st.button("下一步: AI 分析", disabled=not prompt):
+        proj.status = "analyzing"
+        project.save_project(proj)
+        st.session_state.step = 1
+        st.rerun()
+
+
+def step1_analyze():
+    """Step 1: AI Analysis & Questions with multi-select and custom input."""
+    st.markdown('<div class="step-header">Step 1: AI 深度分析</div>', unsafe_allow_html=True)
+    
+    proj = st.session_state.proj
+    
+    # Run analysis if not done
+    if not proj.analysis:
+        with st.spinner("AI 正在分析素材..."):
+            result = brain.analyze_materials(
+                prompt=proj.prompt,
+                image_urls=proj.image_urls if proj.image_urls else None,
+                asr_text=proj.asr_text
+            )
+            proj.analysis = result.get("analysis", "")
+            proj.questions = result.get("questions", [])
+            project.save_project(proj)
+    
+    st.subheader("分析结果")
+    st.write(proj.analysis)
+    
+    # Show questions with multi-select and custom input
+    if proj.questions:
+        st.subheader("补充信息")
+        st.caption("请回答以下问题，帮助 AI 更好地理解你的需求")
+        
+        answers = {}
+        for q in proj.questions:
+            qid = q["id"]
+            st.markdown(f'<div class="question-card">', unsafe_allow_html=True)
+            
+            # Check if multi-select is allowed
+            allow_multiple = q.get("allow_multiple", False)
+            allow_custom = q.get("allow_custom", True)
+            
+            if allow_multiple:
+                selected = st.multiselect(
+                    q["text"],
+                    q["options"],
+                    key=f"q_{qid}"
+                )
+                answers[qid] = {"selected": selected}
+            else:
+                selected = st.radio(
+                    q["text"],
+                    q["options"],
+                    key=f"q_{qid}"
+                )
+                answers[qid] = {"selected": [selected] if selected else []}
+            
+            # Custom input for additional context
+            if allow_custom:
+                custom = st.text_input(
+                    "补充说明 (选填)",
+                    key=f"custom_{qid}",
+                    placeholder="如有其他想法，请在此补充..."
+                )
+                answers[qid]["custom"] = custom
+            
+            st.markdown('</div>', unsafe_allow_html=True)
+        
+        if st.button("确认回答，生成创意简报"):
+            proj.answers = answers
+            
+            # Refine brief with answers
+            with st.spinner("整合创意简报中..."):
+                brief_result = brain.refine_brief(
+                    proj.prompt, 
+                    {"analysis": proj.analysis}, 
+                    answers,
+                    proj.image_urls
+                )
+                st.session_state.brief = brief_result.get("brief", {})
+                
+                # Store creative summary
+                if "creative_summary" in brief_result:
+                    st.session_state.brief["creative_summary"] = brief_result["creative_summary"]
+            
+            project.save_project(proj)
+            st.session_state.step = 2
+            st.rerun()
+    else:
+        # No questions needed, build basic brief
+        if st.button("下一步: 生成脚本"):
+            st.session_state.brief = {
+                "product": proj.prompt,
+                "selling_points": [],
+                "style": "现代广告"
+            }
+            st.session_state.step = 2
+            st.rerun()
+
+
+def step2_script():
+    """Step 2: Script Generation."""
+    st.markdown('<div class="step-header">Step 2: 脚本生成</div>', unsafe_allow_html=True)
+    
+    proj = st.session_state.proj
+    brief = st.session_state.brief
+    
+    # Show creative summary
+    if brief.get("creative_summary"):
+        st.info(f"🎯 创意方向: {brief['creative_summary']}")
+    
+    if brief.get("style"):
+        st.caption(f"视频风格: {brief['style']}")
+    
+    # Generate script if not done
+    if not proj.scenes:
+        with st.spinner("AI 正在创作脚本..."):
+            script = brain.generate_script(brief, proj.image_urls)
+            proj.hook = script.get("hook", "")
+            proj.scenes = script.get("scenes", [])
+            proj.cta = script.get("cta", "")
+            
+            # Store creative summary from script if available
+            if script.get("creative_summary"):
+                brief["creative_summary"] = script["creative_summary"]
+                st.session_state.brief = brief
+            
+            proj.status = "scripting"
+            project.save_project(proj)
+    
+    # Display script
+    st.subheader(f"🎣 Hook: {proj.hook}")
+    
+    # Creative summary
+    if brief.get("creative_summary"):
+        st.markdown(f"**整体创意**: {brief['creative_summary']}")
+    
+    for i, scene in enumerate(proj.scenes):
+        with st.expander(f"分镜 {scene.get('id', i+1)}: {scene.get('timeline', '')}"):
+            col1, col2 = st.columns(2)
+            
+            with col1:
+                st.write(f"**时长**: {scene.get('duration', 5)}秒")
+                st.write(f"**运镜**: {scene.get('camera_movement', '')}")
+                st.write(f"**故事节拍**: {scene.get('story_beat', '')}")
+                st.write(f"**音效设计**: {scene.get('sound_design', '')}")
+            
+            with col2:
+                kf = scene.get("keyframe", {})
+                st.write(f"**色调**: {kf.get('color_tone', '')}")
+                st.write(f"**环境**: {kf.get('environment', '')}")
+                st.write(f"**焦点**: {kf.get('focus', '')}")
+                st.write(f"**构图**: {kf.get('composition', '')}")
+            
+            # Image prompt (key for generation)
+            st.write("**生图Prompt**:")
+            st.code(scene.get("image_prompt", "(未生成)"), language=None)
+            
+            st.write(f"**旁白**: {scene.get('voiceover', '(无)')}")
+            
+            feedback = st.text_input(f"修改意见", key=f"fb_{i}")
+            if st.button(f"重新生成此分镜", key=f"regen_{i}"):
+                with st.spinner("重新生成中..."):
+                    new_scene = brain.regenerate_scene(
+                        {"hook": proj.hook, "scenes": proj.scenes, "cta": proj.cta},
+                        scene.get("id", i+1),
+                        feedback,
+                        brief
+                    )
+                    proj.scenes[i] = new_scene
+                    project.save_project(proj)
+                    st.rerun()
+    
+    # CTA - ensure it's a string
+    cta_text = proj.cta
+    if isinstance(cta_text, dict):
+        cta_text = cta_text.get("text", str(cta_text))
+    st.subheader(f"📢 CTA: {cta_text}")
+    
+    col1, col2 = st.columns(2)
+    with col1:
+        regen_feedback = st.text_input("整体修改意见")
+        if st.button("重新生成整个脚本"):
+            with st.spinner("重新生成中..."):
+                script = brain.generate_script(brief, proj.image_urls, regen_feedback)
+                proj.hook = script.get("hook", "")
+                proj.scenes = script.get("scenes", [])
+                proj.cta = script.get("cta", "")
+                project.save_project(proj)
+                st.rerun()
+    
+    with col2:
+        if st.button("确认脚本，下一步"):
+            st.session_state.step = 3
+            st.rerun()
+
+
+def step3_images():
+    """Step 3: Image Generation (Concurrent) using Gemini Image."""
+    st.markdown('<div class="step-header">Step 3: 画面生成 (Gemini Image)</div>', unsafe_allow_html=True)
+    
+    proj = st.session_state.proj
+    brief = st.session_state.brief
+    
+    # Show reference images if available
+    if proj.image_urls:
+        st.caption("参考素材（用于保持产品一致性）:")
+        st.image(proj.image_urls[:3], width=100)
+    
+    has_images = all(s.get("image_url") for s in proj.scenes)
+    
+    if not has_images:
+        if st.button("开始生成所有画面 (并发)"):
+            progress = st.progress(0)
+            status = st.empty()
+            
+            try:
+                status.text("正在并发生成所有分镜画面...")
+                # Pass user's reference images for product consistency
+                image_urls = factory.generate_all_scene_images_concurrent(
+                    proj.scenes, 
+                    brief,
+                    reference_images=proj.image_urls,  # 传递用户素材
+                    max_workers=3
+                )
+                
+                for i, url in enumerate(image_urls):
+                    if url:
+                        proj.scenes[i]["image_url"] = url
+                    progress.progress((i + 1) / len(proj.scenes))
+                
+                proj.status = "imaging"
+                project.save_project(proj)
+                st.rerun()
+                
+            except Exception as e:
+                st.error(f"生成失败: {e}")
+                import traceback
+                st.code(traceback.format_exc())
+    
+    # Display images in grid
+    cols = st.columns(min(4, len(proj.scenes)))
+    for i, scene in enumerate(proj.scenes):
+        with cols[i % 4]:
+            img_url = scene.get("image_url", "")
+            if img_url:
+                st.image(img_url, caption=f"分镜 {scene.get('id', i+1)}")
+                
+                if st.button(f"重新生成", key=f"img_regen_{i}"):
+                    with st.spinner("生成中..."):
+                        url = factory.generate_scene_image(scene, brief, proj.image_urls)
+                        proj.scenes[i]["image_url"] = url
+                        project.save_project(proj)
+                        st.rerun()
+                
+                custom = st.file_uploader(f"替换", key=f"img_up_{i}", type=["jpg", "png"])
+                if custom:
+                    temp_path = config.TEMP_DIR / custom.name
+                    with open(temp_path, "wb") as f:
+                        f.write(custom.getbuffer())
+                    url = storage.upload_file(str(temp_path))
+                    if url:
+                        proj.scenes[i]["image_url"] = url
+                        project.save_project(proj)
+                        st.rerun()
+            
+            vo = st.text_area(f"旁白", scene.get("voiceover", ""), key=f"vo_{i}", height=80)
+            if vo != scene.get("voiceover", ""):
+                proj.scenes[i]["voiceover"] = vo
+                project.save_project(proj)
+    
+    if has_images and st.button("下一步: 生成视频"):
+        st.session_state.step = 4
+        st.rerun()
+
+
+def step4_videos():
+    """Step 4: Video Generation (Concurrent) using Sora 2."""
+    st.markdown('<div class="step-header">Step 4: 分镜视频生成 (Sora 2)</div>', unsafe_allow_html=True)
+    
+    proj = st.session_state.proj
+    
+    has_videos = all(s.get("video_url") for s in proj.scenes)
+    
+    if not has_videos:
+        if st.button("开始生成所有视频 (并发)"):
+            progress = st.progress(0)
+            status = st.empty()
+            
+            try:
+                image_urls = [s.get("image_url") for s in proj.scenes]
+                
+                status.text("正在并发生成所有分镜视频 (Sora 2)...")
+                video_urls = factory.generate_all_scene_videos_concurrent(
+                    proj.scenes,
+                    image_urls,
+                    max_workers=2
+                )
+                
+                for i, url in enumerate(video_urls):
+                    if url:
+                        proj.scenes[i]["video_url"] = url
+                    progress.progress((i + 1) / len(proj.scenes))
+                
+                proj.status = "video"
+                project.save_project(proj)
+                st.rerun()
+                
+            except Exception as e:
+                st.error(f"视频生成失败: {e}")
+                import traceback
+                st.code(traceback.format_exc())
+    
+    # Display videos
+    for i, scene in enumerate(proj.scenes):
+        vid_url = scene.get("video_url", "")
+        if vid_url:
+            col1, col2 = st.columns([3, 1])
+            with col1:
+                st.video(vid_url)
+            with col2:
+                st.write(f"分镜 {scene.get('id', i+1)}")
+                st.write(f"{scene.get('duration', 5)}秒")
+                
+                if st.button(f"重新生成", key=f"vid_regen_{i}"):
+                    with st.spinner("生成中..."):
+                        image_url = scene.get("image_url", "")
+                        url = factory.generate_scene_video(
+                            image_url,
+                            scene.get("camera_movement", "slow zoom"),
+                            scene.get("duration", 5)
+                        )
+                        proj.scenes[i]["video_url"] = url
+                        project.save_project(proj)
+                        st.rerun()
+    
+    if has_videos and st.button("下一步: 合成"):
+        st.session_state.step = 5
+        st.rerun()
+
+
+def step5_render():
+    """Step 5: Final Rendering."""
+    st.markdown('<div class="step-header">Step 5: 最终合成</div>', unsafe_allow_html=True)
+    
+    proj = st.session_state.proj
+    brief = st.session_state.brief
+    
+    col1, col2 = st.columns(2)
+    
+    with col1:
+        add_subtitles = st.checkbox("烧录字幕", value=True)
+        add_voiceover = st.checkbox("添加旁白配音", value=True)
+    
+    with col2:
+        add_bgm = st.checkbox("添加背景音乐", value=False)
+        bgm_file = None
+        if add_bgm:
+            bgm_file = st.file_uploader("上传 BGM", type=["mp3", "wav"])
+    
+    if st.button("开始合成"):
+        with st.spinner("合成中，请稍候..."):
+            video_urls = [s.get("video_url") for s in proj.scenes]
+            
+            vo_url = ""
+            if add_voiceover:
+                style = brief.get("style", "")
+                vo_url = factory.generate_full_voiceover(proj.scenes, style)
+            
+            bgm_url = ""
+            if bgm_file:
+                temp_path = config.TEMP_DIR / bgm_file.name
+                with open(temp_path, "wb") as f:
+                    f.write(bgm_file.getbuffer())
+                bgm_url = storage.upload_file(str(temp_path))
+            
+            final_url = editor.assemble_final_video(
+                video_urls=video_urls,
+                scenes=proj.scenes if add_subtitles else [],
+                voiceover_url=vo_url,
+                bgm_url=bgm_url
+            )
+            
+            proj.final_video_url = final_url
+            proj.status = "done"
+            project.save_project(proj)
+        
+        st.success("🎉 视频合成完成！")
+        st.video(final_url)
+        st.markdown(f"### [📥 下载高清视频]({final_url})")
+        
+        storage.cleanup_temp()
+
+
+def main():
+    init_session()
+    render_sidebar()
+    
+    st.title("MatchMe 视频工场 🎬")
+    st.caption("AI 驱动的短视频创作平台")
+    
+    step = st.session_state.step
+    
+    if step == 0:
+        step0_ingest()
+    elif step == 1:
+        step1_analyze()
+    elif step == 2:
+        step2_script()
+    elif step == 3:
+        step3_images()
+    elif step == 4:
+        step4_videos()
+    elif step == 5:
+        step5_render()
+
+
+if __name__ == "__main__":
+    main()