"""MatchMe Studio - 6-Step Video Creation Wizard (v2).

Streamlit front end that walks the user through six steps: material
submission, AI analysis, script generation, image generation, video
generation and final rendering.  Project data lives on the object stored in
``st.session_state.proj``; the current step index and the creative brief are
kept in ``st.session_state.step`` and ``st.session_state.brief``.
"""

import logging
import traceback
from pathlib import Path

import streamlit as st

import config
from modules import asr, brain, editor, factory, ingest, project, storage

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

st.set_page_config(
    page_title="MatchMe 视频工场",
    page_icon="🎬",
    layout="wide",
)

# Custom CSS
# NOTE(review): the stylesheet body is blank in the copy this file was
# restored from (markup appears to have been stripped) - re-add the rules.
st.markdown(""" """, unsafe_allow_html=True)

# Sidebar labels, indexed by the value of ``st.session_state.step``.
STEP_NAMES = ["素材提交", "AI分析", "脚本生成", "画面生成", "视频生成", "最终合成"]

# Session-state keys used to remember which uploads were already processed.
# Streamlit re-executes the whole script on every widget interaction while an
# uploader keeps returning the same file, so without these markers every
# rerun would upload / transcode the same material again.
_IMAGE_INGEST_KEY = "_ingest_images"
_VIDEO_INGEST_KEY = "_ingest_video"
_REPLACED_IMAGE_KEY = "_img_up_done_{index}"


def _step_header(title: str) -> None:
    """Render the heading shown at the top of a wizard step."""
    # NOTE(review): the surrounding HTML wrapper was lost from this literal;
    # only the text that survived is reproduced here.
    st.markdown(f"\n\n{title}\n\n", unsafe_allow_html=True)


def _save_upload(uploaded) -> Path:
    """Write a Streamlit upload into ``config.TEMP_DIR`` and return its path.

    Only the basename of the client-supplied file name is used, so a name
    containing directory components cannot place the file outside the
    temporary directory.  The directory is created when missing.
    """
    temp_dir = Path(config.TEMP_DIR)
    temp_dir.mkdir(parents=True, exist_ok=True)
    target = temp_dir / Path(uploaded.name).name
    with open(target, "wb") as handle:
        handle.write(uploaded.getbuffer())
    return target


def _upload_signature(proj, files) -> tuple:
    """Return a value identifying a set of uploads for a given project.

    The project id is part of the signature so that resetting or loading a
    project causes the files still held by the uploader to be processed again.
    """
    return (proj.id, tuple((item.name, item.size) for item in files))


def _apply_script(proj, script: dict) -> None:
    """Copy hook, scenes and CTA from a generated script onto the project."""
    proj.hook = script.get("hook", "")
    proj.scenes = script.get("scenes", [])
    proj.cta = script.get("cta", "")


def init_session() -> None:
    """Initialize session state."""
    if "proj" not in st.session_state:
        st.session_state.proj = project.create_project()
    if "step" not in st.session_state:
        st.session_state.step = 0
    if "brief" not in st.session_state:
        st.session_state.brief = {}


def render_sidebar() -> None:
    """Render sidebar with project info, load/reset controls and progress."""
    with st.sidebar:
        st.header("项目控制台")
        proj = st.session_state.proj
        st.text(f"项目 ID: {proj.id}")
        st.text(f"状态: {proj.status}")

        st.divider()
        load_id = st.text_input("恢复项目 (输入ID)")
        if st.button("加载"):
            wanted = load_id.strip()
            # A blank id can never match a project; skip the lookup.
            loaded = project.load_project(wanted) if wanted else None
            if loaded:
                st.session_state.proj = loaded
                st.success(f"已加载项目 {load_id}")
                st.rerun()
            else:
                st.error("项目不存在")

        st.divider()
        if st.button("重置项目"):
            st.session_state.proj = project.create_project()
            st.session_state.step = 0
            st.session_state.brief = {}
            st.rerun()

        st.divider()
        current = st.session_state.step
        for index, name in enumerate(STEP_NAMES):
            if index == current:
                st.markdown(f"**→ {index}. {name}**")
            elif index < current:
                st.markdown(f"✅ {index}. {name}")
            else:
                st.markdown(f"○ {index}. {name}")


def _ingest_images(proj, uploaded) -> None:
    """Upload reference images once per file set and show the result."""
    signature = _upload_signature(proj, uploaded)
    cached = st.session_state.get(_IMAGE_INGEST_KEY)
    if cached is not None and cached[0] == signature:
        urls = cached[1]
    else:
        urls = []
        with st.spinner("上传图片中..."):
            for item in uploaded:
                url = storage.upload_file(str(_save_upload(item)))
                if url:
                    urls.append(url)
                else:
                    st.error(f"上传失败: {item.name}")
        if urls:
            proj.image_urls = urls
            # Remember only (partially) successful runs so that a complete
            # failure is retried on the next rerun, as before.
            st.session_state[_IMAGE_INGEST_KEY] = (signature, urls)

    if urls:
        st.image(urls, width=150)
        st.success(f"成功上传 {len(urls)} 张图片")


def _ingest_video(proj, uploaded) -> None:
    """Extract frames and transcribe a video once per file, then show results."""
    signature = _upload_signature(proj, [uploaded])
    outcome = st.session_state.get(_VIDEO_INGEST_KEY)
    if outcome is None or outcome["signature"] != signature:
        outcome = {
            "signature": signature,
            "frame_urls": None,
            "video_error": None,
            "asr_text": None,
            "asr_error": None,
        }
        with st.spinner("处理视频中..."):
            temp_path = _save_upload(uploaded)

            try:
                frame_urls, video_url = ingest.process_uploaded_video(str(temp_path))
                proj.image_urls = frame_urls
                proj.video_url = video_url
                outcome["frame_urls"] = frame_urls
            except Exception as exc:  # UI boundary: report, do not crash the page
                logger.exception("Video processing failed")
                outcome["video_error"] = str(exc)

            try:
                asr_text = asr.transcribe_video(str(temp_path))
                proj.asr_text = asr_text
                outcome["asr_text"] = asr_text
            except Exception as exc:  # transcription is best-effort
                logger.exception("Speech recognition failed")
                outcome["asr_error"] = str(exc)
        st.session_state[_VIDEO_INGEST_KEY] = outcome

    frames = outcome["frame_urls"]
    if outcome["video_error"] is not None:
        st.error(f"视频处理失败: {outcome['video_error']}")
    elif frames:
        # One caption per frame; a fixed three-item list makes st.image raise
        # whenever the extractor returns a different number of frames.
        captions = [f"帧{number}" for number in range(1, len(frames) + 1)]
        st.image(frames, width=150, caption=captions)

    if outcome["asr_error"] is not None:
        st.warning(f"语音识别失败: {outcome['asr_error']}")
    elif outcome["asr_text"]:
        st.info(f"语音识别: {outcome['asr_text'][:100]}...")


def step0_ingest() -> None:
    """Step 0: Material Submission."""
    _step_header("Step 0: 素材提交")
    proj = st.session_state.proj

    mode = st.radio(
        "选择输入方式",
        ["纯文本创意", "图片 + 描述", "视频 + 描述"],
        horizontal=True,
    )
    prompt = st.text_area(
        "创意描述 / 产品卖点",
        height=100,
        placeholder="描述你想要的视频内容...",
    )

    if mode == "纯文本创意":
        proj.input_mode = "text"
    elif mode == "图片 + 描述":
        proj.input_mode = "images"
        uploaded = st.file_uploader(
            "上传图片 (支持多张)",
            type=["jpg", "png", "jpeg"],
            accept_multiple_files=True,
        )
        if uploaded:
            _ingest_images(proj, uploaded)
        else:
            # Forget the marker so re-adding the same files processes them.
            st.session_state.pop(_IMAGE_INGEST_KEY, None)
    elif mode == "视频 + 描述":
        proj.input_mode = "video"
        uploaded = st.file_uploader("上传视频", type=["mp4"])
        if uploaded:
            _ingest_video(proj, uploaded)
        else:
            st.session_state.pop(_VIDEO_INGEST_KEY, None)

    proj.prompt = prompt

    if st.button("下一步: AI 分析", disabled=not prompt):
        proj.status = "analyzing"
        project.save_project(proj)
        st.session_state.step = 1
        st.rerun()


def step1_analyze() -> None:
    """Step 1: AI Analysis & Questions with multi-select and custom input."""
    _step_header("Step 1: AI 深度分析")
    proj = st.session_state.proj

    # Run analysis if not done
    if not proj.analysis:
        with st.spinner("AI 正在分析素材..."):
            result = brain.analyze_materials(
                prompt=proj.prompt,
                image_urls=proj.image_urls or None,
                asr_text=proj.asr_text,
            )
            proj.analysis = result.get("analysis", "")
            proj.questions = result.get("questions", [])
            project.save_project(proj)

    st.subheader("分析结果")
    st.write(proj.analysis)

    if not proj.questions:
        # No questions needed, build basic brief
        if st.button("下一步: 生成脚本"):
            st.session_state.brief = {
                "product": proj.prompt,
                "selling_points": [],
                "style": "现代广告",
            }
            st.session_state.step = 2
            st.rerun()
        return

    # Show questions with multi-select and custom input
    st.subheader("补充信息")
    st.caption("请回答以下问题，帮助 AI 更好地理解你的需求")

    answers = {}
    for question in proj.questions:
        qid = question["id"]
        # NOTE(review): opening/closing wrapper markup of the question card
        # was stripped from these two literals; only the newlines remain.
        st.markdown("\n\n", unsafe_allow_html=True)

        if question.get("allow_multiple", False):
            selected = st.multiselect(
                question["text"], question["options"], key=f"q_{qid}"
            )
            answers[qid] = {"selected": selected}
        else:
            selected = st.radio(
                question["text"], question["options"], key=f"q_{qid}"
            )
            answers[qid] = {"selected": [selected] if selected is not None else []}

        # Custom input for additional context
        if question.get("allow_custom", True):
            answers[qid]["custom"] = st.text_input(
                "补充说明 (选填)",
                key=f"custom_{qid}",
                placeholder="如有其他想法，请在此补充...",
            )

        st.markdown("\n\n", unsafe_allow_html=True)

    if st.button("确认回答，生成创意简报"):
        proj.answers = answers
        # Refine brief with answers
        with st.spinner("整合创意简报中..."):
            brief_result = brain.refine_brief(
                proj.prompt,
                {"analysis": proj.analysis},
                answers,
                proj.image_urls,
            )
            brief = brief_result.get("brief") or {}
            # Store creative summary
            if "creative_summary" in brief_result:
                brief["creative_summary"] = brief_result["creative_summary"]
            st.session_state.brief = brief
            project.save_project(proj)
        st.session_state.step = 2
        st.rerun()


def step2_script() -> None:
    """Step 2: Script Generation."""
    _step_header("Step 2: 脚本生成")
    proj = st.session_state.proj
    brief = st.session_state.brief

    # Show creative summary
    if brief.get("creative_summary"):
        st.info(f"🎯 创意方向: {brief['creative_summary']}")
    if brief.get("style"):
        st.caption(f"视频风格: {brief['style']}")

    # Generate script if not done
    if not proj.scenes:
        with st.spinner("AI 正在创作脚本..."):
            script = brain.generate_script(brief, proj.image_urls)
            _apply_script(proj, script)
            # Store creative summary from script if available
            if script.get("creative_summary"):
                brief["creative_summary"] = script["creative_summary"]
                st.session_state.brief = brief
            proj.status = "scripting"
            project.save_project(proj)

    # Display script
    st.subheader(f"🎣 Hook: {proj.hook}")
    if brief.get("creative_summary"):
        st.markdown(f"**整体创意**: {brief['creative_summary']}")

    for i, scene in enumerate(proj.scenes):
        scene_id = scene.get("id", i + 1)
        with st.expander(f"分镜 {scene_id}: {scene.get('timeline', '')}"):
            col1, col2 = st.columns(2)
            with col1:
                st.write(f"**时长**: {scene.get('duration', 5)}秒")
                st.write(f"**运镜**: {scene.get('camera_movement', '')}")
                st.write(f"**故事节拍**: {scene.get('story_beat', '')}")
                st.write(f"**音效设计**: {scene.get('sound_design', '')}")
            with col2:
                keyframe = scene.get("keyframe") or {}
                st.write(f"**色调**: {keyframe.get('color_tone', '')}")
                st.write(f"**环境**: {keyframe.get('environment', '')}")
                st.write(f"**焦点**: {keyframe.get('focus', '')}")
                st.write(f"**构图**: {keyframe.get('composition', '')}")

            # Image prompt (key for generation)
            st.write("**生图Prompt**:")
            st.code(scene.get("image_prompt", "(未生成)"), language=None)
            st.write(f"**旁白**: {scene.get('voiceover', '(无)')}")

            feedback = st.text_input("修改意见", key=f"fb_{i}")
            if st.button("重新生成此分镜", key=f"regen_{i}"):
                with st.spinner("重新生成中..."):
                    proj.scenes[i] = brain.regenerate_scene(
                        {"hook": proj.hook, "scenes": proj.scenes, "cta": proj.cta},
                        scene_id,
                        feedback,
                        brief,
                    )
                    project.save_project(proj)
                st.rerun()

    # CTA - ensure it's a string
    cta_text = proj.cta
    if isinstance(cta_text, dict):
        cta_text = cta_text.get("text", str(cta_text))
    st.subheader(f"📢 CTA: {cta_text}")

    col1, col2 = st.columns(2)
    with col1:
        regen_feedback = st.text_input("整体修改意见")
        if st.button("重新生成整个脚本"):
            with st.spinner("重新生成中..."):
                script = brain.generate_script(brief, proj.image_urls, regen_feedback)
                _apply_script(proj, script)
                project.save_project(proj)
            st.rerun()
    with col2:
        if st.button("确认脚本，下一步"):
            st.session_state.step = 3
            st.rerun()


def _replace_scene_image(proj, index: int) -> None:
    """Offer a per-scene uploader that swaps in a user-supplied image.

    The uploader keeps returning the same file after ``st.rerun()``; handling
    it unconditionally would upload and rerun in an endless loop, so each
    uploaded file is processed exactly once.
    """
    marker_key = _REPLACED_IMAGE_KEY.format(index=index)
    custom = st.file_uploader("替换", key=f"img_up_{index}", type=["jpg", "png"])
    if not custom:
        st.session_state.pop(marker_key, None)
        return

    marker = (custom.name, custom.size)
    if st.session_state.get(marker_key) == marker:
        return  # already applied on an earlier run

    url = storage.upload_file(str(_save_upload(custom)))
    if url:
        st.session_state[marker_key] = marker
        proj.scenes[index]["image_url"] = url
        project.save_project(proj)
        st.rerun()


def step3_images() -> None:
    """Step 3: Image Generation (Concurrent) using Gemini Image."""
    _step_header("Step 3: 画面生成 (Gemini Image)")
    proj = st.session_state.proj
    brief = st.session_state.brief

    # Show reference images if available
    if proj.image_urls:
        st.caption("参考素材（用于保持产品一致性）:")
        st.image(proj.image_urls[:3], width=100)

    # all() is vacuously true for an empty list; without scenes nothing has
    # been generated, so the step must not count as complete.
    has_images = bool(proj.scenes) and all(s.get("image_url") for s in proj.scenes)

    if not has_images and st.button("开始生成所有画面 (并发)"):
        progress = st.progress(0)
        status = st.empty()
        try:
            status.text("正在并发生成所有分镜画面...")
            # Pass user's reference images for product consistency
            image_urls = factory.generate_all_scene_images_concurrent(
                proj.scenes,
                brief,
                reference_images=proj.image_urls,
                max_workers=3,
            )
            total = len(proj.scenes)
            for done, (scene, url) in enumerate(zip(proj.scenes, image_urls), start=1):
                if url:
                    scene["image_url"] = url
                progress.progress(done / total)
            proj.status = "imaging"
            project.save_project(proj)
        except Exception as exc:  # UI boundary: show the failure in the page
            logger.exception("Scene image generation failed")
            st.error(f"生成失败: {exc}")
            st.code(traceback.format_exc())
        else:
            # Kept out of the try block so the rerun control-flow exception
            # can never be mistaken for a generation failure.
            st.rerun()

    # Display images in grid (st.columns rejects a count of zero)
    if proj.scenes:
        cols = st.columns(min(4, len(proj.scenes)))
        for i, scene in enumerate(proj.scenes):
            with cols[i % 4]:
                img_url = scene.get("image_url", "")
                # NOTE(review): nesting reconstructed from a whitespace-
                # collapsed source; the per-scene controls are assumed to be
                # shown only once the scene has an image - confirm.
                if not img_url:
                    continue
                st.image(img_url, caption=f"分镜 {scene.get('id', i + 1)}")

                if st.button("重新生成", key=f"img_regen_{i}"):
                    with st.spinner("生成中..."):
                        url = factory.generate_scene_image(scene, brief, proj.image_urls)
                    if url:
                        proj.scenes[i]["image_url"] = url
                        project.save_project(proj)
                        st.rerun()
                    else:
                        # Keep the existing image instead of blanking it.
                        st.error("生成失败: 未返回图片地址")

                _replace_scene_image(proj, i)

                vo = st.text_area(
                    "旁白", scene.get("voiceover", ""), key=f"vo_{i}", height=80
                )
                if vo != scene.get("voiceover", ""):
                    proj.scenes[i]["voiceover"] = vo
                    project.save_project(proj)

    if has_images and st.button("下一步: 生成视频"):
        st.session_state.step = 4
        st.rerun()


def step4_videos() -> None:
    """Step 4: Video Generation (Concurrent) using Sora 2."""
    _step_header("Step 4: 分镜视频生成 (Sora 2)")
    proj = st.session_state.proj

    # See step3_images: an empty scene list must not count as "all done".
    has_videos = bool(proj.scenes) and all(s.get("video_url") for s in proj.scenes)

    if not has_videos and st.button("开始生成所有视频 (并发)"):
        progress = st.progress(0)
        status = st.empty()
        try:
            image_urls = [s.get("image_url") for s in proj.scenes]
            status.text("正在并发生成所有分镜视频 (Sora 2)...")
            video_urls = factory.generate_all_scene_videos_concurrent(
                proj.scenes,
                image_urls,
                max_workers=2,
            )
            total = len(proj.scenes)
            for done, (scene, url) in enumerate(zip(proj.scenes, video_urls), start=1):
                if url:
                    scene["video_url"] = url
                progress.progress(done / total)
            proj.status = "video"
            project.save_project(proj)
        except Exception as exc:  # UI boundary: show the failure in the page
            logger.exception("Scene video generation failed")
            st.error(f"视频生成失败: {exc}")
            st.code(traceback.format_exc())
        else:
            st.rerun()

    # Display videos
    for i, scene in enumerate(proj.scenes):
        vid_url = scene.get("video_url", "")
        if not vid_url:
            continue
        col1, col2 = st.columns([3, 1])
        with col1:
            st.video(vid_url)
        with col2:
            st.write(f"分镜 {scene.get('id', i + 1)}")
            st.write(f"{scene.get('duration', 5)}秒")
            if st.button("重新生成", key=f"vid_regen_{i}"):
                with st.spinner("生成中..."):
                    url = factory.generate_scene_video(
                        scene.get("image_url", ""),
                        scene.get("camera_movement", "slow zoom"),
                        scene.get("duration", 5),
                    )
                if url:
                    proj.scenes[i]["video_url"] = url
                    project.save_project(proj)
                    st.rerun()
                else:
                    # Keep the existing clip instead of blanking it.
                    st.error("视频生成失败: 未返回视频地址")

    if has_videos and st.button("下一步: 合成"):
        st.session_state.step = 5
        st.rerun()


def step5_render() -> None:
    """Step 5: Final Rendering."""
    _step_header("Step 5: 最终合成")
    proj = st.session_state.proj
    brief = st.session_state.brief

    col1, col2 = st.columns(2)
    with col1:
        add_subtitles = st.checkbox("烧录字幕", value=True)
        add_voiceover = st.checkbox("添加旁白配音", value=True)
    with col2:
        add_bgm = st.checkbox("添加背景音乐", value=False)
        bgm_file = st.file_uploader("上传 BGM", type=["mp3", "wav"]) if add_bgm else None

    if not st.button("开始合成"):
        return

    with st.spinner("合成中，请稍候..."):
        video_urls = [s.get("video_url") for s in proj.scenes]

        vo_url = ""
        if add_voiceover:
            vo_url = factory.generate_full_voiceover(proj.scenes, brief.get("style", ""))

        bgm_url = ""
        if bgm_file:
            # "" is the "no track" value used above; keep it on a failed upload.
            bgm_url = storage.upload_file(str(_save_upload(bgm_file))) or ""

        final_url = editor.assemble_final_video(
            video_urls=video_urls,
            scenes=proj.scenes if add_subtitles else [],
            voiceover_url=vo_url,
            bgm_url=bgm_url,
        )
        proj.final_video_url = final_url
        proj.status = "done"
        project.save_project(proj)

    st.success("🎉 视频合成完成！")
    st.video(final_url)
    st.markdown(f"### [📥 下载高清视频]({final_url})")
    storage.cleanup_temp()


# Maps the value of ``st.session_state.step`` to the function rendering it.
_STEP_HANDLERS = {
    0: step0_ingest,
    1: step1_analyze,
    2: step2_script,
    3: step3_images,
    4: step4_videos,
    5: step5_render,
}


def main() -> None:
    """Set up session state, draw the sidebar and render the current step."""
    init_session()
    render_sidebar()

    st.title("MatchMe 视频工场 🎬")
    st.caption("AI 驱动的短视频创作平台")

    handler = _STEP_HANDLERS.get(st.session_state.step)
    if handler is not None:
        handler()


if __name__ == "__main__":
    main()