feat: video-flow initial commit

- app.py: Streamlit UI for video generation workflow
- main_flow.py: CLI tool with argparse support
- modules/: Business logic modules (script_gen, image_gen, video_gen, composer, etc.)
- config.py: Configuration with API keys and paths
- requirements.txt: Python dependencies
- docs/: System prompt documentation
This commit is contained in:
Tony Zhang
2025-12-12 19:18:27 +08:00
commit 33a165a615
34 changed files with 12012 additions and 0 deletions

593
web_app.py Normal file
View File

@@ -0,0 +1,593 @@
"""
MatchMe Studio - 6-Step Video Creation Wizard (v2)
"""
import streamlit as st
import logging
from pathlib import Path
import config
from modules import brain, factory, editor, storage, ingest, asr, project
# Configure the root logger to emit INFO-level records and above.
logging.basicConfig(level=logging.INFO)
# Page-level Streamlit settings: browser tab title, tab icon and wide layout.
st.set_page_config(
page_title="MatchMe 视频工场",
page_icon="🎬",
layout="wide"
)
# Custom CSS, injected as raw HTML (hence unsafe_allow_html=True). It defines a
# full-width file uploader, the gradient ".step-header" banner used by every
# step, rounded gradient buttons, and the ".scene-card" / ".question-card"
# container styles.
st.markdown("""
<style>
/* Fix for file uploader */
section[data-testid="stFileUploader"] {
width: 100%;
}
.step-header {
background: linear-gradient(90deg, #FF4B4B, #FF914D);
padding: 10px 20px;
border-radius: 10px;
color: white;
font-weight: bold;
margin-bottom: 20px;
}
.stButton>button {
border-radius: 20px;
background: linear-gradient(45deg, #FF4B4B, #FF914D);
color: white;
border: none;
padding: 10px 30px;
}
.scene-card {
background: #262730;
padding: 15px;
border-radius: 10px;
margin: 10px 0;
}
.question-card {
background: #1e1e2e;
padding: 15px;
border-radius: 8px;
margin: 10px 0;
border-left: 3px solid #FF4B4B;
}
</style>
""", unsafe_allow_html=True)
def init_session():
    """Seed ``st.session_state`` with the wizard's default entries.

    Keys that already exist are left untouched. Missing keys are filled in:
    ``proj`` with a new project from ``project.create_project()``, ``step``
    with ``0`` and ``brief`` with an empty dict.
    """
    state = st.session_state
    # Factories rather than ready-made values, so that a new project is only
    # created when the "proj" key is actually missing.
    default_factories = (
        ("proj", project.create_project),
        ("step", int),
        ("brief", dict),
    )
    for key, make_default in default_factories:
        if key not in state:
            state[key] = make_default()
def render_sidebar():
    """Draw the sidebar: project info, load/reset controls and the step list."""
    state = st.session_state
    with st.sidebar:
        st.header("项目控制台")

        current = state.proj
        st.text(f"项目 ID: {current.id}")
        st.text(f"状态: {current.status}")

        st.divider()

        # Restore a previously saved project by the ID the user typed in.
        requested_id = st.text_input("恢复项目 (输入ID)")
        if st.button("加载"):
            restored = project.load_project(requested_id)
            if not restored:
                st.error("项目不存在")
            else:
                state.proj = restored
                st.success(f"已加载项目 {requested_id}")
                st.rerun()

        st.divider()

        # Start over: new project, back to step 0, empty brief.
        if st.button("重置项目"):
            state.proj = project.create_project()
            state.step = 0
            state.brief = {}
            st.rerun()

        st.divider()

        step_names = ("素材提交", "AI分析", "脚本生成", "画面生成", "视频生成", "最终合成")
        for index, label in enumerate(step_names):
            entry = f"{index}. {label}"
            # Only the active step is emphasised; completed and upcoming
            # steps are rendered the same way.
            if index == state.step:
                entry = f"**→ {entry}**"
            st.markdown(entry)
def _save_upload_to_temp(uploaded_file):
    """Write a Streamlit upload into ``config.TEMP_DIR`` and return its path.

    Only the last component of the client-supplied file name is used, so
    directory parts in the name are ignored.
    """
    target = config.TEMP_DIR / Path(uploaded_file.name).name
    with open(target, "wb") as out:
        out.write(uploaded_file.getbuffer())
    return target


def step0_ingest():
    """Step 0: Material Submission.

    Lets the user pick an input mode (text only, images + text, or video +
    text), uploads any supplied media through ``storage`` / ``ingest``, and
    records the results on the current project. For videos a transcript is
    also requested from ``asr``. Pressing the "next" button saves the project
    and advances the wizard to step 1.
    """
    st.markdown('<div class="step-header">Step 0: 素材提交</div>', unsafe_allow_html=True)
    proj = st.session_state.proj

    mode = st.radio(
        "选择输入方式",
        ["纯文本创意", "图片 + 描述", "视频 + 描述"],
        horizontal=True,
    )
    prompt = st.text_area("创意描述 / 产品卖点", height=100, placeholder="描述你想要的视频内容...")

    if mode == "纯文本创意":
        proj.input_mode = "text"
    elif mode == "图片 + 描述":
        proj.input_mode = "images"
        uploaded = st.file_uploader(
            "上传图片 (支持多张)",
            type=["jpg", "png", "jpeg"],
            accept_multiple_files=True,
        )
        if uploaded:
            urls = []
            with st.spinner("上传图片中..."):
                for f in uploaded:
                    temp_path = _save_upload_to_temp(f)
                    url = storage.upload_file(str(temp_path))
                    if url:
                        urls.append(url)
                    else:
                        st.error(f"上传失败: {f.name}")
            if urls:
                proj.image_urls = urls
                st.image(urls, width=150)
                st.success(f"成功上传 {len(urls)} 张图片")
    elif mode == "视频 + 描述":
        proj.input_mode = "video"
        uploaded = st.file_uploader("上传视频", type=["mp4"])
        if uploaded:
            with st.spinner("处理视频中..."):
                temp_path = _save_upload_to_temp(uploaded)
                try:
                    frame_urls, video_url = ingest.process_uploaded_video(str(temp_path))
                    proj.image_urls = frame_urls
                    proj.video_url = video_url
                    if frame_urls:
                        # One caption per extracted frame: a caption list
                        # whose length differs from the image list is
                        # rejected by st.image.
                        captions = [f"帧{n}" for n in range(1, len(frame_urls) + 1)]
                        st.image(frame_urls, width=150, caption=captions)
                except Exception as e:
                    st.error(f"视频处理失败: {e}")
                # Transcription is attempted independently of frame
                # extraction; a failure here is only a warning.
                try:
                    asr_text = asr.transcribe_video(str(temp_path))
                    proj.asr_text = asr_text
                    if asr_text:
                        st.info(f"语音识别: {asr_text[:100]}...")
                except Exception as e:
                    st.warning(f"语音识别失败: {e}")

    proj.prompt = prompt
    # A prompt made only of whitespace is treated as empty.
    if st.button("下一步: AI 分析", disabled=not prompt.strip()):
        proj.status = "analyzing"
        project.save_project(proj)
        st.session_state.step = 1
        st.rerun()
def step1_analyze():
    """Step 1: AI analysis plus follow-up questions.

    Runs ``brain.analyze_materials`` once (while the project has no analysis
    yet), shows the result, then renders each follow-up question as a
    multi-select or radio widget with an optional free-text field. Confirming
    the answers asks ``brain.refine_brief`` for the creative brief and moves
    to step 2. When there are no questions, a basic brief is built from the
    prompt instead.
    """
    st.markdown('<div class="step-header">Step 1: AI 深度分析</div>', unsafe_allow_html=True)
    proj = st.session_state.proj

    # Analyse only while the project has no stored analysis.
    if not proj.analysis:
        with st.spinner("AI 正在分析素材..."):
            outcome = brain.analyze_materials(
                prompt=proj.prompt,
                image_urls=proj.image_urls or None,
                asr_text=proj.asr_text,
            )
            proj.analysis = outcome.get("analysis", "")
            proj.questions = outcome.get("questions", [])
            project.save_project(proj)

    st.subheader("分析结果")
    st.write(proj.analysis)

    if not proj.questions:
        # Nothing to ask: build a minimal brief straight from the prompt.
        if st.button("下一步: 生成脚本"):
            st.session_state.brief = {
                "product": proj.prompt,
                "selling_points": [],
                "style": "现代广告",
            }
            st.session_state.step = 2
            st.rerun()
        return

    st.subheader("补充信息")
    st.caption("请回答以下问题,帮助 AI 更好地理解你的需求")

    collected = {}
    for question in proj.questions:
        qid = question["id"]
        widget_key = f"q_{qid}"
        st.markdown('<div class="question-card">', unsafe_allow_html=True)

        multi = question.get("allow_multiple", False)
        wants_custom = question.get("allow_custom", True)

        if multi:
            picks = st.multiselect(question["text"], question["options"], key=widget_key)
        else:
            choice = st.radio(question["text"], question["options"], key=widget_key)
            # Stored as a list so both widget kinds share one answer shape.
            picks = [choice] if choice else []
        collected[qid] = {"selected": picks}

        # Optional free-text addition to the selected options.
        if wants_custom:
            collected[qid]["custom"] = st.text_input(
                "补充说明 (选填)",
                key=f"custom_{qid}",
                placeholder="如有其他想法,请在此补充...",
            )

        st.markdown('</div>', unsafe_allow_html=True)

    if not st.button("确认回答,生成创意简报"):
        return

    proj.answers = collected
    # Merge the prompt, the analysis and the answers into the brief.
    with st.spinner("整合创意简报中..."):
        refined = brain.refine_brief(
            proj.prompt,
            {"analysis": proj.analysis},
            collected,
            proj.image_urls,
        )
        st.session_state.brief = refined.get("brief", {})
        # Keep the creative summary inside the brief when one is returned.
        if "creative_summary" in refined:
            st.session_state.brief["creative_summary"] = refined["creative_summary"]

    project.save_project(proj)
    st.session_state.step = 2
    st.rerun()
def step2_script():
    """Step 2: Script Generation.

    Generates a script via ``brain.generate_script`` while the project has no
    scenes, then displays the hook, every scene (inside an expander, with a
    per-scene regenerate control) and the CTA. The whole script can be
    regenerated with feedback, or confirmed to move on to step 3.
    """
    st.markdown('<div class="step-header">Step 2: 脚本生成</div>', unsafe_allow_html=True)
    proj = st.session_state.proj
    brief = st.session_state.brief

    # Creative direction and style coming from the brief.
    if brief.get("creative_summary"):
        st.info(f"🎯 创意方向: {brief['creative_summary']}")
    if brief.get("style"):
        st.caption(f"视频风格: {brief['style']}")

    # First visit (no scenes yet): ask the model for a script.
    if not proj.scenes:
        with st.spinner("AI 正在创作脚本..."):
            generated = brain.generate_script(brief, proj.image_urls)
            proj.hook = generated.get("hook", "")
            proj.scenes = generated.get("scenes", [])
            proj.cta = generated.get("cta", "")
            # A summary returned with the script is copied into the brief.
            if generated.get("creative_summary"):
                brief["creative_summary"] = generated["creative_summary"]
                st.session_state.brief = brief
            proj.status = "scripting"
            project.save_project(proj)

    st.subheader(f"🎣 Hook: {proj.hook}")

    # Read again: the generation above may just have set the summary.
    if brief.get("creative_summary"):
        st.markdown(f"**整体创意**: {brief['creative_summary']}")

    for idx, scene in enumerate(proj.scenes):
        scene_id = scene.get("id", idx + 1)
        with st.expander(f"分镜 {scene_id}: {scene.get('timeline', '')}"):
            left, right = st.columns(2)
            with left:
                timing_fields = (
                    ("时长", scene.get("duration", 5)),
                    ("运镜", scene.get("camera_movement", "")),
                    ("故事节拍", scene.get("story_beat", "")),
                    ("音效设计", scene.get("sound_design", "")),
                )
                for label, value in timing_fields:
                    st.write(f"**{label}**: {value}")
            with right:
                keyframe = scene.get("keyframe", {})
                visual_fields = (
                    ("色调", keyframe.get("color_tone", "")),
                    ("环境", keyframe.get("environment", "")),
                    ("焦点", keyframe.get("focus", "")),
                    ("构图", keyframe.get("composition", "")),
                )
                for label, value in visual_fields:
                    st.write(f"**{label}**: {value}")

            # The image prompt is what the image step works from.
            st.write("**生图Prompt**:")
            st.code(scene.get("image_prompt", "(未生成)"), language=None)
            st.write(f"**旁白**: {scene.get('voiceover', '(无)')}")

            feedback = st.text_input("修改意见", key=f"fb_{idx}")
            if st.button("重新生成此分镜", key=f"regen_{idx}"):
                with st.spinner("重新生成中..."):
                    whole_script = {"hook": proj.hook, "scenes": proj.scenes, "cta": proj.cta}
                    proj.scenes[idx] = brain.regenerate_scene(
                        whole_script,
                        scene_id,
                        feedback,
                        brief,
                    )
                    project.save_project(proj)
                    st.rerun()

    # The CTA may be a dict; show its "text" entry (or its repr) in that case.
    raw_cta = proj.cta
    cta_display = raw_cta.get("text", str(raw_cta)) if isinstance(raw_cta, dict) else raw_cta
    st.subheader(f"📢 CTA: {cta_display}")

    redo_col, next_col = st.columns(2)
    with redo_col:
        regen_feedback = st.text_input("整体修改意见")
        if st.button("重新生成整个脚本"):
            with st.spinner("重新生成中..."):
                regenerated = brain.generate_script(brief, proj.image_urls, regen_feedback)
                proj.hook = regenerated.get("hook", "")
                proj.scenes = regenerated.get("scenes", [])
                proj.cta = regenerated.get("cta", "")
                project.save_project(proj)
                st.rerun()
    with next_col:
        if st.button("确认脚本,下一步"):
            st.session_state.step = 3
            st.rerun()
def step3_images():
    """Step 3: Image Generation (Concurrent) using Gemini Image.

    Shows the user's reference images, offers a one-click concurrent
    generation of every scene image, then renders a grid of up to four
    columns. Each generated image can be regenerated or replaced by an
    upload, and each scene's voiceover can be edited. Once every scene has
    an image the user can move on to step 4.
    """
    st.markdown('<div class="step-header">Step 3: 画面生成 (Gemini Image)</div>', unsafe_allow_html=True)
    proj = st.session_state.proj
    brief = st.session_state.brief

    # Show reference images if available
    if proj.image_urls:
        st.caption("参考素材(用于保持产品一致性):")
        st.image(proj.image_urls[:3], width=100)

    # Without scenes there is nothing to render: all() over an empty list is
    # True and st.columns(0) would raise, so stop here.
    if not proj.scenes:
        st.warning("暂无分镜,请先完成脚本生成")
        return

    has_images = all(s.get("image_url") for s in proj.scenes)

    if not has_images:
        if st.button("开始生成所有画面 (并发)"):
            progress = st.progress(0)
            status = st.empty()
            try:
                status.text("正在并发生成所有分镜画面...")
                # Pass user's reference images for product consistency
                image_urls = factory.generate_all_scene_images_concurrent(
                    proj.scenes,
                    brief,
                    reference_images=proj.image_urls,  # the user's uploaded material
                    max_workers=3,
                )
                total = len(proj.scenes)
                # zip() keeps scenes and results aligned even if the result
                # list is longer than the scene list.
                for i, (scene, url) in enumerate(zip(proj.scenes, image_urls)):
                    if url:
                        scene["image_url"] = url
                    progress.progress((i + 1) / total)
                proj.status = "imaging"
                project.save_project(proj)
                st.rerun()
            except Exception as e:
                st.error(f"生成失败: {e}")
                import traceback
                st.code(traceback.format_exc())

    # Display images in grid
    cols = st.columns(min(4, len(proj.scenes)))
    for i, scene in enumerate(proj.scenes):
        with cols[i % 4]:
            img_url = scene.get("image_url", "")
            if img_url:
                st.image(img_url, caption=f"分镜 {scene.get('id', i+1)}")

                if st.button("重新生成", key=f"img_regen_{i}"):
                    with st.spinner("生成中..."):
                        url = factory.generate_scene_image(scene, brief, proj.image_urls)
                    if url:
                        proj.scenes[i]["image_url"] = url
                        project.save_project(proj)
                        st.rerun()
                    else:
                        # Keep the current image rather than overwriting it
                        # with an empty result.
                        st.error("生成失败")

                custom = st.file_uploader("替换", key=f"img_up_{i}", type=["jpg", "png"])
                handled_key = f"img_up_done_{i}"
                if custom:
                    # The uploader keeps returning the same file on every
                    # rerun, so remember which upload was already applied.
                    # Otherwise the upload + st.rerun() below would repeat
                    # on every run.
                    upload_token = (custom.name, custom.size)
                    if st.session_state.get(handled_key) != upload_token:
                        temp_path = config.TEMP_DIR / Path(custom.name).name
                        with open(temp_path, "wb") as f:
                            f.write(custom.getbuffer())
                        url = storage.upload_file(str(temp_path))
                        if url:
                            st.session_state[handled_key] = upload_token
                            proj.scenes[i]["image_url"] = url
                            project.save_project(proj)
                            st.rerun()
                        else:
                            st.error(f"上传失败: {custom.name}")
                else:
                    # Uploader cleared: allow the same file to be applied
                    # again later.
                    st.session_state.pop(handled_key, None)

            vo = st.text_area("旁白", scene.get("voiceover", ""), key=f"vo_{i}", height=80)
            if vo != scene.get("voiceover", ""):
                proj.scenes[i]["voiceover"] = vo
                project.save_project(proj)

    if has_images and st.button("下一步: 生成视频"):
        st.session_state.step = 4
        st.rerun()
def step4_videos():
    """Step 4: Video Generation (Concurrent) using Sora 2.

    Offers a one-click concurrent generation of a clip for every scene (from
    the scene images), then lists each generated clip with a per-scene
    regenerate button. Once every scene has a video the user can move on to
    step 5.
    """
    st.markdown('<div class="step-header">Step 4: 分镜视频生成 (Sora 2)</div>', unsafe_allow_html=True)
    proj = st.session_state.proj

    # all() over an empty list is True, which would let the user continue
    # with nothing generated, so stop early when there are no scenes.
    if not proj.scenes:
        st.warning("暂无分镜,请先完成脚本生成")
        return

    has_videos = all(s.get("video_url") for s in proj.scenes)

    if not has_videos:
        if st.button("开始生成所有视频 (并发)"):
            progress = st.progress(0)
            status = st.empty()
            try:
                image_urls = [s.get("image_url") for s in proj.scenes]
                status.text("正在并发生成所有分镜视频 (Sora 2)...")
                video_urls = factory.generate_all_scene_videos_concurrent(
                    proj.scenes,
                    image_urls,
                    max_workers=2,
                )
                total = len(proj.scenes)
                # zip() keeps scenes and results aligned even if the result
                # list is longer than the scene list.
                for i, (scene, url) in enumerate(zip(proj.scenes, video_urls)):
                    if url:
                        scene["video_url"] = url
                    progress.progress((i + 1) / total)
                proj.status = "video"
                project.save_project(proj)
                st.rerun()
            except Exception as e:
                st.error(f"视频生成失败: {e}")
                import traceback
                st.code(traceback.format_exc())

    # Display videos
    for i, scene in enumerate(proj.scenes):
        vid_url = scene.get("video_url", "")
        if not vid_url:
            continue
        col1, col2 = st.columns([3, 1])
        with col1:
            st.video(vid_url)
        with col2:
            st.write(f"分镜 {scene.get('id', i+1)}")
            st.write(f"{scene.get('duration', 5)}")
            if st.button("重新生成", key=f"vid_regen_{i}"):
                with st.spinner("生成中..."):
                    url = factory.generate_scene_video(
                        scene.get("image_url", ""),
                        scene.get("camera_movement", "slow zoom"),
                        scene.get("duration", 5),
                    )
                if url:
                    proj.scenes[i]["video_url"] = url
                    project.save_project(proj)
                    st.rerun()
                else:
                    # Same rule as the bulk path: an empty result never
                    # replaces an existing clip.
                    st.error("视频生成失败")

    if has_videos and st.button("下一步: 合成"):
        st.session_state.step = 5
        st.rerun()
def step5_render():
    """Step 5: Final Rendering.

    Collects the render options (subtitles, voiceover, optional BGM upload)
    and, when the user starts the render, generates the voiceover, uploads
    the BGM and calls ``editor.assemble_final_video``. On success the project
    is marked "done", saved, and the result is shown with a download link.
    Temporary files are cleaned up afterwards, even if the render fails.
    """
    st.markdown('<div class="step-header">Step 5: 最终合成</div>', unsafe_allow_html=True)
    proj = st.session_state.proj
    brief = st.session_state.brief

    col1, col2 = st.columns(2)
    with col1:
        add_subtitles = st.checkbox("烧录字幕", value=True)
        add_voiceover = st.checkbox("添加旁白配音", value=True)
    with col2:
        add_bgm = st.checkbox("添加背景音乐", value=False)
        bgm_file = None
        if add_bgm:
            bgm_file = st.file_uploader("上传 BGM", type=["mp3", "wav"])

    if not st.button("开始合成"):
        return

    try:
        with st.spinner("合成中,请稍候..."):
            video_urls = [s.get("video_url") for s in proj.scenes]

            vo_url = ""
            if add_voiceover:
                style = brief.get("style", "")
                vo_url = factory.generate_full_voiceover(proj.scenes, style)

            bgm_url = ""
            if bgm_file:
                # Use only the base name of the client-supplied file name.
                temp_path = config.TEMP_DIR / Path(bgm_file.name).name
                with open(temp_path, "wb") as f:
                    f.write(bgm_file.getbuffer())
                # Fall back to the "no BGM" default if the upload yields
                # nothing.
                bgm_url = storage.upload_file(str(temp_path)) or ""

            final_url = editor.assemble_final_video(
                video_urls=video_urls,
                # An empty scene list is passed when subtitles are off.
                scenes=proj.scenes if add_subtitles else [],
                voiceover_url=vo_url,
                bgm_url=bgm_url,
            )

            # Only record the project as finished when a result came back.
            if final_url:
                proj.final_video_url = final_url
                proj.status = "done"
                project.save_project(proj)

        if final_url:
            st.success("🎉 视频合成完成!")
            st.video(final_url)
            st.markdown(f"### [📥 下载高清视频]({final_url})")
        else:
            st.error("视频合成失败,请重试")
    finally:
        # Runs after the result has been displayed, and also when an
        # exception interrupts the render.
        storage.cleanup_temp()
def main():
    """Entry point: set up state, draw the chrome, then render the active step."""
    init_session()
    render_sidebar()

    st.title("MatchMe 视频工场 🎬")
    st.caption("AI 驱动的短视频创作平台")

    # Step number -> renderer; a step outside 0-5 renders nothing.
    step_renderers = {
        0: step0_ingest,
        1: step1_analyze,
        2: step2_script,
        3: step3_images,
        4: step4_videos,
        5: step5_render,
    }
    render_step = step_renderers.get(st.session_state.step)
    if render_step is not None:
        render_step()


if __name__ == "__main__":
    main()