Files
video-flow/modules/brain.py
Tony Zhang 33a165a615 feat: video-flow initial commit
- app.py: Streamlit UI for video generation workflow
- main_flow.py: CLI tool with argparse support
- modules/: Business logic modules (script_gen, image_gen, video_gen, composer, etc.)
- config.py: Configuration with API keys and paths
- requirements.txt: Python dependencies
- docs/: System prompt documentation
2025-12-12 19:18:27 +08:00

347 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
MatchMe Studio - Brain Module (Multi-stage Analysis & Script Generation)
"""
import json
import logging
from typing import Dict, Any, List, Optional
from openai import OpenAI
import config
logger = logging.getLogger(__name__)

# Volcengine (Doubao) is reached through its OpenAI-compatible interface, so
# the stock OpenAI SDK client is configured with the key and base URL that
# live in config.
client = OpenAI(api_key=config.VOLC_API_KEY, base_url=config.VOLC_BASE_URL)
# ============================================================
# Stage 1: Analyze Materials
# ============================================================
# System prompt for analyze_materials(). Written in Chinese: it casts the
# model as a short-video creative director, asks it to analyze the user's
# materials along four dimensions, and asks for a JSON reply with the keys
# "analysis", "detected_info", "missing_info", "questions" and "ready".
# The text is sent to the model verbatim, so any edit changes model behavior.
ANALYZE_SYSTEM_PROMPT = """你是一位资深短视频创作总监专精TikTok/抖音爆款内容。
任务:深度分析用户提供的素材和需求,识别产品特性、使用场景、目标人群。
分析维度:
1. 产品/服务核心卖点(从素材中提取视觉特征)
2. 视觉风格特征(颜色、质感、包装)
3. 潜在目标受众
4. 内容调性建议
然后检查是否缺少关键信息如果缺少生成2-5个问题帮助完善需求。
每个问题必须与短视频创作直接相关。
输出严格JSON格式
{
"analysis": "详细分析结果,包括从素材中识别到的视觉元素...",
"detected_info": {
"product": "识别到的产品名称和类型",
"visual_features": ["视觉特征1", "视觉特征2"],
"audience": "推测的目标人群",
"style": "推测的风格"
},
"missing_info": ["缺少的信息1", "缺少的信息2"],
"questions": [
{
"id": "q1",
"text": "问题文字(说明为什么这个问题重要)",
"options": ["选项A", "选项B", "选项C"],
"allow_multiple": true,
"allow_custom": true
}
],
"ready": false
}
如果信息足够ready=truequestions为空数组。
"""
def analyze_materials(
    prompt: str,
    image_urls: Optional[List[str]] = None,
    asr_text: str = ""
) -> Dict[str, Any]:
    """
    Analyze the user's request, optional ASR transcript and optional images.

    Builds one multimodal user message (text parts followed by image parts),
    sends it together with ANALYZE_SYSTEM_PROMPT to config.VISION_MODEL_ID and
    decodes the reply as JSON.

    Args:
        prompt: Free-text description of what the user wants.
        image_urls: URLs of the uploaded reference images. None or an empty
            list produces a text-only request.
        asr_text: Transcript of the source video's audio; omitted when empty.

    Returns:
        The decoded JSON reply. ANALYZE_SYSTEM_PROMPT asks the model for the
        keys "analysis", "detected_info", "missing_info", "questions" and
        "ready"; the reply is not validated here.

    Raises:
        json.JSONDecodeError: If the reply is empty or is not valid JSON.
        Exception: Any error raised by the API call is logged and re-raised.
    """
    logger.info("Brain: Analyzing materials...")

    # Vision-model input format: the user message content is a list of typed
    # parts (text + image_url).
    content_parts = [{"type": "text", "text": f"用户需求: {prompt}"}]
    if asr_text:
        content_parts.append({"type": "text", "text": f"\n视频原声(ASR转写): {asr_text}"})
    if image_urls:
        content_parts.append({"type": "text", "text": "\n用户上传的素材图片(请仔细分析这些图片中的产品特征):"})
        content_parts.extend(
            {"type": "image_url", "image_url": {"url": url}} for url in image_urls
        )

    messages = [
        # Note: Some vision models might not support 'system' role with images well,
        # but Doubao usually follows standard chat structure.
        # If system prompt fails, prepend it to user content.
        {"role": "system", "content": ANALYZE_SYSTEM_PROMPT},
        {"role": "user", "content": content_parts},
    ]

    try:
        response = client.chat.completions.create(
            model=config.VISION_MODEL_ID,
            messages=messages,
            temperature=0.7,
            max_tokens=4000,
        )
        # message.content can be None; treat that as an empty reply so the
        # failure surfaces as a JSON decode error instead of an
        # AttributeError on .strip().
        content = (response.choices[0].message.content or "").strip()
        if content.startswith("```"):
            # Keep only the body of the first fenced block and drop an
            # optional "json" language tag (any letter case).
            content = content.split("```")[1]
            if content[:4].lower() == "json":
                content = content[4:]
        return json.loads(content)
    except Exception as e:
        # logger.exception keeps the ERROR level and message but also records
        # the traceback before the error is propagated to the caller.
        logger.exception("Brain Analyze Error: %s", e)
        raise
# ============================================================
# Stage 2: Refine Brief with Answers
# ============================================================
# System prompt for refine_brief(). Written in Chinese: it asks the model to
# merge the original request, the AI analysis and the user's answers into a
# creative brief, and to reply with a JSON object holding "brief",
# "creative_summary" and "ready".
# The text is sent to the model verbatim, so any edit changes model behavior.
REFINE_SYSTEM_PROMPT = """你是短视频创作总监。
根据原始需求、AI分析结果、用户补充回答整合为完整的创意简报。
注意用户选择的风格偏好如ASMR、剧情、视觉流等必须作为核心创作方向贯穿整个简报。
输出JSON:
{
"brief": {
"product": "产品名称",
"product_visual_description": "产品视觉描述(颜色、形状、包装、质感等,用于后续图片生成)",
"selling_points": ["卖点1", "卖点2"],
"target_audience": "目标人群",
"platform": "投放平台",
"style": "视频风格必须明确如ASMR/剧情/视觉流等)",
"style_requirements": "该风格的具体创作要求如ASMR需要开盖声、质感特写、无人脸等",
"creativity_level": "创意程度",
"reference": "对标账号/竞品",
"user_assets_description": "用户上传素材的描述(用于后续继承)"
},
"creative_summary": "整体创意概述50字以内描述这个视频的核心创意方向",
"ready": true
}
"""
def refine_brief(
    original_prompt: str,
    analysis: Dict[str, Any],
    answers: Dict[str, Any],
    image_urls: Optional[List[str]] = None
) -> Dict[str, Any]:
    """
    Integrate the user's answers into a complete creative brief.

    Serializes the original prompt, the earlier analysis, the user's answers
    and the image URL list into one text message, sends it with
    REFINE_SYSTEM_PROMPT to config.BRAIN_MODEL_ID and decodes the reply as
    JSON. Images are passed only as URL strings inside the text; they are not
    attached as image parts.

    Args:
        original_prompt: The user's initial free-text request.
        analysis: JSON-serializable result of the analysis stage.
        answers: JSON-serializable answers the user gave to follow-up questions.
        image_urls: URLs of the uploaded materials; None is sent as an empty list.

    Returns:
        The decoded JSON reply. REFINE_SYSTEM_PROMPT asks the model for the
        keys "brief", "creative_summary" and "ready"; the reply is not
        validated here.

    Raises:
        json.JSONDecodeError: If the reply is empty or is not valid JSON.
        Exception: Any error raised by the API call is logged and re-raised.
    """
    logger.info("Brain: Refining brief with answers...")

    user_content = f"""
原始需求: {original_prompt}
AI分析结果: {json.dumps(analysis, ensure_ascii=False)}
用户补充回答: {json.dumps(answers, ensure_ascii=False)}
用户上传的素材URL: {json.dumps(image_urls or [], ensure_ascii=False)}
"""

    try:
        # No image parts are sent at this stage, so the text model
        # (BRAIN_MODEL_ID) is used rather than the vision model.
        response = client.chat.completions.create(
            model=config.BRAIN_MODEL_ID,
            messages=[
                {"role": "system", "content": REFINE_SYSTEM_PROMPT},
                {"role": "user", "content": user_content},
            ],
            temperature=0.5,
            max_tokens=3000,
        )
        # message.content can be None; treat that as an empty reply so the
        # failure surfaces as a JSON decode error instead of an
        # AttributeError on .strip().
        content = (response.choices[0].message.content or "").strip()
        if content.startswith("```"):
            # Keep only the body of the first fenced block and drop an
            # optional "json" language tag (any letter case).
            content = content.split("```")[1]
            if content[:4].lower() == "json":
                content = content[4:]
        return json.loads(content)
    except Exception as e:
        # logger.exception keeps the ERROR level and message but also records
        # the traceback before the error is propagated to the caller.
        logger.exception("Brain Refine Error: %s", e)
        raise
# ============================================================
# Stage 3: Generate Script
# ============================================================
# System prompt template for generate_script(). Written in Chinese: it asks
# for a script with "creative_summary", "hook", "scenes" (3-8 entries) and
# "cta", and lists the fields each scene must carry.
# The literal token {style} is a placeholder that generate_script() fills in
# with str.replace. The template also contains literal JSON braces, so it is
# not suitable for str.format.
# The text is sent to the model verbatim, so any edit changes model behavior.
SCRIPT_SYSTEM_PROMPT = """你是顶级短视频编导,专精{style}风格内容创作。
根据创意简报生成爆款脚本。必须严格遵循用户选择的风格要求。
脚本结构要求:
1. creative_summary: 整体创意概述(这条视频的核心创意是什么)
2. hook: 前3秒钩子设计必须抓眼球符合{style}风格)
3. scenes: 3-8个分镜
4. cta: 结尾行动号召(纯文本字符串)
每个分镜(scene)必须包含:
- id: 分镜编号
- duration: 时长(5/10/15秒符合视频模型参数)
- timeline: 时间轴 (如 "0:00-0:05")
- image_prompt: 【关键】用于AI生图的详细英文prompt必须包含
* 产品的具体视觉描述继承自brief中的product_visual_description
* 8k, hyper-realistic, cinematic lighting
* 色调、环境、构图、焦点
* 风格要求如ASMR需要macro shot, satisfying texture, no human face
- keyframe: {
"color_tone": "色调",
"environment": "环境/背景",
"foreground": "前景元素",
"focus": "视觉焦点",
"subject": "主体描述",
"composition": "构图方式"
}
- camera_movement: 运镜描述slow zoom in, pan left, static
- story_beat: 这个分镜在整体故事中的作用
- voiceover: 旁白文字({style}风格如ASMR应简短或无旁白用音效代替
- sound_design: 音效设计(如:开盖声、水滴声、环境白噪音)
- rhythm: {"change": "保持/加快/放慢", "multiplier": 1.0}
旁白要求:
- 必须连贯,形成完整的叙事
- 符合{style}风格ASMR风格应极简或无旁白
- 每句旁白要能独立成句,但连起来是完整故事
输出严格JSON格式。
"""
def generate_script(
    brief: Dict[str, Any],
    image_urls: Optional[List[str]] = None,
    regenerate_feedback: str = ""
) -> Dict[str, Any]:
    """
    Generate a complete video script with scenes from a creative brief.

    Fills the {style} placeholder of SCRIPT_SYSTEM_PROMPT from the brief,
    sends the brief (plus optional feedback and reference images) to
    config.VISION_MODEL_ID and decodes the reply as JSON.

    Args:
        brief: Creative brief; its "style" entry selects the style named in
            the system prompt. A missing, None or empty style falls back to
            "现代广告".
        image_urls: URLs of reference images attached as image parts; None or
            an empty list produces a text-only request.
        regenerate_feedback: User feedback on a previous script; omitted when
            empty.

    Returns:
        The decoded JSON reply; its structure is requested by
        SCRIPT_SYSTEM_PROMPT and is not validated here.

    Raises:
        json.JSONDecodeError: If the reply is empty or is not valid JSON.
        Exception: Any error raised by the API call is logged and re-raised.
    """
    logger.info("Brain: Generating script...")

    # str.replace needs a str: guard against a None/empty or non-string style
    # in the brief, which would otherwise raise TypeError or blank the prompt.
    style = str(brief.get("style") or "现代广告")
    # replace() rather than format(): the template contains literal JSON braces.
    system_prompt = SCRIPT_SYSTEM_PROMPT.replace("{style}", style)

    content_parts = [{"type": "text", "text": f"创意简报: {json.dumps(brief, ensure_ascii=False)}"}]
    if regenerate_feedback:
        content_parts.append({"type": "text", "text": f"\n用户反馈(请据此调整): {regenerate_feedback}"})
    if image_urls:
        content_parts.append({"type": "text", "text": "\n用户上传的参考素材生成的image_prompt必须参考这些素材中的产品外观:"})
        content_parts.extend(
            {"type": "image_url", "image_url": {"url": url}} for url in image_urls
        )

    try:
        response = client.chat.completions.create(
            model=config.VISION_MODEL_ID,  # Vision model so reference images can be seen if provided
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": content_parts},
            ],
            temperature=0.8,
            max_tokens=8000,
        )
        # message.content can be None; treat that as an empty reply so the
        # failure surfaces as a JSON decode error instead of an
        # AttributeError on .strip().
        content = (response.choices[0].message.content or "").strip()
        if content.startswith("```"):
            # Keep only the body of the first fenced block and drop an
            # optional "json" language tag (any letter case).
            content = content.split("```")[1]
            if content[:4].lower() == "json":
                content = content[4:]
        return json.loads(content)
    except Exception as e:
        # logger.exception keeps the ERROR level and message but also records
        # the traceback before the error is propagated to the caller.
        logger.exception("Brain Script Error: %s", e)
        raise
# ============================================================
# Stage 4: Regenerate Single Scene
# ============================================================
def regenerate_scene(
    full_script: Dict[str, Any],
    scene_id: int,
    feedback: str,
    brief: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """
    Regenerate a single scene of an existing script based on user feedback.

    Sends the full script, the optional brief, the target scene id and the
    feedback to config.BRAIN_MODEL_ID and decodes the reply as JSON.

    Args:
        full_script: The complete current script (JSON-serializable).
        scene_id: Identifier of the scene to regenerate; passed to the model
            as text, not looked up locally.
        feedback: The user's feedback for that scene.
        brief: Optional creative brief. Its "style" entry is named in the
            system prompt; a missing brief or a missing, None or empty style
            falls back to "现代广告".

    Returns:
        The decoded JSON reply. The system prompt asks the model to output
        only the new scene object; the reply is not validated here.

    Raises:
        json.JSONDecodeError: If the reply is empty or is not valid JSON.
        Exception: Any error raised by the API call is logged and re-raised.
    """
    logger.info("Brain: Regenerating scene %s...", scene_id)

    # Fall back to the default style when the brief is absent or its style is
    # None/empty, so the prompt never reads "None" or a blank style.
    style = (brief or {}).get("style") or "现代广告"

    system_prompt = f"""你是短视频编导,专精{style}风格。根据用户反馈重新生成指定分镜。
保持与其他分镜的风格连贯性。
image_prompt必须继承产品的视觉描述。
只输出新的scene对象(JSON)。
"""
    user_content = f"""
完整脚本: {json.dumps(full_script, ensure_ascii=False)}
创意简报: {json.dumps(brief, ensure_ascii=False) if brief else ""}
需要重新生成的分镜ID: {scene_id}
用户反馈: {feedback}
"""

    try:
        response = client.chat.completions.create(
            model=config.BRAIN_MODEL_ID,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_content},
            ],
            temperature=0.8,
            max_tokens=2000,
        )
        # message.content can be None; treat that as an empty reply so the
        # failure surfaces as a JSON decode error instead of an
        # AttributeError on .strip().
        content = (response.choices[0].message.content or "").strip()
        if content.startswith("```"):
            # Keep only the body of the first fenced block and drop an
            # optional "json" language tag (any letter case).
            content = content.split("```")[1]
            if content[:4].lower() == "json":
                content = content[4:]
        return json.loads(content)
    except Exception as e:
        # logger.exception keeps the ERROR level and message but also records
        # the traceback before the error is propagated to the caller.
        logger.exception("Brain Regenerate Scene Error: %s", e)
        raise