""" MatchMe Studio - Brain Module (Multi-stage Analysis & Script Generation) """ import json import logging from typing import Dict, Any, List, Optional from openai import OpenAI import config logger = logging.getLogger(__name__) # Use Volcengine (Doubao) via OpenAI Compatible Interface client = OpenAI( api_key=config.VOLC_API_KEY, base_url=config.VOLC_BASE_URL ) # ============================================================ # Stage 1: Analyze Materials # ============================================================ ANALYZE_SYSTEM_PROMPT = """你是一位资深短视频创作总监,专精TikTok/抖音爆款内容。 任务:深度分析用户提供的素材和需求,识别产品特性、使用场景、目标人群。 分析维度: 1. 产品/服务核心卖点(从素材中提取视觉特征) 2. 视觉风格特征(颜色、质感、包装) 3. 潜在目标受众 4. 内容调性建议 然后检查是否缺少关键信息,如果缺少,生成2-5个问题帮助完善需求。 每个问题必须与短视频创作直接相关。 输出严格JSON格式: { "analysis": "详细分析结果,包括从素材中识别到的视觉元素...", "detected_info": { "product": "识别到的产品名称和类型", "visual_features": ["视觉特征1", "视觉特征2"], "audience": "推测的目标人群", "style": "推测的风格" }, "missing_info": ["缺少的信息1", "缺少的信息2"], "questions": [ { "id": "q1", "text": "问题文字(说明为什么这个问题重要)", "options": ["选项A", "选项B", "选项C"], "allow_multiple": true, "allow_custom": true } ], "ready": false } 如果信息足够,ready=true,questions为空数组。 """ def analyze_materials( prompt: str, image_urls: List[str] = None, asr_text: str = "" ) -> Dict[str, Any]: """ Deep analysis of user materials. Returns analysis text and questions if info is missing. """ logger.info("Brain: Analyzing materials...") # Using Vision Model format (Doubao Vision) # Input format: messages with content list (text + image_url) content_parts = [{"type": "text", "text": f"用户需求: {prompt}"}] if asr_text: content_parts.append({"type": "text", "text": f"\n视频原声(ASR转写): {asr_text}"}) if image_urls: content_parts.append({"type": "text", "text": "\n用户上传的素材图片(请仔细分析这些图片中的产品特征):"}) for url in image_urls: content_parts.append({ "type": "image_url", "image_url": {"url": url} }) messages = [ # Note: Some vision models might not support 'system' role with images well, # but Doubao usually follows standard chat structure. # If system prompt fails, prepend it to user content. {"role": "system", "content": ANALYZE_SYSTEM_PROMPT}, {"role": "user", "content": content_parts} ] try: # Use Vision Model for Analysis response = client.chat.completions.create( model=config.VISION_MODEL_ID, messages=messages, temperature=0.7, max_tokens=4000 ) content = response.choices[0].message.content.strip() if content.startswith("```"): parts = content.split("```") if len(parts) > 1: content = parts[1] if content.startswith("json"): content = content[4:] return json.loads(content) except Exception as e: logger.error(f"Brain Analyze Error: {e}") raise # ============================================================ # Stage 2: Refine Brief with Answers # ============================================================ REFINE_SYSTEM_PROMPT = """你是短视频创作总监。 根据原始需求、AI分析结果、用户补充回答,整合为完整的创意简报。 注意:用户选择的风格偏好(如ASMR、剧情、视觉流等)必须作为核心创作方向贯穿整个简报。 输出JSON: { "brief": { "product": "产品名称", "product_visual_description": "产品视觉描述(颜色、形状、包装、质感等,用于后续图片生成)", "selling_points": ["卖点1", "卖点2"], "target_audience": "目标人群", "platform": "投放平台", "style": "视频风格(必须明确,如ASMR/剧情/视觉流等)", "style_requirements": "该风格的具体创作要求(如ASMR需要:开盖声、质感特写、无人脸等)", "creativity_level": "创意程度", "reference": "对标账号/竞品", "user_assets_description": "用户上传素材的描述(用于后续继承)" }, "creative_summary": "整体创意概述(50字以内,描述这个视频的核心创意方向)", "ready": true } """ def refine_brief( original_prompt: str, analysis: Dict[str, Any], answers: Dict[str, Any], image_urls: List[str] = None ) -> Dict[str, Any]: """ Integrate user answers into a complete creative brief. """ logger.info("Brain: Refining brief with answers...") user_content = f""" 原始需求: {original_prompt} AI分析结果: {json.dumps(analysis, ensure_ascii=False)} 用户补充回答: {json.dumps(answers, ensure_ascii=False)} 用户上传的素材URL: {json.dumps(image_urls or [], ensure_ascii=False)} """ try: # Use Text LLM for reasoning/refining if no new images involved # But to keep it simple, we can stick to BRAIN_MODEL_ID (Doubao Pro) response = client.chat.completions.create( model=config.BRAIN_MODEL_ID, messages=[ {"role": "system", "content": REFINE_SYSTEM_PROMPT}, {"role": "user", "content": user_content} ], temperature=0.5, max_tokens=3000 ) content = response.choices[0].message.content.strip() if content.startswith("```"): parts = content.split("```") if len(parts) > 1: content = parts[1] if content.startswith("json"): content = content[4:] return json.loads(content) except Exception as e: logger.error(f"Brain Refine Error: {e}") raise # ============================================================ # Stage 3: Generate Script # ============================================================ SCRIPT_SYSTEM_PROMPT = """你是顶级短视频编导,专精{style}风格内容创作。 根据创意简报生成爆款脚本。必须严格遵循用户选择的风格要求。 脚本结构要求: 1. creative_summary: 整体创意概述(这条视频的核心创意是什么) 2. hook: 前3秒钩子设计(必须抓眼球,符合{style}风格) 3. scenes: 3-8个分镜 4. cta: 结尾行动号召(纯文本字符串) 每个分镜(scene)必须包含: - id: 分镜编号 - duration: 时长(5/10/15秒,符合视频模型参数) - timeline: 时间轴 (如 "0:00-0:05") - image_prompt: 【关键】用于AI生图的详细英文prompt,必须包含: * 产品的具体视觉描述(继承自brief中的product_visual_description) * 8k, hyper-realistic, cinematic lighting * 色调、环境、构图、焦点 * 风格要求(如ASMR需要:macro shot, satisfying texture, no human face) - keyframe: { "color_tone": "色调", "environment": "环境/背景", "foreground": "前景元素", "focus": "视觉焦点", "subject": "主体描述", "composition": "构图方式" } - camera_movement: 运镜描述(如:slow zoom in, pan left, static) - story_beat: 这个分镜在整体故事中的作用 - voiceover: 旁白文字({style}风格,如ASMR应简短或无旁白,用音效代替) - sound_design: 音效设计(如:开盖声、水滴声、环境白噪音) - rhythm: {"change": "保持/加快/放慢", "multiplier": 1.0} 旁白要求: - 必须连贯,形成完整的叙事 - 符合{style}风格(ASMR风格应极简或无旁白) - 每句旁白要能独立成句,但连起来是完整故事 输出严格JSON格式。 """ def generate_script( brief: Dict[str, Any], image_urls: List[str] = None, regenerate_feedback: str = "" ) -> Dict[str, Any]: """ Generate complete video script with scenes. """ logger.info("Brain: Generating script...") style = brief.get("style", "现代广告") system_prompt = SCRIPT_SYSTEM_PROMPT.replace("{style}", style) content_parts = [{"type": "text", "text": f"创意简报: {json.dumps(brief, ensure_ascii=False)}"}] if regenerate_feedback: content_parts.append({"type": "text", "text": f"\n用户反馈(请据此调整): {regenerate_feedback}"}) if image_urls: content_parts.append({"type": "text", "text": "\n用户上传的参考素材(生成的image_prompt必须参考这些素材中的产品外观):"}) for url in image_urls: content_parts.append({ "type": "image_url", "image_url": {"url": url} }) try: response = client.chat.completions.create( model=config.VISION_MODEL_ID, # Use Vision model to see reference images if available messages=[ {"role": "system", "content": system_prompt}, {"role": "user", "content": content_parts} ], temperature=0.8, max_tokens=8000 ) content = response.choices[0].message.content.strip() if content.startswith("```"): parts = content.split("```") if len(parts) > 1: content = parts[1] if content.startswith("json"): content = content[4:] return json.loads(content) except Exception as e: logger.error(f"Brain Script Error: {e}") raise # ============================================================ # Stage 4: Regenerate Single Scene # ============================================================ def regenerate_scene( full_script: Dict[str, Any], scene_id: int, feedback: str, brief: Dict[str, Any] = None ) -> Dict[str, Any]: """ Regenerate a single scene based on feedback. """ logger.info(f"Brain: Regenerating scene {scene_id}...") style = brief.get("style", "现代广告") if brief else "现代广告" system_prompt = f"""你是短视频编导,专精{style}风格。根据用户反馈重新生成指定分镜。 保持与其他分镜的风格连贯性。 image_prompt必须继承产品的视觉描述。 只输出新的scene对象(JSON)。 """ user_content = f""" 完整脚本: {json.dumps(full_script, ensure_ascii=False)} 创意简报: {json.dumps(brief, ensure_ascii=False) if brief else "无"} 需要重新生成的分镜ID: {scene_id} 用户反馈: {feedback} """ try: response = client.chat.completions.create( model=config.BRAIN_MODEL_ID, messages=[ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_content} ], temperature=0.8, max_tokens=2000 ) content = response.choices[0].message.content.strip() if content.startswith("```"): parts = content.split("```") if len(parts) > 1: content = parts[1] if content.startswith("json"): content = content[4:] return json.loads(content) except Exception as e: logger.error(f"Brain Regenerate Scene Error: {e}") raise