Files
video-flow/modules/text_renderer.py
2026-01-09 14:09:16 +08:00

389 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
General-purpose text rendering engine.
Supports atomic design parameters so the upstream Design Agent can call it flexibly.
"""
import os
import hashlib
import logging
from pathlib import Path
from typing import Dict, Any, List, Tuple, Union, Optional
from PIL import Image, ImageDraw, ImageFont, ImageFilter, ImageColor
import config
from modules.styles import get_style
logger = logging.getLogger(__name__)
# Directory where rendered PNGs are cached. parents=True so that importing this
# module does not fail when config.TEMP_DIR itself has not been created yet.
CACHE_DIR = config.TEMP_DIR / "text_renderer_cache"
CACHE_DIR.mkdir(parents=True, exist_ok=True)
class TextRenderer:
    """
    General-purpose text renderer.

    Renders a piece of text to a transparent PNG from atomic style parameters
    (font, colour, strokes, shadow, background) and returns the path of the
    generated file inside ``CACHE_DIR``.
    """

    # Line spacing, as a fraction of the font size, used for every multi-line
    # measure/draw call so that measuring and drawing always agree.
    _LINE_SPACING_RATIO = 0.25
    # Horizontal shear factor used for the faux-italic effect (empirical value).
    _ITALIC_SHEAR = 0.22

    def __init__(self):
        self.default_font_path = self._resolve_font_path(None)

    # ------------------------------------------------------------------ #
    # Small pure helpers
    # ------------------------------------------------------------------ #
    @staticmethod
    def _coerce_int(value: Any, default: int) -> int:
        """Return ``int(value)``, or ``default`` when the value is not convertible."""
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            return default

    @staticmethod
    def _normalize_padding(padding: Any) -> List:
        """
        Normalize a padding spec to ``[top, right, bottom, left]``.

        Accepts a single number, or a list/tuple of 1-4 numbers with CSS-like
        meaning: ``[all]``, ``[vertical, horizontal]``,
        ``[top, horizontal, bottom]``, ``[top, right, bottom, left]``.
        Anything else yields ``[0, 0, 0, 0]``.
        """
        if isinstance(padding, (int, float)):
            return [padding] * 4
        if isinstance(padding, (list, tuple)):
            values = list(padding)
            if len(values) == 1:
                return values * 4
            if len(values) == 2:
                return [values[0], values[1], values[0], values[1]]
            if len(values) == 3:
                return [values[0], values[1], values[2], values[1]]
            if len(values) >= 4:
                return values[:4]
        return [0, 0, 0, 0]

    @staticmethod
    def _greedy_wrap(text: Optional[str], max_width: int, measure) -> str:
        """
        Greedy line wrapping, independent of any drawing backend.

        Args:
            text: Text to wrap; existing newlines are kept as paragraph breaks.
            max_width: Maximum line width, in the unit returned by ``measure``.
            measure: Callable taking a string and returning its width.

        Returns:
            The wrapped text joined with newlines. Paragraphs containing a
            space are broken between words; other paragraphs (e.g. CJK) are
            broken between characters. A single token wider than
            ``max_width`` is split by character wherever it occurs on a line.
            Trailing empty lines are dropped, inner ones are preserved.
        """
        lines: List[str] = []
        for paragraph in (text or "").split("\n"):
            paragraph = paragraph.rstrip()
            if not paragraph:
                lines.append("")
                continue
            by_words = " " in paragraph
            joiner = " " if by_words else ""
            tokens = paragraph.split(" ") if by_words else list(paragraph)
            current = ""
            for token in tokens:
                candidate = current + joiner + token if current else token
                if measure(candidate) <= max_width:
                    current = candidate
                    continue
                # The token does not fit on the current line: flush the line.
                if current:
                    lines.append(current)
                    current = ""
                if measure(token) <= max_width:
                    current = token
                    continue
                # The token alone is too wide: split it by character. Each
                # chunk keeps at least one character so the loop always
                # makes progress, even for a glyph wider than max_width.
                chunk = ""
                for ch in token:
                    if not chunk or measure(chunk + ch) <= max_width:
                        chunk += ch
                    else:
                        lines.append(chunk)
                        chunk = ch
                current = chunk
            if current:
                lines.append(current)
        # Drop trailing empty lines (inner empty lines are kept).
        while lines and lines[-1] == "":
            lines.pop()
        return "\n".join(lines)

    @staticmethod
    def _draw_text(draw, xy, text, font, fill, spacing, **extra) -> None:
        """
        Draw (possibly multi-line) centred text, falling back to ``draw.text``
        if ``multiline_text`` fails. ``extra`` is forwarded to both calls
        (used for ``stroke_width`` / ``stroke_fill``).
        """
        try:
            draw.multiline_text(xy, text, font=font, fill=fill, spacing=spacing, align="center", **extra)
        except Exception:
            draw.text(xy, text, font=font, fill=fill, **extra)

    def _shear_layer(self, layer, pivot_y):
        """
        Shear ``layer`` horizontally for a faux-italic effect.

        The shear is anchored at row ``pivot_y`` (that row does not move), so
        rows above lean right and rows below lean left by the same amount
        instead of the whole layer drifting to the left.
        """
        shear = self._ITALIC_SHEAR
        return layer.transform(
            layer.size,
            Image.AFFINE,
            (1, shear, -shear * pivot_y, 0, 1, 0),
            resample=Image.BICUBIC
        )

    # ------------------------------------------------------------------ #
    # Fonts and colours
    # ------------------------------------------------------------------ #
    def _resolve_font_path(self, font_family: Optional[str]) -> Optional[str]:
        """
        Resolve a font file path with multi-level fallback.

        Candidates are tried in order: ``font_family`` as given, the same name
        under ``config.FONTS_DIR`` (with ``.ttf`` / ``.otf`` appended when it
        has no font extension), the project's bundled fonts, then a few
        macOS/Windows system fonts.

        Returns:
            The first candidate that is an existing file larger than 10000
            bytes, or ``None`` when nothing matches (the caller then falls
            back to Pillow's built-in default font).
        """
        candidates: List[str] = []
        if font_family:
            # 1. Try the value as a path as-is.
            candidates.append(font_family)
            # 2. Look it up under the project fonts directory.
            candidates.append(str(config.FONTS_DIR / font_family))
            if not font_family.lower().endswith((".ttf", ".otf", ".ttc")):
                candidates.append(str(config.FONTS_DIR / f"{font_family}.ttf"))
                candidates.append(str(config.FONTS_DIR / f"{font_family}.otf"))
        # 3. Bundled project fonts.
        candidates.extend([
            str(config.FONTS_DIR / "SmileySans-Oblique.ttf"),
            str(config.FONTS_DIR / "AlibabaPuHuiTi-Bold.ttf"),
            str(config.FONTS_DIR / "AlibabaPuHuiTi-Regular.ttf"),
            str(config.FONTS_DIR / "NotoSansSC-Bold.otf"),  # if present
        ])
        # 4. System font fallback.
        candidates.extend([
            "/System/Library/Fonts/PingFang.ttc",
            "/System/Library/Fonts/STHeiti Medium.ttc",
            "C:/Windows/Fonts/msyh.ttc",
            "C:/Windows/Fonts/simhei.ttf",
        ])
        for path in candidates:
            try:
                # Cheap sanity check: reject missing, non-file or tiny entries.
                if path and os.path.isfile(path) and os.path.getsize(path) > 10000:
                    return path
            except OSError:
                continue
        logger.warning("No valid font found, using default load_default()")
        return None

    def _get_font(self, font_path: Optional[str], size: int) -> "ImageFont.FreeTypeFont":
        """
        Load the font at ``font_path`` in the given size.

        Falls back to ``ImageFont.load_default()`` when ``font_path`` is empty
        or the font cannot be loaded (a warning is logged in the latter case).
        """
        if font_path:
            try:
                return ImageFont.truetype(font_path, size)
            except Exception as e:
                logger.warning("Failed to load font %s: %s", font_path, e)
        return ImageFont.load_default()

    def _parse_color(self, color: Union[str, Tuple]) -> Tuple[int, int, int, int]:
        """
        Parse a colour into an ``(r, g, b, a)`` tuple.

        Accepts any string understood by ``ImageColor.getrgb`` and 3- or
        4-element numeric tuples/lists. Three-component colours get alpha
        255. Anything else, including an unparsable string, yields opaque
        black.
        """
        fallback = (0, 0, 0, 255)
        if isinstance(color, str):
            try:
                parsed = tuple(ImageColor.getrgb(color.strip()))
            except ValueError:
                logger.warning("Unrecognized color %r, falling back to black", color)
                return fallback
            # getrgb returns 4 components for '#RRGGBBAA' / 'rgba(...)';
            # only append alpha when it is missing.
            return parsed if len(parsed) == 4 else parsed[:3] + (255,)
        if isinstance(color, (tuple, list)) and len(color) in (3, 4):
            if all(isinstance(c, (int, float)) for c in color):
                channels = tuple(int(c) for c in color)
                return channels if len(channels) == 4 else channels + (255,)
        return fallback

    # ------------------------------------------------------------------ #
    # Wrapping
    # ------------------------------------------------------------------ #
    def _wrap_text_to_width(self, text: str, font: "ImageFont.FreeTypeFont", max_width: int) -> str:
        """
        Wrap ``text`` so that no line is wider than ``max_width`` pixels when
        drawn with ``font`` (supports mixed Chinese/English text).

        - Existing newlines are kept as paragraph boundaries.
        - Paragraphs containing spaces break between words; others break
          between characters.

        Returns ``text`` unchanged when ``max_width`` is not a positive number.
        """
        mw = self._coerce_int(max_width or 0, 0)
        if mw <= 0:
            return text

        # 1x1 dummy canvas, used only for measuring.
        probe = ImageDraw.Draw(Image.new("RGBA", (1, 1)))

        def measure(s: str) -> float:
            try:
                return float(probe.textlength(s, font=font))
            except Exception:
                box = probe.textbbox((0, 0), s, font=font)
                return float((box[2] - box[0]) if box else 0)

        return self._greedy_wrap(text, mw, measure)

    # ------------------------------------------------------------------ #
    # Rendering
    # ------------------------------------------------------------------ #
    def render(self, text: str, style: Union[Dict[str, Any], str], cache: bool = True) -> str:
        """
        Render ``text`` and return the path of the resulting PNG.

        Args:
            text: Text to render (may contain newlines).
            style: A style dict, or a style name resolved through ``get_style``.
            cache: When true, an existing file for the same text/style is
                returned without re-rendering.

        Style structure:
        {
            "font_family": str,
            "font_size": int,
            "font_color": str,
            "bold": bool, "italic": bool, "underline": bool,
            "max_width": int,   (also accepted: "maxWidth", "text_box_width")
            "stroke": [{"color": str, "width": int}, ...],
            "shadow": {"color": str, "blur": int, "offset": [x, y], "opacity": float},
            "background": {
                "type": "box" | "circle", "color": str/list,
                "corner_radius": int, "padding": [t, r, b, l]
            }
        }
        """
        # 0. Resolve the style.
        if isinstance(style, str):
            style = get_style(style)
        style = style or {}
        text = "" if text is None else str(text)

        # 1. Cache lookup (the key is based on the text before wrapping).
        cache_key = hashlib.md5(f"{text}_{str(style)}".encode()).hexdigest()
        cache_path = CACHE_DIR / f"{cache_key}.png"
        if cache and cache_path.exists():
            return str(cache_path)

        # 2. Basic parameters.
        font_size = self._coerce_int(style.get("font_size", 60), 60)
        font = self._get_font(self._resolve_font_path(style.get("font_family")), font_size)
        font_color = self._parse_color(style.get("font_color", "#FFFFFF"))
        bold = bool(style.get("bold", False))
        italic = bool(style.get("italic", False))
        underline = bool(style.get("underline", False))
        spacing = int(font_size * self._LINE_SPACING_RATIO)

        # 3. Optional automatic wrapping.
        max_width = self._coerce_int(
            style.get("max_width") or style.get("maxWidth") or style.get("text_box_width"), 0
        )
        if max_width > 0:
            text = self._wrap_text_to_width(text, font, max_width)

        # 4. Measure the text (multi-line aware).
        probe = ImageDraw.Draw(Image.new("RGBA", (1, 1)))
        try:
            bbox = probe.multiline_textbbox((0, 0), text, font=font, spacing=spacing, align="center")
        except Exception:
            bbox = probe.textbbox((0, 0), text, font=font)
        if not bbox:
            bbox = (0, 0, 0, 0)
        # The bbox of text drawn at (0, 0) usually does not start at (0, 0);
        # keep the offset so the ink can be placed exactly where we want it.
        ink_dx, ink_dy = bbox[0], bbox[1]
        text_w = bbox[2] - bbox[0]
        text_h = bbox[3] - bbox[1]

        # 5. Compute the canvas size (padding, strokes, shadow included).
        strokes = style.get("stroke") or []
        if isinstance(strokes, dict):
            strokes = [strokes]  # legacy single-stroke format
        strokes = [s for s in strokes if isinstance(s, dict)]
        max_stroke = max((self._coerce_int(s.get("width", 0), 0) for s in strokes), default=0)

        shadow = style.get("shadow") or {}
        if not isinstance(shadow, dict):
            shadow = {}
        shadow_blur = shadow.get("blur") or 0
        raw_offset = shadow.get("offset") or [0, 0]
        try:
            off_x, off_y = int(raw_offset[0]), int(raw_offset[1])
        except (TypeError, ValueError, IndexError):
            off_x = off_y = 0

        bg = style.get("background") or {}
        if not isinstance(bg, dict):
            bg = {}
        has_bg_shape = bg.get("type") in ("box", "circle")
        padding = self._normalize_padding(bg.get("padding"))

        # Content area = text + padding.
        content_w = text_w + padding[1] + padding[3]
        content_h = text_h + padding[0] + padding[2]

        underline_gap = max(2, int(font_size * 0.08))
        underline_th = max(2, int(font_size * 0.06))

        # Extra margin around the content for strokes, shadow, the italic
        # lean and the underline. Transparent leftovers are cropped at the
        # end, so being generous here only prevents clipping.
        extra_margin = max_stroke + shadow_blur + max(abs(off_x), abs(off_y)) + 10
        if italic:
            extra_margin += int(self._ITALIC_SHEAR * (text_h / 2 + max_stroke)) + 1
        if underline:
            extra_margin += underline_gap + underline_th
        canvas_w = int(content_w + extra_margin * 2)
        canvas_h = int(content_h + extra_margin * 2)

        # 6. Create the canvas.
        img = Image.new("RGBA", (canvas_w, canvas_h), (0, 0, 0, 0))
        draw = ImageDraw.Draw(img)

        # The content box is centred on the canvas; the text box sits inside
        # it, honouring (possibly asymmetric) padding.
        center_x = canvas_w // 2
        center_y = canvas_h // 2
        text_left = center_x - content_w / 2 + padding[3]
        text_top = center_y - content_h / 2 + padding[0]
        # Drawing origin that puts the ink's top-left corner at (text_left, text_top).
        origin_x = text_left - ink_dx
        origin_y = text_top - ink_dy
        text_mid_y = text_top + text_h / 2

        # 7. Draw order: shadow -> background -> strokes -> text.
        # --- Shadow (for the whole block) ---
        if shadow:
            base = self._parse_color(shadow.get("color", "#000000"))
            try:
                opacity = float(shadow.get("opacity", 0.5))
            except (TypeError, ValueError):
                opacity = 0.5
            opacity = min(1.0, max(0.0, opacity))
            shadow_color = (base[0], base[1], base[2], int(255 * opacity))

            # Draw the silhouette on a scratch layer.
            shadow_layer = Image.new("RGBA", (canvas_w, canvas_h), (0, 0, 0, 0))
            shadow_draw = ImageDraw.Draw(shadow_layer)
            if has_bg_shape:
                # With a drawable background the shadow follows its shape.
                self._draw_background(shadow_draw, bg, center_x, center_y, content_w, content_h, shadow_color)
            else:
                # Otherwise it follows the glyphs. Stroke shadows are not
                # rendered (too expensive for the benefit).
                self._draw_text(shadow_draw, (origin_x, origin_y), text, font, shadow_color, spacing)
                if italic:
                    shadow_layer = self._shear_layer(shadow_layer, text_mid_y)

            if shadow_blur > 0:
                shadow_layer = shadow_layer.filter(ImageFilter.GaussianBlur(shadow_blur))

            # Apply the offset. The destination is blank, so paste without a
            # mask: using the layer as its own mask would multiply the alpha
            # by itself and make the shadow far fainter than `opacity`.
            final_shadow = Image.new("RGBA", (canvas_w, canvas_h), (0, 0, 0, 0))
            final_shadow.paste(shadow_layer, (off_x, off_y))
            img = Image.alpha_composite(final_shadow, img)
            draw = ImageDraw.Draw(img)  # rebind to the new image

        # --- Background ---
        if has_bg_shape:
            bg_color = self._parse_color(bg.get("color", "#000000"))
            # TODO: support gradient backgrounds
            self._draw_background(draw, bg, center_x, center_y, content_w, content_h, bg_color)

        # --- Strokes and text ---
        # Italic glyphs (strokes included) go to a separate layer that is
        # sheared before compositing; otherwise draw straight onto the image.
        if italic:
            glyph_layer = Image.new("RGBA", img.size, (0, 0, 0, 0))
            target = ImageDraw.Draw(glyph_layer)
        else:
            glyph_layer = None
            target = draw

        # Widest stroke first so narrower ones stay visible on top. For a
        # list given inner -> outer this equals iterating it in reverse.
        ordered = sorted(
            reversed(strokes),
            key=lambda s: self._coerce_int(s.get("width", 0), 0),
            reverse=True,
        )
        for s in ordered:
            width = self._coerce_int(s.get("width", 0), 0)
            if width > 0:
                color = self._parse_color(s.get("color", "#000000"))
                self._draw_text(
                    target, (origin_x, origin_y), text, font, color, spacing,
                    stroke_width=width, stroke_fill=color,
                )

        # Bold is simulated by drawing the text twice with a 1px x-offset,
        # which is closer to a heavier weight than adding a stroke.
        for dx in ((0, 1) if bold else (0,)):
            self._draw_text(target, (origin_x + dx, origin_y), text, font, font_color, spacing)

        if glyph_layer is not None:
            glyph_layer = self._shear_layer(glyph_layer, text_mid_y)
            img = Image.alpha_composite(img, glyph_layer)
            draw = ImageDraw.Draw(img)

        # Underline: a bar below the text block, sized relative to the font.
        if underline:
            line_y = text_top + text_h + underline_gap
            draw.rectangle([text_left, line_y, text_left + text_w, line_y + underline_th], fill=font_color)

        # 8. Crop away the surrounding transparent area.
        ink_box = img.getbbox()
        if ink_box:
            img = img.crop(ink_box)

        # 9. Save.
        output_path = str(cache_path)
        img.save(output_path)
        logger.info("Rendered text: %s -> %s", text, output_path)
        return output_path

    def _draw_background(self, draw, bg, cx, cy, w, h, color):
        """
        Draw the background shape centred at ``(cx, cy)`` with size ``w`` x ``h``.

        ``bg["type"]`` selects the shape: ``"box"`` draws a rounded rectangle
        using ``bg["corner_radius"]``; ``"circle"`` draws an ellipse filling
        the box. Any other type draws nothing.
        """
        shape = bg.get("type")
        box = [cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2]
        if shape == "box":
            # Keep the radius within half of the shorter side.
            radius = bg.get("corner_radius", 0) or 0
            radius = max(0, min(radius, min(w, h) / 2))
            draw.rounded_rectangle(box, radius=radius, fill=color)
        elif shape == "circle":
            draw.ellipse(box, fill=color)
# Module-level singleton; constructing it resolves the default font path once at import time.
renderer = TextRenderer()