df900dfd59
gnommo.sh: - Bash wrapper for easy CLI usage - Commands: validate, transcribe, align, render, all - `gnommo.sh -p video1 all` runs full pipeline Slide scaling: - Slides now scale to full frame (1920x1080) - Transparent areas show through to layers below - Positioned at 0,0 for full overlay targetheight percentage: - Supports percentage values like "100%" - Calculates actual height from frame resolution - "100%" on 1080p = 1080px height Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
202 lines
6.3 KiB
Python
202 lines
6.3 KiB
Python
"""Load stage: generate and execute FFmpeg commands."""
|
|
|
|
import subprocess
|
|
from pathlib import Path
|
|
|
|
from .errors import RenderError
|
|
from .models import RenderPlan, SlideEvent, SLIDE_LAYOUTS
|
|
|
|
|
|
def render(plan: RenderPlan, output_path: Path, verbose: bool = False) -> None:
    """
    Render the final video using FFmpeg.

    Generates a filter_complex command that:
    1. Scales background video (if present) or creates solid color
    2. Overlays talking head at configured position
    3. Overlays slides at their configured positions with time-based enable

    Args:
        plan: Render plan handed to ``build_ffmpeg_command``.
        output_path: Destination file; missing parent directories are created.
        verbose: When true, print the FFmpeg command before running it.

    Raises:
        RenderError: If FFmpeg cannot be started, or if it exits with a
            non-zero status. The error carries the shell-quoted command and
            either the captured stderr or the OS error text.
    """
    import shlex  # function-scope import: only needed for display quoting

    # Ensure output directory exists
    output_path.parent.mkdir(parents=True, exist_ok=True)

    cmd = build_ffmpeg_command(plan, output_path)

    # The filter graph contains ';', '[' and quotes, and paths may contain
    # spaces, so a plain " ".join() would not give a command that can be
    # pasted back into a shell. shlex.join quotes each argument as needed.
    cmd_text = shlex.join(cmd)

    if verbose:
        print("FFmpeg command:")
        print(cmd_text)
        print()

    try:
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
        )
    except OSError as err:
        # Raised when the executable is missing or not runnable; report it
        # through the same error type as any other rendering failure.
        raise RenderError(
            "FFmpeg could not be started",
            command=cmd_text,
            stderr=str(err),
        ) from err

    if result.returncode != 0:
        raise RenderError(
            "FFmpeg rendering failed",
            command=cmd_text,
            stderr=result.stderr,
        )
|
|
|
|
|
|
def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
    """Assemble the FFmpeg argument list that renders ``plan`` to ``output_path``.

    Inputs are added in a fixed order that ``build_filter_complex`` relies on:
    the talking head video first, then the background video when one is
    configured, then one input per distinct slide id in order of first use.

    Args:
        plan: Render plan supplying the project path, media files, slide
            events, duration and output configuration.
        output_path: Destination file; resolved to an absolute path.

    Returns:
        The complete command as a list of arguments, starting with ``ffmpeg``.
    """
    project_root = plan.project_path.resolve()
    destination = output_path.resolve()

    # "-y" overwrites an existing output file; input 0 is the talking head.
    args = ["ffmpeg", "-y", "-i", str(project_root / plan.talking_head.file)]

    # Optional background video becomes input 1.
    has_background = bool(plan.config.background_video)
    if has_background:
        args += ["-i", str(project_root / plan.config.background_video)]

    # Slide images live in plan.slides_dir when set, else under the project.
    if plan.slides_dir:
        slides_root = plan.slides_dir.resolve()
    else:
        slides_root = project_root / "media" / "slides"

    # Each distinct slide is added as an input once, in order of first use.
    slide_ids: list[str] = []
    for event in plan.slide_events:
        if event.slide_id in slide_ids:
            continue
        args += ["-i", str(slides_root / event.slide_def.image)]
        slide_ids.append(event.slide_id)

    args += ["-filter_complex", build_filter_complex(plan, has_background, slide_ids)]

    # Video comes from the filter graph, audio from the talking head input.
    args += ["-map", "[vout]", "-map", "0:a"]

    # Output settings; "-t" limits the output duration.
    args += [
        "-t", str(plan.total_duration),
        "-c:v", "libx264",
        "-preset", "fast",
        "-crf", "23",
        "-c:a", "aac",
        "-b:a", "192k",
        "-r", str(plan.config.fps),
        str(destination),
    ]
    return args
|
|
|
|
|
|
def build_filter_complex(
    plan: RenderPlan,
    has_background: bool,
    slide_inputs: list[str],
) -> str:
    """
    Compose the FFmpeg filter_complex graph for a render plan.

    Layers, bottom to top:
    - Background: input 1 scaled and cropped to the frame, or a black
      color source when there is no background video
    - Talking head: input 0 scaled to the configured height and overlaid
      at the configured x/y
    - Slides: one overlay per slide event, enabled only between the
      event's start and end time

    Args:
        plan: Render plan providing resolution, fps, talking head
            configuration and slide events.
        has_background: Whether input 1 is a background video; this shifts
            the index of the first slide input from 1 to 2.
        slide_inputs: Slide ids in the order their images were added as
            inputs.

    Returns:
        The filter chains joined with ";", ending in the "[vout]" label.
    """
    config = plan.config
    frame_w, frame_h = config.resolution
    events = plan.slide_events

    # Input layout: 0 = talking head, 1 = background (if present), then slides.
    if has_background:
        first_slide_input = 2
        background = (
            f"[1:v]scale={frame_w}:{frame_h}:force_original_aspect_ratio=increase,"
            f"crop={frame_w}:{frame_h}[bg]"
        )
    else:
        first_slide_input = 1
        background = f"color=c=black:s={frame_w}x{frame_h}:r={config.fps}[bg]"

    # A positive target_height is used as-is; otherwise the height is
    # derived from the frame height and target_height_percent.
    head = config.talking_head
    if head.target_height > 0:
        head_height = head.target_height
    else:
        head_height = int(frame_h * head.target_height_percent)

    chain = [
        background,
        f"[0:v]scale=-1:{head_height}[head]",
        f"[bg][head]overlay=x={head.x}:y={head.y}[base]",
    ]

    # Without slides the composited base is simply passed through as output.
    if not events:
        chain.append("[base]copy[vout]")
        return ";".join(chain)

    last = len(events) - 1
    previous = "base"
    for n, event in enumerate(events):
        source = first_slide_input + slide_inputs.index(event.slide_id)
        # The final overlay must produce the label the command maps as output.
        produced = "vout" if n == last else f"v{n}"
        window = f"between(t,{event.start_time:.3f},{event.end_time:.3f})"

        # Fit the slide inside the frame and pad it out to full frame size.
        chain.append(
            f"[{source}:v]scale={frame_w}:{frame_h}:"
            f"force_original_aspect_ratio=decrease,"
            f"pad={frame_w}:{frame_h}:(ow-iw)/2:(oh-ih)/2:color=0x00000000[s{n}]"
        )
        # Full-frame overlay at 0,0, active only during the event's window.
        chain.append(
            f"[{previous}][s{n}]overlay=x=0:y=0:enable='{window}'[{produced}]"
        )
        previous = produced

    return ";".join(chain)
|
|
|
|
|
|
def generate_ffmpeg_command_string(plan: RenderPlan, output_path: Path) -> str:
    """Generate a human-readable FFmpeg command string (for debugging).

    The command comes from ``build_ffmpeg_command``. Each option is placed on
    its own backslash-continued line together with its value, and the
    ``-filter_complex`` graph is wrapped in double quotes and split so that
    every ";"-separated filter chain starts on a new line.

    Args:
        plan: Render plan passed through to ``build_ffmpeg_command``.
        output_path: Output file passed through to ``build_ffmpeg_command``.

    Returns:
        The multi-line command text, beginning with the executable name.
    """
    cmd = build_ffmpeg_command(plan, output_path)

    # cmd[0] is the executable. It is emitted once as the leading line of the
    # result, so only the remaining arguments are formatted below; walking
    # the full list would print the program name twice.
    executable, args = cmd[0], cmd[1:]

    # Format nicely with line breaks
    result = []
    i = 0
    while i < len(args):
        arg = args[i]
        if arg == "-filter_complex":
            # chr(10) is a newline: one filter chain per line inside the quotes.
            graph = args[i + 1].replace(";", ";" + chr(10) + " ")
            result.append(f" -filter_complex \"\n {graph}\n \"")
            i += 2
        elif arg.startswith("-") and i + 1 < len(args) and not args[i + 1].startswith("-"):
            # Option followed by its value: keep the pair on one line.
            result.append(f" {arg} {args[i + 1]}")
            i += 2
        else:
            # Bare flag (e.g. "-y") or positional argument (the output path).
            result.append(f" {arg}")
            i += 1

    return f"{executable} \\\n" + " \\\n".join(result)
|