Refactor CLI and add preprocessing pipeline

- New CLI structure: -p project, -a action (required flags)
- Add -i import, -f force, -v verbose, --dry-run, --no-cache options
- Add preprocessor.py with chroma key filter (ProRes 4444 output)
- Support background images from shared_assets folder
- Support video metadata JSON files (talkinghead.json)
- Add validation for preprocessed output before render
- Update gnommo.sh with import command and new CLI interface
- Fix Python 3.9 compatibility (use Optional[X] instead of the X | None union syntax)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-12 15:45:19 +01:00
parent df900dfd59
commit 93fa820275
9 changed files with 763 additions and 287 deletions
+29 -8
View File
@@ -50,14 +50,23 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
output_path = output_path.resolve()
# Input: talking head video
talking_head_path = project_path / plan.talking_head.file
# Use resolved path if available, otherwise construct from file
talking_head_path = plan.talking_head_path or (project_path / plan.talking_head.file)
cmd.extend(["-i", str(talking_head_path)])
# Input: background video (if specified)
has_background = bool(plan.config.background_video)
# Input: background image/video (if specified)
bg_file = plan.config.background or plan.config.background_video
has_background = bool(bg_file)
bg_is_image = False
if has_background:
bg_path = project_path / plan.config.background_video
# Try project folder first, then parent (for shared_assets)
bg_path = project_path / bg_file
if not bg_path.exists():
bg_path = project_path.parent / bg_file
cmd.extend(["-i", str(bg_path)])
# Check if background is an image
image_extensions = {".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".webp"}
bg_is_image = bg_path.suffix.lower() in image_extensions
# Input: slide images (from slides_dir, same directory as slides.json)
slides_dir = plan.slides_dir.resolve() if plan.slides_dir else project_path / "media" / "slides"
@@ -70,7 +79,7 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
slide_inputs.append(event.slide_id)
# Build filter_complex
filter_complex = build_filter_complex(plan, has_background, slide_inputs)
filter_complex = build_filter_complex(plan, has_background, slide_inputs, bg_is_image)
cmd.extend(["-filter_complex", filter_complex])
# Map output video and audio
@@ -96,12 +105,13 @@ def build_filter_complex(
plan: RenderPlan,
has_background: bool,
slide_inputs: list[str],
bg_is_image: bool = False,
) -> str:
"""
Build the filter_complex string for FFmpeg.
Layer structure:
- Layer 1: Background (solid color or video)
- Layer 1: Background (solid color, image, or video)
- Layer 2: Talking head
- Layer 3: Slides (with time-based enable)
"""
@@ -118,8 +128,19 @@ def build_filter_complex(
# Create base layer (background)
if has_background:
filters.append(f"[{bg_idx}:v]scale={width}:{height}:force_original_aspect_ratio=increase,"
f"crop={width}:{height}[bg]")
if bg_is_image:
# For images: loop the single frame indefinitely to create a video stream, then scale, crop, and set the frame rate
filters.append(
f"[{bg_idx}:v]loop=loop=-1:size=1:start=0,"
f"scale={width}:{height}:force_original_aspect_ratio=increase,"
f"crop={width}:{height},fps={plan.config.fps}[bg]"
)
else:
# For videos: scale to cover the output frame, then crop to the exact size
filters.append(
f"[{bg_idx}:v]scale={width}:{height}:force_original_aspect_ratio=increase,"
f"crop={width}:{height}[bg]"
)
base_label = "bg"
else:
# Create solid color background