Add gnommo.sh wrapper and fix slide/scaling issues

gnommo.sh:
- Bash wrapper for easy CLI usage
- Commands: validate, transcribe, align, render, all
- `gnommo.sh -p video1 all` runs full pipeline

Slide scaling:
- Slides now scale to full frame (1920x1080)
- Transparent areas show through to layers below
- Positioned at 0,0 for full overlay

targetheight percentage:
- Supports percentage values like "100%"
- Calculates actual height from frame resolution
- "100%" on 1080p = 1080px height

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-12 15:13:33 +01:00
parent 216131e072
commit df900dfd59
4 changed files with 184 additions and 14 deletions
@@ -0,0 +1,157 @@
+#!/bin/bash
+#
+# GnommoEditor - Code-first video editing pipeline
+#
+# Usage:
+#   gnommo.sh -p <project>              Render project
+#   gnommo.sh -p <project> validate     Validate only
+#   gnommo.sh -p <project> transcribe   Transcribe video
+#   gnommo.sh -p <project> align        Align markers to transcript
+#   gnommo.sh -p <project> all          Full pipeline: transcribe → align → render
+#
+
+# Abort on the first unhandled command failure.
+set -e
+
+# Absolute path of the directory that contains this script; the virtual
+# environment is expected to live directly inside it.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+VENV_PYTHON="$SCRIPT_DIR/venv/bin/python"
+
+# The interpreter is looked up next to the script, so the creation hint has to
+# point there as well instead of at whatever the current directory happens to
+# be. Diagnostics go to stderr so they never mix with regular output.
+if [[ ! -f "$VENV_PYTHON" ]]; then
+    echo "Error: Virtual environment not found at $SCRIPT_DIR/venv" >&2
+    echo "Create it with: python -m venv \"$SCRIPT_DIR/venv\" && \"$SCRIPT_DIR/venv/bin/pip\" install openai-whisper" >&2
+    exit 1
+fi
+
+# Defaults; the argument parser further down overrides them.
+PROJECT=""          # project directory, set by -p/--project
+COMMAND="render"    # sub-command to run when none is given
+VERBOSE=""          # becomes "--verbose" when -v/--verbose is passed
+
+# Print the help text to stdout, then terminate the calling shell with
+# status 0. The here-document delimiter is quoted so the text is emitted
+# literally, without any expansion.
+usage() {
+    cat <<'EOF'
+Usage: gnommo.sh -p <project> [command] [options]
+
+Commands:
+  render      Render video (default)
+  validate    Validate project only
+  transcribe  Transcribe video audio
+  align       Align manuscript to transcript
+  all         Full pipeline: transcribe → align → render
+
+Options:
+  -p <dir>    Project directory (required)
+  -v          Verbose output
+  -h          Show this help
+
+Examples:
+  gnommo.sh -p video1              # Render video1 project
+  gnommo.sh -p video1 validate     # Validate only
+  gnommo.sh -p video1 all          # Full pipeline
+EOF
+    exit 0
+}
+
+# Walk the command line left to right. Options and the sub-command may appear
+# in any order; when several sub-commands are given the last one wins.
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        -p|--project)
+            # Without this guard a trailing "-p" makes `shift 2` fail and
+            # `set -e` ends the script without printing anything.
+            if [[ $# -lt 2 ]]; then
+                echo "Error: $1 requires a project directory" >&2
+                exit 1
+            fi
+            PROJECT="$2"
+            shift 2
+            ;;
+        -v|--verbose)
+            VERBOSE="--verbose"
+            shift
+            ;;
+        -h|--help)
+            usage
+            ;;
+        validate|render|transcribe|align|all)
+            COMMAND="$1"
+            shift
+            ;;
+        *)
+            echo "Unknown option: $1" >&2
+            # usage terminates the shell it runs in with status 0. Running it
+            # in a subshell still shows the help text but lets this script
+            # report the bad invocation with a non-zero status.
+            (usage) >&2
+            exit 1
+            ;;
+    esac
+done
+
+# Sanity-check the project before anything is handed to Python. Every failure
+# is reported on stderr and ends the script with status 1.
+if [[ -z "$PROJECT" ]]; then
+    echo "Error: Project directory required (-p <project>)" >&2
+    echo "" >&2
+    # usage exits with status 0; confine that to a subshell so the missing
+    # argument is still reported as a failure.
+    (usage) >&2
+    exit 1
+fi
+
+if [[ ! -d "$PROJECT" ]]; then
+    echo "Error: Project directory not found: $PROJECT" >&2
+    exit 1
+fi
+
+if [[ ! -f "$PROJECT/project.json" ]]; then
+    echo "Error: project.json not found in $PROJECT" >&2
+    exit 1
+fi
+
+# Run commands
+run_gnommo() {
+    "$VENV_PYTHON" -m gnommo "$@"
+}
+
+case $COMMAND in
+    validate)
+        echo "=== Validating $PROJECT ==="
+        run_gnommo validate "$PROJECT"
+        ;;
+
+    transcribe)
+        echo "=== Transcribing $PROJECT ==="
+        VIDEO=$(find "$PROJECT/media" -name "*.mov" -o -name "*.mp4" | head -1)
+        if [[ -z "$VIDEO" ]]; then
+            echo "Error: No video file found in $PROJECT/media/"
+            exit 1
+        fi
+        run_gnommo transcribe "$VIDEO"
+        ;;
+
+    align)
+        echo "=== Aligning $PROJECT ==="
+        run_gnommo align "$PROJECT"
+        ;;
+
+    render)
+        echo "=== Rendering $PROJECT ==="
+        run_gnommo render "$PROJECT" $VERBOSE
+        ;;
+
+    all)
+        echo "=== Full Pipeline: $PROJECT ==="
+        echo ""
+
+        # Step 1: Transcribe
+        echo ">>> Step 1/3: Transcribe"
+        VIDEO=$(find "$PROJECT/media" -name "*.mov" -o -name "*.mp4" | grep -v transcript | head -1)
+        if [[ -z "$VIDEO" ]]; then
+            echo "Error: No video file found in $PROJECT/media/"
+            exit 1
+        fi
+        TRANSCRIPT="${VIDEO%.*}.transcript.json"
+        if [[ -f "$TRANSCRIPT" ]]; then
+            echo "    Transcript exists, skipping: $TRANSCRIPT"
+        else
+            run_gnommo transcribe "$VIDEO"
+        fi
+        echo ""
+
+        # Step 2: Align
+        echo ">>> Step 2/3: Align"
+        run_gnommo align "$PROJECT"
+        echo ""
+
+        # Step 3: Render
+        echo ">>> Step 3/3: Render"
+        run_gnommo render "$PROJECT" $VERBOSE
+        ;;
+
+    *)
+        echo "Unknown command: $COMMAND"
+        usage
+        ;;
+esac
@@ -11,6 +11,7 @@ class TalkingHeadConfig:
    x: int
    y: int
    target_height: int  # in pixels, or -1 for percentage-based
+    target_height_percent: float = 0.0  # percentage (0.0-1.0) if target_height is -1


@dataclass
@@ -113,10 +113,12 @@ def parse_project_config(project_path: Path) -> ProjectConfig:

    # Parse talking head config
    th_data = data.get("talkinghead", {})
+    th_height, th_height_pct = _parse_dimension(th_data.get("targetheight", 200))
    talking_head = TalkingHeadConfig(
        x=th_data.get("x", 100),
        y=th_data.get("y", 100),
-        target_height=_parse_dimension(th_data.get("targetheight", 200)),
+        target_height=th_height,
+        target_height_percent=th_height_pct,
    )

    # Parse resolution
@@ -135,15 +137,21 @@ def parse_project_config(project_path: Path) -> ProjectConfig:
    )


-def _parse_dimension(value: Any) -> int:
-    """Parse a dimension value (can be int or string like '100%')."""
+def _parse_dimension(value: Any) -> tuple[int, float]:
+    """
+    Parse a dimension value (can be int or string like '100%').
+
+    Returns:
+        Tuple of (pixels, percentage). If pixels is -1, use percentage.
+    """
    if isinstance(value, int):
-        return value
+        return value, 0.0
    if isinstance(value, str):
        if value.endswith("%"):
-            return -1  # Percentage marker, will be resolved during rendering
-        return int(value)
-    return 200  # default
+            pct = float(value[:-1]) / 100.0
+            return -1, pct
+        return int(value), 0.0
+    return 200, 0.0  # default


 def parse_slides(project_path: Path, config: ProjectConfig = None) -> dict[str, SlideDefinition]:
@@ -128,7 +128,11 @@ def build_filter_complex(

    # Scale and position talking head
    th_config = plan.config.talking_head
-    th_height = th_config.target_height if th_config.target_height > 0 else height
+    if th_config.target_height > 0:
+        th_height = th_config.target_height
+    else:
+        # Percentage-based: calculate from frame height
+        th_height = int(height * th_config.target_height_percent)

    filters.append(
        f"[{talking_head_idx}:v]scale=-1:{th_height}[head]"
@@ -142,24 +146,24 @@ def build_filter_complex(
    current_label = "base"

    # Add slide overlays with time-based enable
+    # Slides are scaled to full frame - transparency shows layers below
    for i, event in enumerate(plan.slide_events):
        slide_idx = slide_start_idx + slide_inputs.index(event.slide_id)
-        layout = SLIDE_LAYOUTS.get(event.slide_def.type, SLIDE_LAYOUTS["square"])

-        # Scale slide to fit layout while preserving aspect ratio
+        # Scale slide to full frame size (transparent areas show through)
        slide_label = f"s{i}"
        filters.append(
-            f"[{slide_idx}:v]scale={layout['width']}:{layout['height']}:"
-            f"force_original_aspect_ratio=decrease[{slide_label}]"
+            f"[{slide_idx}:v]scale={width}:{height}:"
+            f"force_original_aspect_ratio=decrease,pad={width}:{height}:(ow-iw)/2:(oh-ih)/2:color=0x00000000[{slide_label}]"
        )

-        # Overlay with time-based enable
+        # Overlay at 0,0 (full frame) with time-based enable
        next_label = f"v{i}" if i < len(plan.slide_events) - 1 else "vout"
        enable_expr = f"between(t,{event.start_time:.3f},{event.end_time:.3f})"

        filters.append(
            f"[{current_label}][{slide_label}]overlay="
-            f"x={layout['x']}:y={layout['y']}:"
+            f"x=0:y=0:"
            f"enable='{enable_expr}'[{next_label}]"
        )