diff --git a/README.md b/README.md new file mode 100644 index 0000000..a769d9c --- /dev/null +++ b/README.md @@ -0,0 +1,362 @@ +# Gnommo + +Gnommo is an ADHD-friendly video editor for coders. + +1. Design the presentation in Keynote. +2. Set up the greenscreen and audio settings once. +3. Gnommo automatically times slides and videos to your voice. +4. Limited options mean you waste less time on stuff that isn't important. + +A code-first video editing pipeline for creating narrated presentations with slides, video overlays, and synchronized audio. + +## Quick Start + +```bash +# Create a project +gnommo -p myproject init + +# Import slides and presenter notes from the Keynote file +gnommo -p myproject import + +# Process the narration videos with video and audio filters +gnommo -p myproject pre + +# Stitch the narration segments together into one full-length narration +gnommo -p myproject stitch + +# Transcribe the actual narrated content +gnommo -p myproject transcribe + +# Generate the final video +gnommo -p myproject render + +# Generate the final YouTube assets: manuscript file and description +gnommo -p myproject youtubeready + +# Free up disk space locally by saving your project to an external drive +gnommo -p myproject archive +``` + +## Proxying +Using the `--proxy` flag makes everything faster because it creates smaller files. 
+``` +gnommo -p myproject pre --proxy +gnommo -p myproject stitch --proxy +gnommo -p myproject render --proxy +``` + +## Lowres +Renders the final video in low-res mode, for faster iteration. +``` +gnommo -p myproject render --res low +``` + +## Project Structure + +``` +myproject/ +├── project.json # Project configuration +├── manuscript.txt # Narration script with [markers] +├── media/ +│ ├── slides/ +│ │ ├── slides.json # Slide definitions +│ │ └── *.png # Slide images +│ ├── videos/ +│ │ ├── videos.json # Video source definitions +│ │ └── *.mov # Video files +│ ├── narration/ +│ │ ├── narration.json # Narration segment definitions +│ │ └── *.mov # Raw narration recordings +│ └── audio/ +│ ├── audio.json # Audio effect definitions +│ └── *.mp3 # Sound effects +└── output/ + ├── final.mp4 # Rendered output + └── preview.mp4 # Preview (lower resolution, faster render) +``` + +## The Five Stages + +Gnommo uses a five-stage pipeline for processing video projects: + +### Stage 1: Init + +Creates a folder and a default project.json file inside it. +```bash +gnommo -p myproject init +``` + +### Stage 2: Import +First: place the `myproject.key` Keynote presentation in the `myproject` folder. +Place the videos, audio and narration you want to use in their respective folders inside `myproject/media`. +Then: run this command, which scans the media directories and generates JSON definition files. 
+ +```bash +gnommo -p myproject import +``` + +**What it does:** +- Opens the Keynote presentation and exports all slides as PNG images into `media/slides/` +- Scans `media/slides/` for images → generates `slides.json` +- Scans `media/videos/` for video files → generates `videos.json` +- Scans `media/narration/` for recordings → generates `narration.json` +- Scans `media/audio/` for sound effects → generates `audio.json` + +**When to use:** After adding new media files, to populate the JSON definitions with the actual files in the folders. + +--- + +### Stage 3: Preprocess + +Applies video filters (chroma key, scaling, etc.) to narration segments. + +```bash +gnommo -p myproject pre +``` + +**What it does:** +- Reads filter definitions from `project.json` and `narration.json` +- Processes each narration segment with its configured filters +- Outputs processed files (e.g., `segment1_processed.mov`) + +**When to use:** After recording narration that needs background removal, sound normalization or other processing. + +--- + +### Stage 4: Stitch +First: go through the source videos and set the `begin` and `end` trim parameters in `narration.json`. + +Then: run the command to stitch the usable parts of the narration segments into a single continuous video. + +```bash +gnommo -p myproject stitch +``` + +**What it does:** +- Reads segments from `narration.json` +- Concatenates them in order, respecting `begin`/`end` trim points +- Outputs `narration_combined.mov` in `media/videos/` +- Adds `narration_combined` entry to `videos.json` with volume settings +- Generates word-level timestamps from the narration using Whisper speech recognition. + +**When to use:** After preprocessing, or adjusting trim settings, to create the main narration scaffolding. + +### Stage 5: Render + +Composites all elements into the final video. 
+ +```bash +gnommo -p myproject render +``` + +**What it does:** +- Parses `manuscript.txt` for slide/video markers +- Aligns markers to transcription timestamps +- Composites background, narration, slides, and video overlays +- Outputs `final.mp4` + +**Options:** +```bash +gnommo -p myproject render --dry-run # Show FFmpeg command without running +gnommo -p myproject render --slides S1:S10 # Render only slides S1 through S10 +gnommo -p myproject render --proxy # Fast preview at reduced resolution +``` + +--- + +## Shortcut: All Stages + +Run all stages 2-5 and render in one command: + +```bash +gnommo -p myproject all +``` + +--- + +## Manuscript Format + +The manuscript is plain text with embedded markers: + +``` +[S1] Welcome to this presentation. + +[S2] Let me show you how this works. + +[video:demo] Here's a quick demonstration. + +[Zoom1] Notice this important detail. + +[Reset] And that concludes our overview. +``` + +**Marker types:** +- `[S1]`, `[S2]` - Slide markers (reference slides.json) +- `[video:id]` - Triggered video overlay +- `[narration:id]` - Start continuous narration video +- `[Zoom1]`, `[Reset]` - Camera presets +- `[Awoosh]` - Audio effect trigger + +--- + +## External Storage (GnommoCache) + +For large projects, gnommo supports transparent external storage fallback. + +**Setup:** Create `~/.gnommo.conf`: +```ini +[cache] +path = /Volumes/ExternalDrive/gnommo +``` + +**How it works:** +- Files are first looked up locally in the project directory +- If not found, gnommo checks `{cache_path}/{project_name}/...` +- The 📁 indicator shows files loaded from external storage + +**Archive to external storage:** +```bash +gnommo -p myproject archive # Sync project to cache +gnommo -p myproject archive --dry-run # Preview what would sync +``` + +This allows you to move large preprocessed files to external storage while keeping the project functional. + +--- + +## Common Workflows + +### New Project Setup +```bash +# 1. 
Create project structure and add media files +mkdir -p myproject/media/{slides,videos,narration,audio} + +# 2. Create project.json with basic config + +# 3. Import media to generate JSON definitions +gnommo -p myproject import + +# 4. Edit JSON files to configure filters, trim points, etc. + +# 5. Run full pipeline +gnommo -p myproject all +``` + +### Re-render After Editing Manuscript +```bash +gnommo -p myproject render +``` + +### Re-process After Recording New Narration +```bash +gnommo -p myproject pre +gnommo -p myproject stitch +gnommo -p myproject transcribe +gnommo -p myproject render +``` + +--- + +## Additional Commands + +```bash +gnommo -p myproject validate # Check for errors without rendering +gnommo -p myproject description # Generate YouTube description with chapters +gnommo -p myproject transcribe --final # Transcribe final.mp4 for subtitles +``` + +--- + +## Glitch University — Server Sync + +Gnommo can push project metadata and short scripts to a gnommoweb server, +and pull changes back. This keeps the platform database in sync with your +local project files without manual copy-paste. + +**Setup** — add to `gnommo/.env`: +```ini +GNOMMOWEB_URL=http://localhost:3001 +GNOMMOWEB_API_KEY=your_content_api_key +``` + +### Push + +Registers the project on the server and syncs all defined shorts (including +their scripts). Creates a filming task for each new short. + +```bash +gnommo -p myproject push # push local → server +gnommo -p myproject push --force # overwrite server even if it has newer changes +``` + +On the first push, gnommo creates: +- A stub video record in the platform database +- One short record per entry in `project.json["shorts"]` +- One task per new short ("Film short: …") + +Re-running push is safe — existing records are updated, no duplicate tasks. +Scripts are only overwritten on the server if the local file has changed; +edits made in the staff UI are preserved. 
+ +### Pull + +Fetches the current project state from the server and merges the `shorts` +array back into `project.json`. Useful after editing short titles or hooks +in the web interface. + +```bash +gnommo -p myproject pull # pull server → local +gnommo -p myproject pull --force # overwrite local even if it has unsaved changes +``` + +Pull preserves local `script` file paths — it won't overwrite your `.md` +script files. + +### Conflict guards + +Both commands check for conflicts before writing: + +| Situation | Push behaviour | Pull behaviour | +|---|---|---| +| Server has changes you haven't pulled | Blocked — pull first | Proceeds (that's the point) | +| Local has changes you haven't pushed | Proceeds (that's the point) | Blocked — push first | +| `--force` flag | Overrides | Overrides | + +Sync state is stored in `/.gnommo_sync.json` (tracked by git, +so collaborators share the same reference point). + +### Defining shorts in `project.json` + +Add a `shorts` array to your project: + +```json +"shorts": [ + { + "id": "short_pixelated_universe", + "title": "Is the universe pixelated?", + "hook": "What if space is made of tiny blocks?", + "script": "shorts/short_pixelated_universe.md", + "platform_targets": ["youtube"] + } +] +``` + +- `id` — unique slug within the project, used as the upsert key +- `script` — relative path to a markdown file with the full short narration +- `hook` — opening line / thumbnail caption +- `platform_targets` — list of platforms (currently `["youtube"]`) + +Scripts are plain markdown with the same `[SLIDE: name]` markers and +`{word}` whisper timestamp tags used elsewhere in gnommo. 
+ +--- + +## Requirements + +- Python 3.10+ +- FFmpeg +- OpenAI Whisper (for transcription) + +```bash +pip install openai-whisper +``` diff --git a/backup.json b/backup.json new file mode 100644 index 0000000..7d633a3 --- /dev/null +++ b/backup.json @@ -0,0 +1,23 @@ +{ + "drives": { + "lacie": { + "mount_path": "/Volumes/LaCie Jens", + "backups": { + "small": { + "last_attempt": "2026-02-26T10:09:08Z", + "last_status": "success", + "last_completed": "2026-02-26T10:09:14Z" + }, + "big": { + "last_attempt": "2026-02-27T12:17:30Z", + "last_status": "failed" + }, + "all": { + "last_attempt": "2026-02-27T12:46:26Z", + "last_status": "success", + "last_completed": "2026-02-27T13:41:50Z" + } + } + } + } +} \ No newline at end of file diff --git a/backup.sh b/backup.sh new file mode 100755 index 0000000..ab024b3 --- /dev/null +++ b/backup.sh @@ -0,0 +1,410 @@ +#!/bin/zsh +# +# Gnommo Backup Utility +# Syncs project files to an external drive using rsync +# +# Usage: ./backup.sh <mode> <drive> [options] +# +# Modes: +# small - Keynotes, images, metadata, code (excludes large media) +# big - Large video/audio files only (for offloading) +# all - Complete mirror of entire project +# +# Drives: +# lacie - /Volumes/LaCie Jens/gnommo +# gnommodisk - /Volumes/gnommodisk/gnommo +# status - Show backup status for all drives (a command; takes no drive) +# +# Options: +# --dry-run Show what would be transferred without copying +# --delete Delete files on destination that don't exist in source +# --no-progress Disable the transfer progress display (progress is shown by default) +# + +set -e + +# Configuration +PROJECT_DIR="/Users/jenstandstad/Projects/gnommo" +BACKUP_JSON="$PROJECT_DIR/backup.json" +BIG_FILE_SIZE="100M" + +# Known drives (name -> mount path) +typeset -A KNOWN_DRIVES +KNOWN_DRIVES=( + lacie "/Volumes/LaCie Jens" + gnommodisk "/Volumes/gnommodisk" +) + +# Big file extensions (video/audio that tend to be large) +BIG_EXTENSIONS=("mov" "mp4" "m4v" "avi" "mkv" "aifc" "aiff" "wav") + +# Initialize backup.json if it doesn't exist 
+init_backup_json() {
+    # Seed an empty tracking file on first use; an existing file is left untouched.
+    if [[ ! -f "$BACKUP_JSON" ]]; then
+        cat > "$BACKUP_JSON" << 'EOF'
+{
+  "drives": {}
+}
+EOF
+    fi
+}
+
+# Record one backup state transition in backup.json (JSON handled by Python).
+#
+# Globals:   BACKUP_JSON (read), KNOWN_DRIVES (read), DESTINATION (read)
+# Arguments: $1 - drive name (key under "drives")
+#            $2 - backup mode (key under that drive's "backups")
+#            $3 - "started", "completed" or "failed"
+#
+# The values are handed to Python as arguments and the heredoc is quoted, so
+# the shell never splices them into the Python source. A path containing a
+# quote or backslash therefore cannot break (or inject into) the program.
+update_backup_json() {
+    local drive_name="$1"
+    local backup_mode="$2"
+    local backup_status="$3"  # "started", "completed" or "failed"
+    # Known drives record their mount point; anything else records the
+    # destination that was given on the command line.
+    local mount_path="${KNOWN_DRIVES[$drive_name]:-$DESTINATION}"
+    # Declared separately so a failing `date` is not masked by `local`.
+    local timestamp
+    timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
+
+    python3 - "$BACKUP_JSON" "$drive_name" "$backup_mode" "$backup_status" \
+        "$timestamp" "$mount_path" << 'PYTHON'
+import json
+import os
+import sys
+
+backup_file, drive_name, mode, status, timestamp, mount_path = sys.argv[1:7]
+
+# Load existing data
+if os.path.exists(backup_file):
+    with open(backup_file, 'r') as f:
+        data = json.load(f)
+else:
+    data = {"drives": {}}
+
+# Ensure the drive and mode entries exist (also tolerates a file that lacks
+# the top-level "drives" key)
+drive_entry = data.setdefault("drives", {}).setdefault(
+    drive_name, {"mount_path": mount_path, "backups": {}}
+)
+backup_entry = drive_entry.setdefault("backups", {}).setdefault(mode, {})
+
+# Update based on status
+if status == "started":
+    backup_entry["last_attempt"] = timestamp
+    backup_entry["last_status"] = "in_progress"
+elif status == "completed":
+    backup_entry["last_completed"] = timestamp
+    backup_entry["last_status"] = "success"
+elif status == "failed":
+    backup_entry["last_status"] = "failed"
+
+# Write to a sibling file and rename it into place, so an interrupted run
+# cannot leave a truncated backup.json behind
+tmp_file = backup_file + ".tmp"
+with open(tmp_file, 'w') as f:
+    json.dump(data, f, indent=2)
+os.replace(tmp_file, backup_file)
+PYTHON
+}
+
+# Show backup status
+show_status() {
+    echo "========================================"
+    echo "Gnommo Backup Status"
+    echo "========================================"
+
+    if [[ ! -f "$BACKUP_JSON" ]]; then
+        echo "No backups recorded yet."
+        exit 0
+    fi
+
+    # Pass the path explicitly so the report works whether or not the caller
+    # exported BACKUP_JSON.
+    BACKUP_JSON="$BACKUP_JSON" python3 << 'PYTHON'
+import json
+import os
+from datetime import datetime
+
+backup_file = os.environ.get('BACKUP_JSON', 'backup.json')
+
+with open(backup_file, 'r') as f:
+    data = json.load(f)
+
+for drive_name, drive_info in data.get("drives", {}).items():
+    mount_path = drive_info.get("mount_path", "unknown")
+    mounted = "CONNECTED" if os.path.exists(mount_path) else "not connected"
+
+    print(f"\n{drive_name} ({mounted})")
+    print(f" Path: {mount_path}")
+
+    backups = drive_info.get("backups", {})
+    if not backups:
+        print(" No backups recorded")
+        continue
+
+    for mode, info in backups.items():
+        status = info.get("last_status", "unknown")
+        completed = info.get("last_completed", "never")
+
+        # Format the completed time nicely; keep the raw value if it is not
+        # an ISO 8601 string
+        if completed != "never":
+            try:
+                dt = datetime.fromisoformat(completed.replace('Z', '+00:00'))
+                completed = dt.strftime("%Y-%m-%d %H:%M UTC")
+            except (ValueError, AttributeError):
+                pass
+
+        status_icon = "✓" if status == "success" else "⋯" if status == "in_progress" else "✗"
+        print(f" {mode}: {status_icon} {completed}")
+
+print()
+PYTHON
+
+    echo "========================================"
+}
+
+# Captured at top level: inside a zsh function $0 is the function name
+# (FUNCTION_ARGZERO is on by default), so usage() cannot derive it itself.
+SCRIPT_NAME="$(basename "$0")"
+
+# Print the help text and exit.
+# Arguments: $1 - exit status (optional, default 0). Error paths pass 1 and
+#                 redirect the text to stderr.
+usage() {
+    cat << EOF
+Gnommo Backup Utility
+
+Usage: $SCRIPT_NAME <mode> <drive|path> [options]
+
+Modes:
+  small       Sync small files only: Keynotes, images, JSON, code, manuscripts
+              Excludes: .mov, .mp4, .aifc, and other large media files
+
+  big         Sync large files only: video and audio media files
+              Useful for offloading to free up local space
+
+  all         Full mirror of the entire gnommo project
+
+  status      Show backup status for all known drives
+
+Drives:
+  lacie       /Volumes/LaCie Jens/gnommo
+  gnommodisk  /Volumes/gnommodisk/gnommo
+  Or specify a custom path
+
+Options:
+  --dry-run       Preview what would be transferred (no actual copying)
+  --delete        Remove files on destination that no longer exist in source
+  --no-progress   Disable progress display
+  --help          Show this help message
+
+Examples:
+  $SCRIPT_NAME status
+  $SCRIPT_NAME small lacie
+  $SCRIPT_NAME big gnommodisk --delete
+  $SCRIPT_NAME all lacie --dry-run
+
+EOF
+    exit "${1:-0}"
+}
+
+# Parse arguments
+MODE=""
+DRIVE=""
+DESTINATION=""
+DRY_RUN=""
+DELETE=""
+PROGRESS="--progress"
+
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        small|big|all)
+            MODE="$1"
+            shift
+            ;;
+        status)
+            # Reporting only: print the recorded state and stop.
+            export BACKUP_JSON
+            show_status
+            exit 0
+            ;;
+        lacie|gnommodisk)
+            DRIVE="$1"
+            DESTINATION="${KNOWN_DRIVES[$1]}/gnommo"
+            shift
+            ;;
+        --dry-run)
+            DRY_RUN="--dry-run"
+            shift
+            ;;
+        --delete)
+            DELETE="--delete"
+            shift
+            ;;
+        --no-progress)
+            PROGRESS=""
+            shift
+            ;;
+        --help|-h)
+            usage
+            ;;
+        -*)
+            echo "Unknown option: $1" >&2
+            usage 1 >&2
+            ;;
+        *)
+            # First unrecognised word is a custom destination path; later
+            # ones are still ignored, but no longer silently.
+            if [[ -z "$DESTINATION" ]]; then
+                DRIVE="custom"
+                DESTINATION="$1"
+            else
+                echo "Warning: ignoring extra argument: $1" >&2
+            fi
+            shift
+            ;;
+    esac
+done
+
+# Validate arguments
+if [[ -z "$MODE" ]]; then
+    echo "Error: Mode is required (small, big, all, or status)" >&2
+    echo "" >&2
+    usage 1 >&2
+fi
+
+if [[ -z "$DESTINATION" ]]; then
+    echo "Error: Drive or destination path is required" >&2
+    echo "" >&2
+    usage 1 >&2
+fi
+
+# Check if drive is mounted (get the volume path, handling spaces)
+MOUNT_PATH="${DESTINATION%/gnommo}"
+if [[ ! -d "$MOUNT_PATH" ]]; then
+    echo "Error: Drive not mounted at: $MOUNT_PATH" >&2
+    echo "" >&2
+    echo "Available volumes:" >&2
+    ls /Volumes/ 2>/dev/null | sed 's/^/ /' >&2
+    exit 1
+fi
+
+# Create destination directory if needed
+mkdir -p "$DESTINATION"
+
+# Initialize backup tracking
+init_backup_json
+
+# Build the rsync option list as an array: each element reaches rsync as
+# exactly one argument, so no eval or nested quoting is needed.
+RSYNC_OPTS=(-avh)
+if [[ -n "$PROGRESS" ]]; then RSYNC_OPTS+=(--progress); fi
+if [[ -n "$DRY_RUN" ]]; then RSYNC_OPTS+=(--dry-run); fi
+if [[ -n "$DELETE" ]]; then RSYNC_OPTS+=(--delete); fi
+
+# Always exclude these
+EXCLUDE_ALWAYS=(
+    ".DS_Store"
+    "__pycache__"
+    "*.pyc"
+    ".git"
+    ".env"
+    "*.egg-info"
+    ".venv"
+    "venv"
+    "node_modules"
+)
+
+# Filter rules for the current run; rsync applies the first rule that matches,
+# so the order in which the helpers below are called is significant.
+RSYNC_FILTERS=()
+
+# Append an --exclude rule for every large-media extension.
+build_big_excludes() {
+    local ext
+    for ext in "${BIG_EXTENSIONS[@]}"; do
+        RSYNC_FILTERS+=("--exclude=*.$ext")
+    done
+}
+
+# Append an --include rule for every large-media extension.
+build_big_includes() {
+    local ext
+    for ext in "${BIG_EXTENSIONS[@]}"; do
+        RSYNC_FILTERS+=("--include=*.$ext")
+    done
+}
+
+# Append the exclusions that apply in every mode.
+build_common_excludes() {
+    local pattern
+    for pattern in "${EXCLUDE_ALWAYS[@]}"; do
+        RSYNC_FILTERS+=("--exclude=$pattern")
+    done
+}
+
+echo "========================================"
+echo "Gnommo Backup Utility"
+echo "========================================"
+echo "Mode: $MODE"
+echo "Drive: $DRIVE"
+echo "Source: $PROJECT_DIR"
+echo "Destination: $DESTINATION"
+if [[ -n "$DRY_RUN" ]]; then echo "DRY RUN: Yes (no files will be copied)"; fi
+if [[ -n "$DELETE" ]]; then echo "Delete: Yes (will remove orphaned files)"; fi
+echo "========================================"
+echo ""
+
+# Record backup attempt (skip for dry-run)
+if [[ -z "$DRY_RUN" ]]; then
+    update_backup_json "$DRIVE" "$MODE" "started"
+fi
+
+# Track success
+BACKUP_SUCCESS=false
+
+# Run rsync for the selected mode.
+# Globals: MODE, RSYNC_OPTS, PROJECT_DIR, DESTINATION (read); RSYNC_FILTERS (written)
+# Returns: rsync's exit status
+run_backup() {
+    RSYNC_FILTERS=()
+    build_common_excludes
+
+    case "$MODE" in
+        small)
+            echo "Syncing SMALL files (excluding large media)..."
+            echo "Excludes: ${BIG_EXTENSIONS[*]}"
+            echo ""
+
+            build_big_excludes
+            ;;
+
+        big)
+            echo "Syncing BIG files only (large media)..."
+            echo "Includes: ${BIG_EXTENSIONS[*]}"
+            echo ""
+
+            # Descend into every directory, keep only the media extensions,
+            # drop everything else.
+            RSYNC_FILTERS+=("--include=*/")
+            build_big_includes
+            RSYNC_FILTERS+=("--exclude=*")
+            ;;
+
+        all)
+            echo "Syncing ALL files (complete mirror)..."
+            echo ""
+            ;;
+    esac
+
+    rsync "${RSYNC_OPTS[@]}" "${RSYNC_FILTERS[@]}" "$PROJECT_DIR/" "$DESTINATION/"
+}
+
+# Run backup and track result
+# rsync exit codes: 0 = success, 23 = partial transfer due to error,
+# 24 = source files vanished during the transfer. 23 and 24 are deliberately
+# treated as a completed backup (typically files that changed mid-sync).
+RSYNC_STATUS=0
+run_backup || RSYNC_STATUS=$?
+if [[ $RSYNC_STATUS -eq 0 ]]; then
+    BACKUP_SUCCESS=true
+elif [[ $RSYNC_STATUS -eq 23 || $RSYNC_STATUS -eq 24 ]]; then
+    echo "Note: Some files changed during transfer (rsync exit $RSYNC_STATUS) - backup completed"
+    BACKUP_SUCCESS=true
+else
+    echo "rsync failed with exit status $RSYNC_STATUS" >&2
+fi
+
+echo ""
+echo "========================================"
+if [[ -n "$DRY_RUN" ]]; then
+    echo "DRY RUN complete. No files were copied."
+else
+    if [[ "$BACKUP_SUCCESS" == true ]]; then
+        update_backup_json "$DRIVE" "$MODE" "completed"
+        echo "Backup complete!"
+    else
+        update_backup_json "$DRIVE" "$MODE" "failed"
+        echo "Backup FAILED!"
+        # Close the banner here and report the failure to the caller;
+        # falling through would end the script with exit status 0.
+        echo "========================================"
+        exit "$RSYNC_STATUS"
+ fi +fi +echo "========================================" diff --git a/docs/partial-rendering-spec.md b/docs/partial-rendering-spec.md index 2b1fc25..9eaeed6 100644 --- a/docs/partial-rendering-spec.md +++ b/docs/partial-rendering-spec.md @@ -302,7 +302,7 @@ All events (slides, videos, audio) are filtered by whether their START marker fa ### Parallel Rendering Pipeline ```bash -# Render in parallel, then concatenate +# Render in parallel, then stitch gnommo render proj.json seg1.mp4 --slides S1:S10 & gnommo render proj.json seg2.mp4 --slides S10:S20 & gnommo render proj.json seg3.mp4 --slides S20: & diff --git a/example/manuscript.txt b/example/manuscript.txt index 7844a7f..dba3092 100644 --- a/example/manuscript.txt +++ b/example/manuscript.txt @@ -1,12 +1,12 @@ [S1] -This is the first slide. It appears immediately. [cite:Gnommo Documentation - https://github.com/example/gnommo] +This is the first slide. It appears immediately. [S2] -However, this is the second slide. It should appear 1 second prior to when I say "however" +However, this is the second slide. It should appear 1 second prior to when I say “however” [S3] -[video:Zoomin_MontageZoom] -This is me talking alongside a video. The video is constrained within the red square. Notice how the video stops immediately when we make the transition to the next slide. [cite:FFmpeg Documentation - https://ffmpeg.org/documentation.html] +[video:KnightRotating] +This is me talking alongside a video. The video is constrained within the red square. Notice how the video stops immediately when we make the transition to the next slide. [S4] I will continue to talk without pause, but in the finished recording - there will be a pause before the narration continues. Now a video will play that pauses the narration @@ -14,6 +14,26 @@ I will continue to talk without pause, but in the finished recording - there wil [S5] [video:gnommologo] -Notice how my voice continues after the video finished. 
+Notice how my voice continues after the video finished [S6] + +[S7] +This is the first slide. It appears immediately. + +[S8] +However, this is the second slide. It should appear 1 second prior to when I say “however” + +[S9] +[video:KnightRotating] +This is me talking alongside a video. The video is constrained within the red square. Notice how the video stops immediately when we make the transition to the next slide. + +[S10] +I will continue to talk without pause, but in the finished recording - there will be a pause before the narration continues. Now a video will play that pauses the narration + +[S11] +[video:gnommologo] + +Notice how my voice continues after the video finished + +[S12] diff --git a/example/media/narration/narration.json b/example/media/narration/narration.json new file mode 100644 index 0000000..8a095f2 --- /dev/null +++ b/example/media/narration/narration.json @@ -0,0 +1,16 @@ +{ + "talking_head_S1": { + "source_file": "talking_head_S1.mov", + "output_file": "talking_head_S1_processed.mov", + "cutout": "talkinghead", + "always_visible": true, + "filter": "talkinghead" + }, + "talking_head_S3": { + "source_file": "talking_head_S3.mov", + "output_file": "talking_head_S3_processed.mov", + "cutout": "talkinghead", + "always_visible": true, + "filter": "talkinghead" + } +} \ No newline at end of file diff --git a/example/media/slides/example/slides.json b/example/media/slides/example/slides.json index 1ea7963..dab0045 100644 --- a/example/media/slides/example/slides.json +++ b/example/media/slides/example/slides.json @@ -22,5 +22,29 @@ "S6": { "image": "example.006.png", "type": "fullscreen" + }, + "S7": { + "image": "example.007.png", + "type": "fullscreen" + }, + "S8": { + "image": "example.008.png", + "type": "fullscreen" + }, + "S9": { + "image": "example.009.png", + "type": "fullscreen" + }, + "S10": { + "image": "example.010.png", + "type": "fullscreen" + }, + "S11": { + "image": "example.011.png", + "type": "fullscreen" + }, + "S12": { + 
"image": "example.012.png", + "type": "fullscreen" } } \ No newline at end of file diff --git a/example/media/videos/narration_combined.transcript.json b/example/media/videos/narration_combined.transcript.json new file mode 100644 index 0000000..4f18a0e --- /dev/null +++ b/example/media/videos/narration_combined.transcript.json @@ -0,0 +1,992 @@ +[ + { + "word": "This", + "start": 10.739999999999997, + "end": 11.44 + }, + { + "word": "is", + "start": 11.44, + "end": 11.64 + }, + { + "word": "the", + "start": 11.64, + "end": 11.82 + }, + { + "word": "first", + "start": 11.82, + "end": 12.04 + }, + { + "word": "slide.", + "start": 12.04, + "end": 12.44 + }, + { + "word": "It", + "start": 12.92, + "end": 13.34 + }, + { + "word": "appears", + "start": 13.34, + "end": 13.7 + }, + { + "word": "immediate.", + "start": 13.7, + "end": 14.18 + }, + { + "word": "However,", + "start": 15.36, + "end": 16.06 + }, + { + "word": "this", + "start": 16.38, + "end": 16.48 + }, + { + "word": "is", + "start": 16.48, + "end": 16.62 + }, + { + "word": "the", + "start": 16.62, + "end": 16.8 + }, + { + "word": "second", + "start": 16.8, + "end": 17.08 + }, + { + "word": "slide.", + "start": 17.08, + "end": 17.42 + }, + { + "word": "It", + "start": 17.78, + "end": 18.02 + }, + { + "word": "should", + "start": 18.02, + "end": 18.24 + }, + { + "word": "appear", + "start": 18.24, + "end": 18.56 + }, + { + "word": "one", + "start": 18.56, + "end": 19.02 + }, + { + "word": "second", + "start": 19.02, + "end": 19.5 + }, + { + "word": "prior", + "start": 19.5, + "end": 19.92 + }, + { + "word": "to", + "start": 19.92, + "end": 20.16 + }, + { + "word": "the", + "start": 20.16, + "end": 20.26 + }, + { + "word": "word", + "start": 20.26, + "end": 20.54 + }, + { + "word": "when", + "start": 20.54, + "end": 21.24 + }, + { + "word": "I", + "start": 21.24, + "end": 21.32 + }, + { + "word": "say", + "start": 21.32, + "end": 21.5 + }, + { + "word": "whoever", + "start": 21.5, + "end": 21.86 + }, + { + 
"word": "first", + "start": 21.86, + "end": 22.44 + }, + { + "word": "time.", + "start": 22.44, + "end": 22.7 + }, + { + "word": "This", + "start": 24.3, + "end": 25.0 + }, + { + "word": "is", + "start": 25.0, + "end": 25.14 + }, + { + "word": "me", + "start": 25.14, + "end": 25.38 + }, + { + "word": "taking,", + "start": 25.38, + "end": 25.78 + }, + { + "word": "talking", + "start": 26.14, + "end": 27.18 + }, + { + "word": "alongside", + "start": 27.18, + "end": 27.66 + }, + { + "word": "a", + "start": 27.66, + "end": 27.92 + }, + { + "word": "video.", + "start": 27.92, + "end": 28.16 + }, + { + "word": "The", + "start": 28.68, + "end": 28.96 + }, + { + "word": "video", + "start": 28.96, + "end": 29.2 + }, + { + "word": "is", + "start": 29.2, + "end": 29.4 + }, + { + "word": "constrained", + "start": 29.4, + "end": 29.82 + }, + { + "word": "within", + "start": 29.82, + "end": 30.18 + }, + { + "word": "the", + "start": 30.18, + "end": 30.36 + }, + { + "word": "red", + "start": 30.36, + "end": 30.52 + }, + { + "word": "square.", + "start": 30.52, + "end": 30.94 + }, + { + "word": "Notice", + "start": 31.3, + "end": 31.48 + }, + { + "word": "how", + "start": 31.48, + "end": 31.78 + }, + { + "word": "the", + "start": 31.78, + "end": 31.96 + }, + { + "word": "video", + "start": 31.96, + "end": 32.16 + }, + { + "word": "stops", + "start": 32.16, + "end": 32.48 + }, + { + "word": "immediately", + "start": 32.48, + "end": 32.98 + }, + { + "word": "when", + "start": 32.98, + "end": 33.4 + }, + { + "word": "we", + "start": 33.4, + "end": 33.58 + }, + { + "word": "make", + "start": 33.58, + "end": 33.76 + }, + { + "word": "the", + "start": 33.76, + "end": 34.0 + }, + { + "word": "transition", + "start": 34.0, + "end": 34.42 + }, + { + "word": "to", + "start": 34.42, + "end": 34.72 + }, + { + "word": "the", + "start": 34.72, + "end": 34.84 + }, + { + "word": "next", + "start": 34.84, + "end": 35.06 + }, + { + "word": "slide.", + "start": 35.06, + "end": 35.48 + }, + { + 
"word": "I", + "start": 37.2, + "end": 37.76 + }, + { + "word": "will", + "start": 37.76, + "end": 37.82 + }, + { + "word": "continue", + "start": 37.82, + "end": 38.12 + }, + { + "word": "to", + "start": 38.12, + "end": 38.34 + }, + { + "word": "talk", + "start": 38.34, + "end": 38.58 + }, + { + "word": "without", + "start": 38.58, + "end": 38.92 + }, + { + "word": "pause,", + "start": 38.92, + "end": 39.26 + }, + { + "word": "but", + "start": 39.5, + "end": 39.6 + }, + { + "word": "in", + "start": 39.6, + "end": 39.72 + }, + { + "word": "the", + "start": 39.72, + "end": 39.8 + }, + { + "word": "finished", + "start": 39.8, + "end": 40.0 + }, + { + "word": "recording", + "start": 40.0, + "end": 40.48 + }, + { + "word": "there", + "start": 40.48, + "end": 41.22 + }, + { + "word": "will", + "start": 41.22, + "end": 41.38 + }, + { + "word": "be", + "start": 41.38, + "end": 41.58 + }, + { + "word": "a", + "start": 41.58, + "end": 41.68 + }, + { + "word": "pause", + "start": 41.68, + "end": 41.96 + }, + { + "word": "before", + "start": 41.96, + "end": 42.32 + }, + { + "word": "the", + "start": 42.32, + "end": 42.52 + }, + { + "word": "narration", + "start": 42.52, + "end": 43.06 + }, + { + "word": "continues.", + "start": 43.06, + "end": 43.66 + }, + { + "word": "Now", + "start": 44.44, + "end": 44.56 + }, + { + "word": "a", + "start": 44.56, + "end": 44.7 + }, + { + "word": "video", + "start": 44.7, + "end": 44.94 + }, + { + "word": "will", + "start": 44.94, + "end": 45.12 + }, + { + "word": "play", + "start": 45.12, + "end": 45.4 + }, + { + "word": "that", + "start": 45.4, + "end": 45.8 + }, + { + "word": "pauses", + "start": 45.8, + "end": 46.52 + }, + { + "word": "the", + "start": 46.52, + "end": 46.8 + }, + { + "word": "narration.", + "start": 46.8, + "end": 47.22 + }, + { + "word": "Notice", + "start": 48.66, + "end": 49.22 + }, + { + "word": "how", + "start": 49.22, + "end": 49.44 + }, + { + "word": "my", + "start": 49.44, + "end": 49.6 + }, + { + "word": 
"voice", + "start": 49.6, + "end": 49.84 + }, + { + "word": "continues", + "start": 49.84, + "end": 50.38 + }, + { + "word": "after", + "start": 50.38, + "end": 50.88 + }, + { + "word": "the", + "start": 50.88, + "end": 51.04 + }, + { + "word": "video", + "start": 51.04, + "end": 51.28 + }, + { + "word": "finished.", + "start": 51.28, + "end": 51.8 + }, + { + "word": "This", + "start": 65.46000000000001, + "end": 66.14 + }, + { + "word": "is", + "start": 66.14, + "end": 66.34 + }, + { + "word": "the", + "start": 66.34, + "end": 66.52 + }, + { + "word": "first", + "start": 66.52, + "end": 66.74 + }, + { + "word": "slide.", + "start": 66.74, + "end": 67.14 + }, + { + "word": "It", + "start": 67.68, + "end": 68.02 + }, + { + "word": "appears", + "start": 68.02, + "end": 68.38 + }, + { + "word": "immediate.", + "start": 68.38, + "end": 68.86 + }, + { + "word": "However,", + "start": 70.28, + "end": 70.76 + }, + { + "word": "this", + "start": 71.1, + "end": 71.18 + }, + { + "word": "is", + "start": 71.18, + "end": 71.32 + }, + { + "word": "the", + "start": 71.32, + "end": 71.48 + }, + { + "word": "second", + "start": 71.48, + "end": 71.78 + }, + { + "word": "slide.", + "start": 71.78, + "end": 72.12 + }, + { + "word": "It", + "start": 72.4, + "end": 72.7 + }, + { + "word": "should", + "start": 72.7, + "end": 72.94 + }, + { + "word": "appear", + "start": 72.94, + "end": 73.26 + }, + { + "word": "one", + "start": 73.26, + "end": 73.72 + }, + { + "word": "second", + "start": 73.72, + "end": 74.2 + }, + { + "word": "prior", + "start": 74.2, + "end": 74.62 + }, + { + "word": "to", + "start": 74.62, + "end": 74.86 + }, + { + "word": "the", + "start": 74.86, + "end": 74.98 + }, + { + "word": "word", + "start": 74.98, + "end": 75.24 + }, + { + "word": "when", + "start": 75.24, + "end": 75.94 + }, + { + "word": "I", + "start": 75.94, + "end": 76.02 + }, + { + "word": "say", + "start": 76.02, + "end": 76.18 + }, + { + "word": "whoever", + "start": 76.18, + "end": 76.56 + }, + { + 
"word": "first", + "start": 76.56, + "end": 77.14 + }, + { + "word": "time.", + "start": 77.14, + "end": 77.42 + }, + { + "word": "This", + "start": 79.36, + "end": 79.7 + }, + { + "word": "is", + "start": 79.7, + "end": 79.86 + }, + { + "word": "me", + "start": 79.86, + "end": 80.08 + }, + { + "word": "taking,", + "start": 80.08, + "end": 80.48 + }, + { + "word": "talking", + "start": 80.92, + "end": 81.88 + }, + { + "word": "alongside", + "start": 81.88, + "end": 82.36 + }, + { + "word": "a", + "start": 82.36, + "end": 82.62 + }, + { + "word": "video.", + "start": 82.62, + "end": 82.88 + }, + { + "word": "The", + "start": 83.48, + "end": 83.66 + }, + { + "word": "video", + "start": 83.66, + "end": 83.92 + }, + { + "word": "is", + "start": 83.92, + "end": 84.1 + }, + { + "word": "constrained", + "start": 84.1, + "end": 84.54 + }, + { + "word": "within", + "start": 84.54, + "end": 84.88 + }, + { + "word": "the", + "start": 84.88, + "end": 85.06 + }, + { + "word": "red", + "start": 85.06, + "end": 85.22 + }, + { + "word": "square.", + "start": 85.22, + "end": 85.62 + }, + { + "word": "Notice", + "start": 85.62, + "end": 86.18 + }, + { + "word": "how", + "start": 86.18, + "end": 86.48 + }, + { + "word": "the", + "start": 86.48, + "end": 86.66 + }, + { + "word": "video", + "start": 86.66, + "end": 86.86 + }, + { + "word": "stops", + "start": 86.86, + "end": 87.2 + }, + { + "word": "immediately", + "start": 87.2, + "end": 87.68 + }, + { + "word": "when", + "start": 87.68, + "end": 88.1 + }, + { + "word": "we", + "start": 88.1, + "end": 88.28 + }, + { + "word": "make", + "start": 88.28, + "end": 88.46 + }, + { + "word": "the", + "start": 88.46, + "end": 88.7 + }, + { + "word": "transition", + "start": 88.7, + "end": 89.12 + }, + { + "word": "to", + "start": 89.12, + "end": 89.42 + }, + { + "word": "the", + "start": 89.42, + "end": 89.54 + }, + { + "word": "next", + "start": 89.54, + "end": 89.76 + }, + { + "word": "slide.", + "start": 89.76, + "end": 90.22 + }, + { + 
"word": "I", + "start": 91.94, + "end": 92.46 + }, + { + "word": "will", + "start": 92.46, + "end": 92.52 + }, + { + "word": "continue", + "start": 92.52, + "end": 92.82 + }, + { + "word": "to", + "start": 92.82, + "end": 93.04 + }, + { + "word": "talk", + "start": 93.04, + "end": 93.28 + }, + { + "word": "without", + "start": 93.28, + "end": 93.62 + }, + { + "word": "pause,", + "start": 93.62, + "end": 93.96 + }, + { + "word": "but", + "start": 94.2, + "end": 94.3 + }, + { + "word": "in", + "start": 94.3, + "end": 94.42 + }, + { + "word": "the", + "start": 94.42, + "end": 94.48 + }, + { + "word": "finished", + "start": 94.48, + "end": 94.7 + }, + { + "word": "recording", + "start": 94.7, + "end": 95.18 + }, + { + "word": "there", + "start": 95.18, + "end": 95.92 + }, + { + "word": "will", + "start": 95.92, + "end": 96.08 + }, + { + "word": "be", + "start": 96.08, + "end": 96.28 + }, + { + "word": "a", + "start": 96.28, + "end": 96.38 + }, + { + "word": "pause", + "start": 96.38, + "end": 96.64 + }, + { + "word": "before", + "start": 96.64, + "end": 97.02 + }, + { + "word": "the", + "start": 97.02, + "end": 97.22 + }, + { + "word": "narration", + "start": 97.22, + "end": 97.76 + }, + { + "word": "continues.", + "start": 97.76, + "end": 98.38 + }, + { + "word": "Now", + "start": 99.06, + "end": 99.26 + }, + { + "word": "a", + "start": 99.26, + "end": 99.4 + }, + { + "word": "video", + "start": 99.4, + "end": 99.64 + }, + { + "word": "will", + "start": 99.64, + "end": 99.8 + }, + { + "word": "play", + "start": 99.8, + "end": 100.1 + }, + { + "word": "that", + "start": 100.1, + "end": 100.5 + }, + { + "word": "pauses", + "start": 100.5, + "end": 101.24 + }, + { + "word": "the", + "start": 101.24, + "end": 101.5 + }, + { + "word": "narration.", + "start": 101.5, + "end": 101.92 + }, + { + "word": "Notice", + "start": 103.18, + "end": 103.92 + }, + { + "word": "how", + "start": 103.92, + "end": 104.14 + }, + { + "word": "my", + "start": 104.14, + "end": 104.32 + }, + { 
+ "word": "voice", + "start": 104.32, + "end": 104.58 + }, + { + "word": "continues", + "start": 104.58, + "end": 105.1 + }, + { + "word": "after", + "start": 105.1, + "end": 105.58 + }, + { + "word": "the", + "start": 105.58, + "end": 105.76 + }, + { + "word": "video", + "start": 105.76, + "end": 105.98 + }, + { + "word": "finished.", + "start": 105.98, + "end": 106.48 + } +] \ No newline at end of file diff --git a/example/media/videos/proxy/narration_combined.transcript.json b/example/media/videos/proxy/narration_combined.transcript.json new file mode 100644 index 0000000..4f18a0e --- /dev/null +++ b/example/media/videos/proxy/narration_combined.transcript.json @@ -0,0 +1,992 @@ +[ + { + "word": "This", + "start": 10.739999999999997, + "end": 11.44 + }, + { + "word": "is", + "start": 11.44, + "end": 11.64 + }, + { + "word": "the", + "start": 11.64, + "end": 11.82 + }, + { + "word": "first", + "start": 11.82, + "end": 12.04 + }, + { + "word": "slide.", + "start": 12.04, + "end": 12.44 + }, + { + "word": "It", + "start": 12.92, + "end": 13.34 + }, + { + "word": "appears", + "start": 13.34, + "end": 13.7 + }, + { + "word": "immediate.", + "start": 13.7, + "end": 14.18 + }, + { + "word": "However,", + "start": 15.36, + "end": 16.06 + }, + { + "word": "this", + "start": 16.38, + "end": 16.48 + }, + { + "word": "is", + "start": 16.48, + "end": 16.62 + }, + { + "word": "the", + "start": 16.62, + "end": 16.8 + }, + { + "word": "second", + "start": 16.8, + "end": 17.08 + }, + { + "word": "slide.", + "start": 17.08, + "end": 17.42 + }, + { + "word": "It", + "start": 17.78, + "end": 18.02 + }, + { + "word": "should", + "start": 18.02, + "end": 18.24 + }, + { + "word": "appear", + "start": 18.24, + "end": 18.56 + }, + { + "word": "one", + "start": 18.56, + "end": 19.02 + }, + { + "word": "second", + "start": 19.02, + "end": 19.5 + }, + { + "word": "prior", + "start": 19.5, + "end": 19.92 + }, + { + "word": "to", + "start": 19.92, + "end": 20.16 + }, + { + "word": "the", + 
"start": 20.16, + "end": 20.26 + }, + { + "word": "word", + "start": 20.26, + "end": 20.54 + }, + { + "word": "when", + "start": 20.54, + "end": 21.24 + }, + { + "word": "I", + "start": 21.24, + "end": 21.32 + }, + { + "word": "say", + "start": 21.32, + "end": 21.5 + }, + { + "word": "whoever", + "start": 21.5, + "end": 21.86 + }, + { + "word": "first", + "start": 21.86, + "end": 22.44 + }, + { + "word": "time.", + "start": 22.44, + "end": 22.7 + }, + { + "word": "This", + "start": 24.3, + "end": 25.0 + }, + { + "word": "is", + "start": 25.0, + "end": 25.14 + }, + { + "word": "me", + "start": 25.14, + "end": 25.38 + }, + { + "word": "taking,", + "start": 25.38, + "end": 25.78 + }, + { + "word": "talking", + "start": 26.14, + "end": 27.18 + }, + { + "word": "alongside", + "start": 27.18, + "end": 27.66 + }, + { + "word": "a", + "start": 27.66, + "end": 27.92 + }, + { + "word": "video.", + "start": 27.92, + "end": 28.16 + }, + { + "word": "The", + "start": 28.68, + "end": 28.96 + }, + { + "word": "video", + "start": 28.96, + "end": 29.2 + }, + { + "word": "is", + "start": 29.2, + "end": 29.4 + }, + { + "word": "constrained", + "start": 29.4, + "end": 29.82 + }, + { + "word": "within", + "start": 29.82, + "end": 30.18 + }, + { + "word": "the", + "start": 30.18, + "end": 30.36 + }, + { + "word": "red", + "start": 30.36, + "end": 30.52 + }, + { + "word": "square.", + "start": 30.52, + "end": 30.94 + }, + { + "word": "Notice", + "start": 31.3, + "end": 31.48 + }, + { + "word": "how", + "start": 31.48, + "end": 31.78 + }, + { + "word": "the", + "start": 31.78, + "end": 31.96 + }, + { + "word": "video", + "start": 31.96, + "end": 32.16 + }, + { + "word": "stops", + "start": 32.16, + "end": 32.48 + }, + { + "word": "immediately", + "start": 32.48, + "end": 32.98 + }, + { + "word": "when", + "start": 32.98, + "end": 33.4 + }, + { + "word": "we", + "start": 33.4, + "end": 33.58 + }, + { + "word": "make", + "start": 33.58, + "end": 33.76 + }, + { + "word": "the", + "start": 
33.76, + "end": 34.0 + }, + { + "word": "transition", + "start": 34.0, + "end": 34.42 + }, + { + "word": "to", + "start": 34.42, + "end": 34.72 + }, + { + "word": "the", + "start": 34.72, + "end": 34.84 + }, + { + "word": "next", + "start": 34.84, + "end": 35.06 + }, + { + "word": "slide.", + "start": 35.06, + "end": 35.48 + }, + { + "word": "I", + "start": 37.2, + "end": 37.76 + }, + { + "word": "will", + "start": 37.76, + "end": 37.82 + }, + { + "word": "continue", + "start": 37.82, + "end": 38.12 + }, + { + "word": "to", + "start": 38.12, + "end": 38.34 + }, + { + "word": "talk", + "start": 38.34, + "end": 38.58 + }, + { + "word": "without", + "start": 38.58, + "end": 38.92 + }, + { + "word": "pause,", + "start": 38.92, + "end": 39.26 + }, + { + "word": "but", + "start": 39.5, + "end": 39.6 + }, + { + "word": "in", + "start": 39.6, + "end": 39.72 + }, + { + "word": "the", + "start": 39.72, + "end": 39.8 + }, + { + "word": "finished", + "start": 39.8, + "end": 40.0 + }, + { + "word": "recording", + "start": 40.0, + "end": 40.48 + }, + { + "word": "there", + "start": 40.48, + "end": 41.22 + }, + { + "word": "will", + "start": 41.22, + "end": 41.38 + }, + { + "word": "be", + "start": 41.38, + "end": 41.58 + }, + { + "word": "a", + "start": 41.58, + "end": 41.68 + }, + { + "word": "pause", + "start": 41.68, + "end": 41.96 + }, + { + "word": "before", + "start": 41.96, + "end": 42.32 + }, + { + "word": "the", + "start": 42.32, + "end": 42.52 + }, + { + "word": "narration", + "start": 42.52, + "end": 43.06 + }, + { + "word": "continues.", + "start": 43.06, + "end": 43.66 + }, + { + "word": "Now", + "start": 44.44, + "end": 44.56 + }, + { + "word": "a", + "start": 44.56, + "end": 44.7 + }, + { + "word": "video", + "start": 44.7, + "end": 44.94 + }, + { + "word": "will", + "start": 44.94, + "end": 45.12 + }, + { + "word": "play", + "start": 45.12, + "end": 45.4 + }, + { + "word": "that", + "start": 45.4, + "end": 45.8 + }, + { + "word": "pauses", + "start": 45.8, + 
"end": 46.52 + }, + { + "word": "the", + "start": 46.52, + "end": 46.8 + }, + { + "word": "narration.", + "start": 46.8, + "end": 47.22 + }, + { + "word": "Notice", + "start": 48.66, + "end": 49.22 + }, + { + "word": "how", + "start": 49.22, + "end": 49.44 + }, + { + "word": "my", + "start": 49.44, + "end": 49.6 + }, + { + "word": "voice", + "start": 49.6, + "end": 49.84 + }, + { + "word": "continues", + "start": 49.84, + "end": 50.38 + }, + { + "word": "after", + "start": 50.38, + "end": 50.88 + }, + { + "word": "the", + "start": 50.88, + "end": 51.04 + }, + { + "word": "video", + "start": 51.04, + "end": 51.28 + }, + { + "word": "finished.", + "start": 51.28, + "end": 51.8 + }, + { + "word": "This", + "start": 65.46000000000001, + "end": 66.14 + }, + { + "word": "is", + "start": 66.14, + "end": 66.34 + }, + { + "word": "the", + "start": 66.34, + "end": 66.52 + }, + { + "word": "first", + "start": 66.52, + "end": 66.74 + }, + { + "word": "slide.", + "start": 66.74, + "end": 67.14 + }, + { + "word": "It", + "start": 67.68, + "end": 68.02 + }, + { + "word": "appears", + "start": 68.02, + "end": 68.38 + }, + { + "word": "immediate.", + "start": 68.38, + "end": 68.86 + }, + { + "word": "However,", + "start": 70.28, + "end": 70.76 + }, + { + "word": "this", + "start": 71.1, + "end": 71.18 + }, + { + "word": "is", + "start": 71.18, + "end": 71.32 + }, + { + "word": "the", + "start": 71.32, + "end": 71.48 + }, + { + "word": "second", + "start": 71.48, + "end": 71.78 + }, + { + "word": "slide.", + "start": 71.78, + "end": 72.12 + }, + { + "word": "It", + "start": 72.4, + "end": 72.7 + }, + { + "word": "should", + "start": 72.7, + "end": 72.94 + }, + { + "word": "appear", + "start": 72.94, + "end": 73.26 + }, + { + "word": "one", + "start": 73.26, + "end": 73.72 + }, + { + "word": "second", + "start": 73.72, + "end": 74.2 + }, + { + "word": "prior", + "start": 74.2, + "end": 74.62 + }, + { + "word": "to", + "start": 74.62, + "end": 74.86 + }, + { + "word": "the", + 
"start": 74.86, + "end": 74.98 + }, + { + "word": "word", + "start": 74.98, + "end": 75.24 + }, + { + "word": "when", + "start": 75.24, + "end": 75.94 + }, + { + "word": "I", + "start": 75.94, + "end": 76.02 + }, + { + "word": "say", + "start": 76.02, + "end": 76.18 + }, + { + "word": "whoever", + "start": 76.18, + "end": 76.56 + }, + { + "word": "first", + "start": 76.56, + "end": 77.14 + }, + { + "word": "time.", + "start": 77.14, + "end": 77.42 + }, + { + "word": "This", + "start": 79.36, + "end": 79.7 + }, + { + "word": "is", + "start": 79.7, + "end": 79.86 + }, + { + "word": "me", + "start": 79.86, + "end": 80.08 + }, + { + "word": "taking,", + "start": 80.08, + "end": 80.48 + }, + { + "word": "talking", + "start": 80.92, + "end": 81.88 + }, + { + "word": "alongside", + "start": 81.88, + "end": 82.36 + }, + { + "word": "a", + "start": 82.36, + "end": 82.62 + }, + { + "word": "video.", + "start": 82.62, + "end": 82.88 + }, + { + "word": "The", + "start": 83.48, + "end": 83.66 + }, + { + "word": "video", + "start": 83.66, + "end": 83.92 + }, + { + "word": "is", + "start": 83.92, + "end": 84.1 + }, + { + "word": "constrained", + "start": 84.1, + "end": 84.54 + }, + { + "word": "within", + "start": 84.54, + "end": 84.88 + }, + { + "word": "the", + "start": 84.88, + "end": 85.06 + }, + { + "word": "red", + "start": 85.06, + "end": 85.22 + }, + { + "word": "square.", + "start": 85.22, + "end": 85.62 + }, + { + "word": "Notice", + "start": 85.62, + "end": 86.18 + }, + { + "word": "how", + "start": 86.18, + "end": 86.48 + }, + { + "word": "the", + "start": 86.48, + "end": 86.66 + }, + { + "word": "video", + "start": 86.66, + "end": 86.86 + }, + { + "word": "stops", + "start": 86.86, + "end": 87.2 + }, + { + "word": "immediately", + "start": 87.2, + "end": 87.68 + }, + { + "word": "when", + "start": 87.68, + "end": 88.1 + }, + { + "word": "we", + "start": 88.1, + "end": 88.28 + }, + { + "word": "make", + "start": 88.28, + "end": 88.46 + }, + { + "word": "the", + 
"start": 88.46, + "end": 88.7 + }, + { + "word": "transition", + "start": 88.7, + "end": 89.12 + }, + { + "word": "to", + "start": 89.12, + "end": 89.42 + }, + { + "word": "the", + "start": 89.42, + "end": 89.54 + }, + { + "word": "next", + "start": 89.54, + "end": 89.76 + }, + { + "word": "slide.", + "start": 89.76, + "end": 90.22 + }, + { + "word": "I", + "start": 91.94, + "end": 92.46 + }, + { + "word": "will", + "start": 92.46, + "end": 92.52 + }, + { + "word": "continue", + "start": 92.52, + "end": 92.82 + }, + { + "word": "to", + "start": 92.82, + "end": 93.04 + }, + { + "word": "talk", + "start": 93.04, + "end": 93.28 + }, + { + "word": "without", + "start": 93.28, + "end": 93.62 + }, + { + "word": "pause,", + "start": 93.62, + "end": 93.96 + }, + { + "word": "but", + "start": 94.2, + "end": 94.3 + }, + { + "word": "in", + "start": 94.3, + "end": 94.42 + }, + { + "word": "the", + "start": 94.42, + "end": 94.48 + }, + { + "word": "finished", + "start": 94.48, + "end": 94.7 + }, + { + "word": "recording", + "start": 94.7, + "end": 95.18 + }, + { + "word": "there", + "start": 95.18, + "end": 95.92 + }, + { + "word": "will", + "start": 95.92, + "end": 96.08 + }, + { + "word": "be", + "start": 96.08, + "end": 96.28 + }, + { + "word": "a", + "start": 96.28, + "end": 96.38 + }, + { + "word": "pause", + "start": 96.38, + "end": 96.64 + }, + { + "word": "before", + "start": 96.64, + "end": 97.02 + }, + { + "word": "the", + "start": 97.02, + "end": 97.22 + }, + { + "word": "narration", + "start": 97.22, + "end": 97.76 + }, + { + "word": "continues.", + "start": 97.76, + "end": 98.38 + }, + { + "word": "Now", + "start": 99.06, + "end": 99.26 + }, + { + "word": "a", + "start": 99.26, + "end": 99.4 + }, + { + "word": "video", + "start": 99.4, + "end": 99.64 + }, + { + "word": "will", + "start": 99.64, + "end": 99.8 + }, + { + "word": "play", + "start": 99.8, + "end": 100.1 + }, + { + "word": "that", + "start": 100.1, + "end": 100.5 + }, + { + "word": "pauses", + "start": 
100.5, + "end": 101.24 + }, + { + "word": "the", + "start": 101.24, + "end": 101.5 + }, + { + "word": "narration.", + "start": 101.5, + "end": 101.92 + }, + { + "word": "Notice", + "start": 103.18, + "end": 103.92 + }, + { + "word": "how", + "start": 103.92, + "end": 104.14 + }, + { + "word": "my", + "start": 104.14, + "end": 104.32 + }, + { + "word": "voice", + "start": 104.32, + "end": 104.58 + }, + { + "word": "continues", + "start": 104.58, + "end": 105.1 + }, + { + "word": "after", + "start": 105.1, + "end": 105.58 + }, + { + "word": "the", + "start": 105.58, + "end": 105.76 + }, + { + "word": "video", + "start": 105.76, + "end": 105.98 + }, + { + "word": "finished.", + "start": 105.98, + "end": 106.48 + } +] \ No newline at end of file diff --git a/example/media/videos/videos.json b/example/media/videos/videos.json index b3ff4ca..4e99af5 100644 --- a/example/media/videos/videos.json +++ b/example/media/videos/videos.json @@ -1,39 +1,47 @@ { - "talking_head": { - "source_file": "talking_head.mov", - "output_file": "talking_head_processed.mov", + "talking_head_S1": { + "source_file": "talking_head_S1.mov", + "output_file": "talking_head_S1_processed.mov", "cutout": "talkinghead", "always_visible": true, - "filter": [ - { - "type": "chroma_key", - "color": [131, 177, 83], - "similarity": 0.04, - "blend": 0.025, - "spill": 0.05 - }, - { - "type": "mask", - "left": 0.05, - "right": 0.10 - } - ] + "filter": "talkinghead" }, - "gnommologo": { - "source_file": "Logo.mov", - "is_shared": true, - "cutout": "fullscreen", - "pause_narration": 0 , - "take": 10, - "skip": 0 + "talking_head_S3": { + "source_file": "talking_head_S3.mov", + "output_file": "talking_head_S3_processed.mov", + "cutout": "talkinghead", + "always_visible": true, + "filter": "talkinghead" }, - "Zoomin_MontageZoom": { + "KnightRotating": { + "description": "Knight model rotating in place", + "source_file": "KnightRotating.mp4", + "output_file": "KnightRotating.mp4", + "cutout": "square", + "filter": 
[], + "is_shared": true + }, + "gnommologo": { + "source_file": "Logo.mov", + "is_shared": true, + "cutout": "fullscreen", + "pause_narration": 17, + "take": 25, + "skip": 0 + }, + "Zoomin_MontageZoom": { "description": "Montage zoom", "source_file": "MontageZoom.mp4", "output_file": "MontageZoom.mp4", - "pause_narration":3, + "pause_narration": 5, "cutout": "square", "is_shared": true, "filter": [] + }, + "narration_combined": { + "source_file": "narration_combined.mov", + "output_file": "narration_combined.mov", + "cutout": "square", + "filter": [] } -} +} \ No newline at end of file diff --git a/example/project.json b/example/project.json index 7c37d64..f3bd989 100644 --- a/example/project.json +++ b/example/project.json @@ -13,7 +13,65 @@ "videos": "media/videos/videos.json", "slides": "media/slides/Example/slides.json", "audio": "media/audio/audio.json", - "main_video": "talking_head", + "default_filters": { + "talkinghead": [ + { + "type": "audio_normalize", + "eq_bands": [ + {"freq": 47, "gain": -15, "type": "lowshelf"}, + {"freq": 107, "gain": -1.3, "q": 1.2}, + {"freq": 597, "gain": -5.2, "q": 2}, + {"freq": 11811, "gain": 2.8, "q": 1}, + {"freq": 24000, "gain": 3.9, "type": "highshelf"} + ], + "highpass": 0, + "room_eq": false, + "dereverb_model": "shared_assets/models/std.rnnn", + "dereverb_mix": 0.8, + "denoise": true, + "noise_floor": -25, + "gate": true, + "gate_threshold": -35, + "gate_range": -20, + "compress": true, + "threshold": -20, + "ratio": 3, + "attack": 12, + "release": 100, + "makeup": 2, + "normalize": true, + "target_lufs": -16, + "target_lra": 11, + "target_tp": -1.5 + }, + { + "type": "color_grade", + "saturation": 1.15, + "contrast": 1.05, + "bm": -0.10, + "rm": 0.04 + }, + { + "type": "gnommokey", + "screen_color": [81, 137, 65], + "screen_gain": 175, + "screen_balance": 58, + "despill_bias": [217, 240, 255], + "despill_strength": 5.0, + "edge_erode": 1.0, + "clip_black": 0, + "clip_white": 100 + }, + { + "type": "mask", + "left": 
0.05, + "right": 0.1, + "top": 0.1, + "bottom": 0.0 + } + ] + }, + "main_video": ["talking_head_S1", "talking_head_S3"], "cutouts": { "talkinghead": { "x": "-10%", diff --git a/gnommo/cache.py b/gnommo/cache.py new file mode 100644 index 0000000..0a3e3b6 --- /dev/null +++ b/gnommo/cache.py @@ -0,0 +1,100 @@ +"""GnommoCache - External storage extension for large media files. + +Provides transparent fallback to external storage when files are not found locally. +Configure via ~/.gnommo.conf: + + [cache] + path = /Volumes/GnommoDisk/gnommo + +Files are looked up first locally, then in the cache at: + {cache_path}/{project_name}/{relative_path} +""" + +import configparser +from pathlib import Path +from typing import Optional, Tuple + +_cache_config: Optional[dict] = None + + +def load_cache_config() -> Optional[Path]: + """Load gnommo.conf and return cache path if configured. + + Configuration file location: ~/.gnommo.conf + + Returns: + Path to the cache root directory, or None if not configured. + """ + global _cache_config + if _cache_config is not None: + return _cache_config.get("path") + + config_path = Path.home() / ".gnommo.conf" + if not config_path.exists(): + _cache_config = {} + return None + + config = configparser.ConfigParser() + config.read(config_path) + + if config.has_option("cache", "path"): + cache_path = Path(config.get("cache", "path")) + _cache_config = {"path": cache_path} + return cache_path + + _cache_config = {} + return None + + +def resolve_with_cache( + local_path: Path, + project_path: Path, +) -> Tuple[Path, bool]: + """ + Resolve a file path with cache fallback (read-only). + + Checks the local path first. If not found and cache is configured, + checks the cache directory which mirrors the project structure. 
+ + Args: + local_path: The expected local path to the file + project_path: The project root directory + + Returns: + Tuple of (resolved_path, is_cached) where is_cached=True if + the file was found in the external cache instead of locally. + """ + # Check local path first + if local_path.exists(): + return local_path, False + + # Check cache + cache_base = load_cache_config() + if cache_base is None: + return local_path, False # No cache configured + + # Build cache path: {cache_base}/{project_name}/{relative_path} + try: + relative = local_path.relative_to(project_path) + cache_path = cache_base / project_path.name / relative + if cache_path.exists(): + return cache_path, True + except ValueError: + pass # local_path is not relative to project_path + + return local_path, False + + +def is_cache_configured() -> bool: + """Check if cache is configured (for status messages).""" + return load_cache_config() is not None + + +def get_cache_info() -> Optional[str]: + """Get a human-readable cache configuration string.""" + cache_path = load_cache_config() + if cache_path is None: + return None + if cache_path.exists(): + return f"{cache_path} (connected)" + return f"{cache_path} (not connected)" diff --git a/gnommo/cli.py b/gnommo/cli.py index e88ac77..c2470ba 100644 --- a/gnommo/cli.py +++ b/gnommo/cli.py @@ -10,6 +10,7 @@ from datetime import datetime from pathlib import Path from . import __version__ from .errors import GnommoError, ParseError, ValidationError, RenderError +from .cache import get_cache_info, resolve_with_cache from typing import Optional, Union @@ -33,9 +34,17 @@ Examples: gnommo -p video1 validate Validate only gnommo -p video1 import Generate slides.json from images gnommo -p video1 pre Preprocess videos (chroma key, etc.) 
+ gnommo -p video1 stitch --proxy -f Fast stitch with new begin/end values gnommo -p video1 all Full pipeline: transcribe → align → render gnommo -p video1 render --dry-run Show FFmpeg command without running gnommo -p video1 description Generate YouTube description file + gnommo -p video1 transcribe --final Transcribe final.mp4 and generate SRT for YouTube + gnommo -p video1 archive Sync project to external cache storage + gnommo -p video1 archive --dry-run Preview what would be synced + gnommo -p video1 extract-audio --combined Extract audio from narration_combined.mov + gnommo -p video1 extract-audio --combined --channel left Extract left channel only + gnommo -p video1 extract-audio --segment seg01 Extract from a specific segment + gnommo -p video1 master Extract raw + processed audio for A/B comparison """, ) parser.add_argument( @@ -61,12 +70,19 @@ Examples: "validate", "preprocess", "pre", + "stitch", "render", "all", "transcribe", "align", "import", "description", + "archive", + "extract-audio", + "master", + "push", + "pull", + "handoff", ], help="Action to perform (default: render)", ) @@ -108,6 +124,38 @@ Examples: default=1, help="Number of parallel workers for preprocessing (default: 1)", ) + parser.add_argument( + "--proxy", + action="store_true", + help="Use proxy workflow: downsample to 160x90 for fast iteration", + ) + parser.add_argument( + "--final", + action="store_true", + help="For transcribe: transcribe the final rendered video and generate SRT captions for YouTube", + ) + parser.add_argument( + "--segment", + type=str, + help="For extract-audio: specific segment ID to extract (default: all segments)", + ) + parser.add_argument( + "--channel", + type=str, + choices=["left", "right", "both"], + default="both", + help="For extract-audio: which audio channel(s) to extract (default: both)", + ) + parser.add_argument( + "--combined", + action="store_true", + help="For extract-audio: extract from narration_combined.mov instead of individual 
segments", + ) + parser.add_argument( + "--file", + default=None, + help="For handoff: path to video file (overrides output_video in project.json)", + ) args = parser.parse_args() @@ -125,7 +173,21 @@ Examples: elif action == "validate": return cmd_validate(project_path, args.verbose) elif action in ("preprocess", "pre"): - return cmd_preprocess(project_path, args.verbose, args.dry_run, args.force, args.workers) + return cmd_preprocess( + project_path, + args.verbose, + args.dry_run, + args.force, + args.workers, + args.proxy, + ) + elif action in ("stitch"): + return cmd_stitch( + project_path, + args.verbose, + args.force, + args.proxy, + ) elif action == "render": return cmd_render( project_path, @@ -134,9 +196,10 @@ Examples: args.slides, args.res, args.force, + args.proxy, ) elif action == "transcribe": - return cmd_transcribe(project_path, args.verbose) + return cmd_transcribe(project_path, args.verbose, args.proxy, args.final) elif action == "align": return cmd_align(project_path, args.verbose) elif action == "all": @@ -145,6 +208,23 @@ Examples: ) elif action == "description": return cmd_description(project_path, args.verbose) + elif action == "archive": + return cmd_archive(project_path, args.verbose, args.dry_run) + elif action == "extract-audio": + return cmd_extract_audio( + project_path, args.verbose, args.segment, args.channel, args.combined + ) + elif action == "master": + return cmd_master(project_path, args.verbose, args.channel) + elif action == "push": + from .push import cmd_push + return cmd_push(project_path, args.verbose, args.force) + elif action == "pull": + from .pull import cmd_pull + return cmd_pull(project_path, args.verbose, args.force) + elif action == "handoff": + from .handoff import cmd_handoff + return cmd_handoff(project_path, args.verbose, args.file) except GnommoError as e: print(f"Error: {e}", file=sys.stderr) @@ -163,16 +243,33 @@ Examples: def cmd_import(project_path: Path, force: bool, verbose: bool) -> int: """Import 
assets and generate metadata JSON files.""" + from .parser import parse_project_config + print(f"Importing assets for: {project_path.name}") if not project_path.exists(): print(f"Error: Project directory not found: {project_path}", file=sys.stderr) return 1 + # Load project config if it exists (for videos_path and default_filters) + config = None + if (project_path / "project.json").exists(): + config = parse_project_config(project_path) + # Import videos from media/videos directory - videos_dir = project_path / "media" / "videos" + if config and config.videos_path: + videos_json_path = project_path / config.videos_path + videos_dir = videos_json_path.parent + else: + videos_dir = project_path / "media" / "videos" + if videos_dir.exists(): - _import_videos(videos_dir, verbose) + _import_videos(videos_dir, config, verbose) + + # Import narration segments from media/narration directory + narration_dir = project_path / "media" / "narration" + if narration_dir.exists(): + _import_narration_segments(narration_dir, config, verbose) # Import presenter notes from Keynote file (also exports slide PNGs) keynote_files = list(project_path.glob("*.key")) @@ -343,25 +440,32 @@ def _generate_slides_json(directory: Path, verbose: bool) -> None: print(f" [{slide_id}]") -def _import_videos(videos_dir: Path, verbose: bool) -> None: +def _import_videos(videos_dir: Path, config, verbose: bool) -> None: """Import video files into videos.json. Scans the videos directory for video files and adds them to videos.json. Uses the filename (without extension) as the video_id. Does not overwrite existing entries - only adds new ones. + + If the video filename matches a pattern like 'talkinghead*' and a 'talkinghead' + filter preset exists in default_filters, it will be applied automatically. 
""" video_extensions = {".mov", ".mp4", ".webm", ".avi", ".mkv", ".m4v"} - # Find all video files (exclude processed outputs and files in subdirs) + # Find all video files (exclude processed outputs, proxies, and intermediate files) video_files = [ f for f in videos_dir.iterdir() if f.is_file() and f.suffix.lower() in video_extensions - and not f.name.endswith("_processed.mov") - and not f.name.endswith("_processed.webm") + and "_processed" not in f.stem # Exclude any _processed files + and "_fixed" not in f.stem # Exclude any _fixed files + and not f.name.startswith("narration_combined") ] + # Also exclude files in subdirectories (proxy/, intermediate/, etc.) + video_files = [f for f in video_files if f.parent == videos_dir] + if not video_files: if verbose: print(f" No new video files found in {videos_dir}") @@ -374,6 +478,9 @@ def _import_videos(videos_dir: Path, verbose: bool) -> None: with open(videos_json_path, "r", encoding="utf-8") as f: existing_videos = json.load(f) + # Get available filter presets from config + default_filters = config.default_filters if config else {} + # Add new videos (don't overwrite existing) added_count = 0 for video_file in sorted(video_files): @@ -382,28 +489,119 @@ def _import_videos(videos_dir: Path, verbose: bool) -> None: if video_id in existing_videos: if verbose: - print( - f" Skipping {video_id} (already exists). Change manually if needed" - ) + print(f" Skipping {video_id} (already exists)") continue - existing_videos[video_id] = { + # Determine if this is a talking head segment + # Match patterns like: talkinghead, talkingheadS01, talkinghead_s01, etc. 
+ is_narration_combined = "narration_combined" in video_file.stem.lower() + # Build the video entry + video_entry = { "source_file": video_file.name, - "output_file": video_file.name, - "cutout": "square", - "filter": [], } + + if is_narration_combined: + video_entry["output_file"] = None + video_entry["cutout"] = "talkinghead" + video_entry["always_visible"] = True + video_entry["skip"] = 0 + video_entry["filter"] = [] + print(f" Added talking head segment: {video_id}") + else: + # Regular video + + video_entry["output_file"] = video_file.name + video_entry["cutout"] = "square" + video_entry["filter"] = [] + if verbose: + print(f" Added: {video_id}") + + existing_videos[video_id] = video_entry added_count += 1 - if verbose: - print(f" Added: {video_id}") if added_count > 0: # Write updated videos.json with open(videos_json_path, "w", encoding="utf-8") as f: json.dump(existing_videos, f, indent=2) - print(f" Updated {videos_json_path} (+{added_count} videos)") + print(f" Updated {videos_json_path.name} (+{added_count} videos)") else: - print(f" No new videos to add in {videos_dir}") + print(f" No new videos to add") + + +def _import_narration_segments(narration_dir: Path, config, verbose: bool) -> None: + """Import narration video files into narration.json. + + Scans the narration directory for video files and adds them to narration.json. + Uses the filename (without extension) as the segment_id. + Does not overwrite existing entries - only adds new ones. + + If a 'talkinghead' filter preset exists in default_filters, it will be + applied automatically to all narration segments. 
+ """ + video_extensions = {".mov", ".mp4", ".webm", ".avi", ".mkv", ".m4v"} + + # Find all video files (exclude processed outputs and combined files) + video_files = [ + f + for f in narration_dir.iterdir() + if f.is_file() + and f.suffix.lower() in video_extensions + and "_processed" not in f.stem # Exclude any _processed files + and not f.name.startswith("narration_combined") + ] + + if not video_files: + if verbose: + print(f" No narration files found in {narration_dir}") + return + + # Load existing narration.json if it exists + narration_json_path = narration_dir / "narration.json" + existing_narration: dict = {} + if narration_json_path.exists(): + with open(narration_json_path, "r", encoding="utf-8") as f: + existing_narration = json.load(f) + + # Get available filter presets from config + default_filters = config.default_filters if config else {} + + # Add new segments (don't overwrite existing) + added_count = 0 + for video_file in sorted(video_files): + # Use filename without extension as segment_id + segment_id = video_file.stem + + if segment_id in existing_narration: + if verbose: + print(f" Skipping {segment_id} (already exists)") + continue + + # Build the narration entry + narration_entry = { + "source_file": video_file.name, + "output_file": f"{video_file.stem}_processed.mov", + } + + # Apply talkinghead preset if available + if "talkinghead" in default_filters: + narration_entry["filter"] = "talkinghead" + narration_entry["cutout"] = "talkinghead" + + # Default audio settings for narration + narration_entry["use_audio_channels"] = "left" + narration_entry["defer_loudnorm"] = True + + existing_narration[segment_id] = narration_entry + added_count += 1 + print(f" Added narration segment: {segment_id}") + + if added_count > 0: + # Write updated narration.json + with open(narration_json_path, "w", encoding="utf-8") as f: + json.dump(existing_narration, f, indent=2) + print(f" Updated narration.json (+{added_count} segments)") + else: + print(f" No 
new narration segments to add") def _import_presenter_notes( @@ -531,17 +729,38 @@ def cmd_validate(project_path: Path, verbose: bool) -> int: def cmd_preprocess( - project_path: Path, verbose: bool, dry_run: bool, force: bool = False, workers: int = 1 + project_path: Path, + verbose: bool, + dry_run: bool, + force: bool = False, + workers: int = 1, + proxy: bool = False, ) -> int: - """Run preprocessing pipeline on video sources.""" + """Run preprocessing pipeline on narration segments.""" from concurrent.futures import ThreadPoolExecutor, as_completed - from .parser import parse_project_config, parse_videos - from .preprocessor import preprocess_video + from .parser import parse_project_config, parse_narration + from .preprocessor import ( + preprocess_video, + create_proxies_for_videos, + ) - print(f"Preprocessing: {project_path.name}") + mode_str = " (PROXY MODE)" if proxy else "" + print(f"Preprocessing narration: {project_path.name}{mode_str}") config = parse_project_config(project_path) - videos, videos_dir = parse_videos(project_path, config) + narration, narration_dir = parse_narration(project_path, config) + + if not narration: + print(" No narration segments found in media/narration/narration.json") + print(" Run 'gnommo -p import' first to populate narration.json") + return 1 + + # Proxy mode: create low-res copies first, then work from proxy dir + if proxy: + proxy_dir = create_proxies_for_videos(narration_dir, narration, force, verbose) + # Switch to proxy directory for all subsequent operations + narration_dir = proxy_dir + print(f" Working from proxy dir: {proxy_dir}") # Resolve intermediate directory gnommo_scratch = None @@ -551,66 +770,196 @@ def cmd_preprocess( gnommo_scratch = project_path / gnommo_scratch print(f" Using intermediate dir: {gnommo_scratch}") - # Filter videos that need preprocessing - videos_to_process = [] - for video_id, video_source in videos.items(): - if not video_source.filter: - print(f" {video_id}: No filters defined, 
skipping.") + # Filter segments that need preprocessing + segments_to_process = [] + for segment_id, segment_source in narration.items(): + if not segment_source.filter: + if verbose: + print(f" {segment_id}: No filters defined, skipping.") continue - videos_to_process.append((video_id, video_source)) + segments_to_process.append((segment_id, segment_source)) - if not videos_to_process: - print("\nNo videos to preprocess.") + if not segments_to_process: + print("\nNo narration segments to preprocess.") return 0 if dry_run: - for video_id, video_source in videos_to_process: - print(f"\n Would preprocess: {video_id}") - print(f" Source: {video_source.source_file}") - print(f" Output: {video_source.output_file or 'N/A'}") - for step in video_source.filter: + for segment_id, segment_source in segments_to_process: + print(f"\n Would preprocess: {segment_id}") + print(f" Source: {segment_source.source_file}") + print(f" Output: {segment_source.output_file or 'N/A'}") + for step in segment_source.filter: print(f" - {step}") return 0 - # Process videos - if workers > 1 and len(videos_to_process) > 1: + # Process segments + if workers > 1 and len(segments_to_process) > 1: # Parallel processing - num_workers = min(workers, len(videos_to_process)) - print(f"\n Processing {len(videos_to_process)} videos in parallel ({num_workers} workers)") + num_workers = min(workers, len(segments_to_process)) + print( + f"\n Processing {len(segments_to_process)} segments in parallel ({num_workers} workers)" + ) - def process_video_task(task): - video_id, video_source = task + def process_segment_task(task): + segment_id, segment_source = task preprocess_video( - videos_dir, video_id, video_source, verbose=False, force=force, - custom_gnommo_scratch=gnommo_scratch + narration_dir, + segment_id, + segment_source, + verbose=False, + force=force, + custom_gnommo_scratch=gnommo_scratch, ) - return video_id + return segment_id completed = 0 with ThreadPoolExecutor(max_workers=num_workers) as 
executor: - futures = {executor.submit(process_video_task, task): task for task in videos_to_process} + futures = { + executor.submit(process_segment_task, task): task + for task in segments_to_process + } for future in as_completed(futures): - video_id = future.result() + segment_id = future.result() completed += 1 - print(f" Completed: {video_id} ({completed}/{len(videos_to_process)})") + print(f" Completed: {segment_id} ({completed}/{len(segments_to_process)})") else: # Sequential processing - for video_id, video_source in videos_to_process: - print(f"\n Processing: {video_id}") - if video_source.take: - print(f" Taking only {video_source.take} seconds") - print(f" Source file: {video_source.source_file}") - print(f" Output file: {video_source.output_file or 'N/A'}") - print(f" Filters: {len(video_source.filter)} step(s)") + for segment_id, segment_source in segments_to_process: + print(f"\n Processing: {segment_id}") + print(f" Source file: {segment_source.source_file}") + print(f" Output file: {segment_source.output_file or 'N/A'}") + print(f" Filters: {len(segment_source.filter)} step(s)") preprocess_video( - videos_dir, video_id, video_source, verbose, force, gnommo_scratch + narration_dir, segment_id, segment_source, verbose, force, gnommo_scratch ) + print(f"\n Run 'gnommo -p stitch' to stitch narration segments into one fulll length narration file.") print("\nPreprocessing complete.") return 0 +# ============================================================================= +# Stitch Command (fast iteration on narration segments) +# ============================================================================= + + +def cmd_stitch( + project_path: Path, + verbose: bool, + force: bool = False, + proxy: bool = False, +) -> int: + """ + Stitch narration segments from narration.json. + + Reads segments from media/narration/narration.json, applies begin/end + trimming during concatenation, and writes output to media/videos/narration_combined.mov. 
+ Also creates/updates an entry in videos.json with volume property. + + This is useful for quickly iterating on begin/end trim points without + waiting for the full preprocessing pipeline. Works especially well + with --proxy for fast feedback. + """ + from .parser import parse_project_config, parse_narration, parse_videos + from .preprocessor import stitch_narration_segments, ensure_proxy_files_exist + + mode_str = " (PROXY MODE)" if proxy else "" + print(f"Stitching narration: {project_path.name}{mode_str}") + + config = parse_project_config(project_path) + narration, narration_dir = parse_narration(project_path, config) + + if not narration: + print(" No narration segments found in media/narration/narration.json") + print(" Run 'gnommo -p import' first to populate narration.json") + return 1 + + # Get videos_dir for output + if config and config.videos_path: + videos_json_path = project_path / config.videos_path + videos_dir = videos_json_path.parent + else: + videos_dir = project_path / "media" / "videos" + + # Proxy mode: use proxy directory for both input and output + # Create proxy files on-the-fly if they don't exist + if proxy: + proxy_narration_dir = ensure_proxy_files_exist(narration_dir, force=False, verbose=verbose) + proxy_videos_dir = videos_dir / "proxy" + proxy_videos_dir.mkdir(parents=True, exist_ok=True) + narration_dir = proxy_narration_dir + videos_dir = proxy_videos_dir + print(f" Using proxy dirs: {narration_dir}, {videos_dir}") + + # Get segment IDs in sorted order + segment_ids = sorted(narration.keys()) + + # Show what we're stitching + print(f"\n Segments ({len(segment_ids)}):") + for segment_id in segment_ids: + seg = narration[segment_id] + skip_str = f"skip={seg.skip:.1f}s" if seg.skip else "" + take_str = f"take={seg.take:.1f}s" if seg.take else "" + trim_info = ", ".join(filter(None, [skip_str, take_str])) + trim_str = f" ({trim_info})" if trim_info else "" + print(f" - {segment_id}{trim_str}") + + stitch_output = videos_dir / 
"narration_combined.mov" + + if stitch_output.exists() and not force: + print(f"\n Combined narration exists: {stitch_output.name}") + print(" (use --force to regenerate)") + return 0 + + stitch_narration_segments( + narration_dir, + segment_ids, + narration, + stitch_output, + verbose=verbose, + ) + + # Run import videos again, because at this point narration_combined might have been created. + _import_videos(videos_dir, config, verbose) + + # Always update the MAIN videos.json (parent directory when in proxy mode) + # Proxy mode only affects file paths, not JSON metadata updates + main_videos_dir = videos_dir.parent if proxy else videos_dir + videos_json_path = main_videos_dir / "videos.json" + if True: # Always update JSON regardless of proxy mode + existing_videos: dict = {} + if videos_json_path.exists(): + with open(videos_json_path, "r", encoding="utf-8") as f: + existing_videos = json.load(f) + + # Get cutout from first narration segment + first_seg = narration[segment_ids[0]] + cutout = first_seg.cutout or "talkinghead" + + # Create/update narration_combined entry + existing_videos["narration_combined"] = { + "source_file": "narration_combined.mov", + "output_file": "narration_combined.mov", + "cutout": cutout, + "always_visible": True, + "volume": 1.0, + } + + with open(videos_json_path, "w", encoding="utf-8") as f: + json.dump(existing_videos, f, indent=2) + print(f"\n Updated videos.json with narration_combined entry (volume=1.0)") + print(" Edit videos.json to adjust volume if needed.") + + print("\nConcatenation complete.") + + # Automatically transcribe to keep transcript in sync with narration + print("\n" + "=" * 60) + print("Auto-running transcribe to sync with new narration...") + print("=" * 60 + "\n") + return cmd_transcribe(project_path, verbose, proxy=proxy) + + # ============================================================================= # Render Command # ============================================================================= @@ 
-687,10 +1036,12 @@ def _print_render_plan_details(plan, marker_timings, slides: dict) -> None: else: cutout = "?" duration = 0 - print(f" {marker_id:20} {time_str} in '{cutout}' ({duration:.1f}s)") + cache_ind = " 📁" if video_id in plan.cached_files else "" + print(f" {marker_id:20} {time_str} in '{cutout}' ({duration:.1f}s){cache_ind}") elif marker_id.startswith("narration:"): video_id = marker_id[10:] - print(f" {marker_id:20} {time_str} (continuous)") + cache_ind = " 📁" if video_id in plan.cached_files else "" + print(f" {marker_id:20} {time_str} (continuous){cache_ind}") elif marker_id in CAMERA_PRESETS: print(f" {time_str} [{marker_id}]") elif marker_id.startswith("A"): @@ -718,7 +1069,9 @@ def _print_render_plan_details(plan, marker_timings, slides: dict) -> None: if plan.outro_events: print(f" Outro: {len(plan.outro_events)} video(s)") for event in plan.outro_events: - print(f" - {event.video_id}: {_format_time(event.start_time)} - {_format_time(event.end_time)}") + print( + f" - {event.video_id}: {_format_time(event.start_time)} - {_format_time(event.end_time)}" + ) print(f" Duration: {_format_time(plan.total_duration)}") @@ -748,6 +1101,7 @@ def cmd_render( slides_arg: str = None, res: str = "full", force: bool = False, + proxy: bool = False, ) -> int: """Render final video.""" from .parser import ( @@ -762,6 +1116,7 @@ def cmd_render( from .validator import validate_project from .transformer import build_render_plan from .renderer import render, generate_ffmpeg_command_string + from .preprocessor import PROXY_WIDTH, PROXY_HEIGHT, ensure_proxy_files_exist # Parse slide range if provided slide_range = None @@ -772,9 +1127,16 @@ def cmd_render( print(f"Rendering: {project_path.name}") # Show resolution mode - if res == "low": + if proxy: + print(f" Resolution: PROXY ({PROXY_WIDTH}x{PROXY_HEIGHT}) - fast preview mode") + elif res == "low": print(" Resolution: LOW (490x270) - fast preview mode") + # Show cache status + cache_info = get_cache_info() + if 
cache_info: + print(f" Cache: {cache_info}") + # Stage 1: Parse print("\n[1/4] Parsing...") manuscript_text, markers, malformed, citations = parse_manuscript(project_path) @@ -785,30 +1147,107 @@ def cmd_render( save_citations(citations, citations_path) config = parse_project_config(project_path) - # Override resolution for low-res preview mode - if res == "low": + # Override resolution for proxy or low-res preview mode + if proxy: + config.resolution = (PROXY_WIDTH, PROXY_HEIGHT) + elif res == "low": config.resolution = (490, 270) slides = parse_slides(project_path, config) videos, videos_dir = parse_videos(project_path, config) + + # Proxy mode: use videos from proxy directory + # Create proxy files on-the-fly if they don't exist + if proxy: + proxy_dir = ensure_proxy_files_exist(videos_dir, force=False, verbose=verbose) + videos_dir = proxy_dir + if verbose: + print(f" Using proxy dir: {proxy_dir}") audio, audio_dir = parse_audio(project_path, config) # Load whisper transcription JSON - # Look for .transcript.json next to the narration video - result = _find_narration_video(config, videos) - if result: - _, narration_source = result - video_path = videos_dir / narration_source.source_file - transcript_path = video_path.with_suffix(".transcript.json") - else: - transcript_path = project_path / "transcript.json" + # Check for narration_combined in videos.json (new workflow) or multi-segment in config (legacy) + combined_path = videos_dir / "narration_combined.mov" + # Try cache fallback for combined narration + resolved_combined, _ = resolve_with_cache(combined_path, project_path) + if "narration_combined" in videos and resolved_combined.exists(): + # New workflow: narration_combined was created by 'gnommo concat' and is in videos.json + # This entry has the correct volume setting from videos.json + transcript_path = resolved_combined.with_suffix(".transcript.json") + config.main_video = "narration_combined" + if verbose: + print(f" Using combined narration: 
{resolved_combined.name} (volume={videos['narration_combined'].volume})") + elif isinstance(config.main_video, list) and len(config.main_video) > 1: + # Legacy: Multi-segment narration with main_video array in project.json + resolved_combined, _ = resolve_with_cache(combined_path, project_path) + transcript_path = resolved_combined.with_suffix(".transcript.json") + if not resolved_combined.exists(): + print( + f"Error: Combined narration not found: {combined_path}", file=sys.stderr + ) + print( + "Run 'gnommo -p concat' first to concatenate segments.", + file=sys.stderr, + ) + return 1 + + # Create a synthetic video entry for the combined narration + # Inherit settings from the first segment + first_segment_id = config.main_video[0] + if first_segment_id in videos: + first_segment = videos[first_segment_id] + from .models import VideoSource + + combined_video = VideoSource( + source_file="narration_combined.mov", + filter=first_segment.filter, + output_file=None, # Already processed + cutout=first_segment.cutout, + always_visible=True, + skip=0.0, # Already trimmed during concatenation + take=None, + ) + videos["_narration_combined"] = combined_video + config.main_video = "_narration_combined" + + if verbose: + print(f" Using combined narration: {combined_path.name}") + else: + # Check if narration.json exists (new workflow) - if so, require narration_combined + narration_json = project_path / "media" / "narration" / "narration.json" + if narration_json.exists(): + print( + f"Error: narration_combined not found in videos.json", file=sys.stderr + ) + print( + f"You have narration segments in narration.json but haven't stitched them.", + file=sys.stderr, + ) + print( + f"Run 'gnommo -p {project_path.name} stitch' first.", + file=sys.stderr, + ) + return 1 + + # Single video - look for .transcript.json next to the narration video + result = _find_narration_video(config, videos) + if result: + video_id, narration_source = result + config.main_video = video_id # Ensure 
main_video is set to the found video + video_path = videos_dir / narration_source.source_file + transcript_path = video_path.with_suffix(".transcript.json") + else: + transcript_path = project_path / "transcript.json" + + # Try cache fallback for transcript + transcript_path, _ = resolve_with_cache(transcript_path, project_path) if not transcript_path.exists(): print(f"Error: Transcription not found: {transcript_path}", file=sys.stderr) print(f"Run 'gnommo -p {project_path.name} transcribe' first.", file=sys.stderr) return 1 - transcription = load_transcript(transcript_path) + transcription = load_transcript(transcript_path, project_path) if verbose: print(f" - Markers in manuscript: {len(markers)}") @@ -836,6 +1275,7 @@ def cmd_render( audio, audio_dir, slide_range=slide_range, + proxy=proxy, ) if plan.time_offset > 0: print(f" Time offset: {plan.time_offset:.1f}s (partial render)") @@ -847,13 +1287,18 @@ def cmd_render( for event in plan.audio_events: loop_str = " (loop)" if event.audio_def.loop else "" pause_str = " [ignores pauses]" if event.audio_def.ignore_pauses else "" - print(f" - {event.audio_id}: '{event.audio_def.file}' @ {_format_time(event.start_time)}{loop_str}{pause_str}") + print( + f" - {event.audio_id}: '{event.audio_def.file}' @ {_format_time(event.start_time)}{loop_str}{pause_str}" + ) # Show always-visible videos if plan.narration_videos: print(f"\n Always-visible videos:") for video_id, video_source, cutout in plan.narration_videos: - skip_str = f" (skip: {video_source.skip:.1f}s)" if video_source.skip > 0 else "" - print(f" - {video_id} in '{video_source.cutout}'{skip_str}") + skip_str = ( + f" (skip: {video_source.skip:.1f}s)" if video_source.skip > 0 else "" + ) + cache_ind = " 📁" if video_id in plan.cached_files else "" + print(f" - {video_id} in '{video_source.cutout}'{skip_str}{cache_ind}") # Show narration pauses if plan.narration_pauses: @@ -932,12 +1377,20 @@ def _find_narration_video(config, videos: dict) -> Optional[tuple[str, 
"VideoSou return None -def cmd_transcribe(project_path: Path, verbose: bool) -> int: +def cmd_transcribe( + project_path: Path, verbose: bool, proxy: bool = False, final: bool = False +) -> int: """Transcribe video audio using Whisper.""" - from .transcriber import transcribe_video, save_transcript + from .transcriber import transcribe_video, save_transcript, words_to_srt from .parser import parse_project_config, parse_videos + from .preprocessor import ensure_proxy_files_exist - print(f"Transcribing: {project_path.name}") + # Handle --final mode: transcribe the rendered output for YouTube captions + if final: + return _transcribe_final(project_path, verbose) + + mode_str = " (PROXY)" if proxy else "" + print(f"Transcribing: {project_path.name}{mode_str}") config = parse_project_config(project_path) videos, videos_dir = parse_videos(project_path, config) @@ -945,20 +1398,38 @@ def cmd_transcribe(project_path: Path, verbose: bool) -> int: print("Error: No videos defined in videos.json", file=sys.stderr) return 1 - # Find the narration video - result = _find_narration_video(config, videos) - if not result: - print("Error: No suitable video found for transcription", file=sys.stderr) - return 1 + # Proxy mode: use videos from proxy directory + # Create proxy files on-the-fly if they don't exist + if proxy: + proxy_dir = ensure_proxy_files_exist(videos_dir, force=False, verbose=verbose) + videos_dir = proxy_dir - video_id, video_source = result - video_path = videos_dir / video_source.source_file + # Check for multi-segment narration (concatenated file) + if isinstance(config.main_video, list) and len(config.main_video) > 1: + video_path = videos_dir / "narration_combined.mov" + if not video_path.exists(): + print(f"Error: Combined narration not found: {video_path}", file=sys.stderr) + print( + "Run 'gnommo -p pre' first to concatenate segments.", + file=sys.stderr, + ) + return 1 + print(f" Using combined narration: {video_path.name}") + else: + # Single video - find 
it using existing logic + result = _find_narration_video(config, videos) + if not result: + print("Error: No suitable video found for transcription", file=sys.stderr) + return 1 - if not video_path.exists(): - print(f"Error: Video not found: {video_path}", file=sys.stderr) - return 1 + video_id, video_source = result + video_path = videos_dir / video_source.source_file - print(f" Video: {video_path.name}") + if not video_path.exists(): + print(f"Error: Video not found: {video_path}", file=sys.stderr) + return 1 + + print(f" Video: {video_path.name}") words = transcribe_video(video_path, model="base") @@ -976,6 +1447,60 @@ def cmd_transcribe(project_path: Path, verbose: bool) -> int: return 0 +def _transcribe_final(project_path: Path, verbose: bool) -> int: + """ + Transcribe the final rendered video and generate SRT captions for YouTube. + + Looks for out/final.mp4 and creates out/final.srt suitable for upload. + """ + from .transcriber import transcribe_video, save_transcript, words_to_srt + + print(f"Transcribing final output: {project_path.name}") + + # Look for the final rendered video + out_dir = project_path / "out" + final_video = out_dir / "final.mp4" + + if not final_video.exists(): + print(f"Error: Final video not found: {final_video}", file=sys.stderr) + print(f"Run 'gnommo -p {project_path.name} render' first.", file=sys.stderr) + return 1 + + print(f" Video: {final_video.name}") + + # Transcribe with word-level timestamps + words = transcribe_video(final_video, model="base") + + if not words: + print("Error: No words transcribed from video", file=sys.stderr) + return 1 + + # Save JSON transcript + transcript_path = out_dir / "final.transcript.json" + save_transcript(words, transcript_path) + + # Generate SRT captions + srt_path = out_dir / "final.srt" + srt_content = words_to_srt(words) + srt_path.write_text(srt_content, encoding="utf-8") + + print(f" - Transcribed {len(words)} words") + print(f" - Duration: {words[-1].end:.1f}s") + print(f" - 
Transcript: {transcript_path}") + print(f" - Captions: {srt_path}") + + # Count caption segments + caption_count = srt_content.count("\n\n") + 1 + print(f" - Caption segments: {caption_count}") + + if verbose and words: + preview = " ".join(w.word for w in words[:15]) + print(f" - Preview: {preview}...") + + print("\nSRT file ready for YouTube upload.") + return 0 + + # ============================================================================= # Align Command # ============================================================================= @@ -985,7 +1510,14 @@ def cmd_align(project_path: Path, verbose: bool) -> int: """Preview manuscript marker alignment (no files written).""" from .transcriber import load_transcript from .transformer import align_markers_to_transcription - from .parser import parse_project_config, parse_videos, parse_slides, parse_audio, parse_manuscript, save_citations + from .parser import ( + parse_project_config, + parse_videos, + parse_slides, + parse_audio, + parse_manuscript, + save_citations, + ) print(f"Alignment preview: {project_path.name}") print(" (This is a preview - alignment happens automatically during render)") @@ -1014,13 +1546,15 @@ def cmd_align(project_path: Path, verbose: bool) -> int: video_path = videos_dir / video_source.source_file transcript_path = video_path.with_suffix(".transcript.json") + # Try cache fallback for transcript + transcript_path, _ = resolve_with_cache(transcript_path, project_path) if not transcript_path.exists(): print(f"Error: Transcription not found: {transcript_path}", file=sys.stderr) print(f"Run 'gnommo -p {project_path.name} transcribe' first.", file=sys.stderr) return 1 print(f" Loading: {transcript_path.name}") - transcription = load_transcript(transcript_path) + transcription = load_transcript(transcript_path, project_path) print(f" - {len(transcription)} words") # Align (cite markers already stripped at parse time) @@ -1091,13 +1625,15 @@ def cmd_all( video_path = videos_dir / 
video_source.source_file transcript_path = video_path.with_suffix(".transcript.json") - if not transcript_path.exists(): + # Try cache fallback for transcript + resolved_transcript, _ = resolve_with_cache(transcript_path, project_path) + if not resolved_transcript.exists(): print(">>> Step 1/2: Transcribe\n") result = cmd_transcribe(project_path, verbose) if result != 0: return result else: - print(f">>> Step 1/2: Transcribe (cached: {transcript_path.name})\n") + print(f">>> Step 1/2: Transcribe (cached: {resolved_transcript.name})\n") # Render (alignment happens automatically) print("\n>>> Step 2/2: Render\n") @@ -1143,13 +1679,17 @@ def cmd_description(project_path: Path, verbose: bool) -> int: _, narration_source = result video_path = videos_dir / narration_source.source_file transcript_path = video_path.with_suffix(".transcript.json") + # Try cache fallback for transcript + transcript_path, _ = resolve_with_cache(transcript_path, project_path) if transcript_path.exists(): - transcription = load_transcript(transcript_path) + transcription = load_transcript(transcript_path, project_path) if verbose: print(f" Loaded transcription: {len(transcription)} words") else: print(f" Warning: No transcription found at {transcript_path}") - print(f" Run 'gnommo -p {project_path.name} transcribe' for better timestamps.") + print( + f" Run 'gnommo -p {project_path.name} transcribe' for better timestamps." + ) # Align markers to get timings print(" Aligning markers...") @@ -1210,5 +1750,545 @@ def cmd_description(project_path: Path, verbose: bool) -> int: return 0 +def cmd_archive(project_path: Path, verbose: bool, dry_run: bool) -> int: + """Archive project files to external cache storage.""" + from .cache import load_cache_config + + print(f"Archiving: {project_path.name}") + + # Check cache is configured + cache_base = load_cache_config() + if cache_base is None: + print("Error: Cache not configured. 
Create ~/.gnommo.conf with:") + print(" [cache]") + print(" path = /Volumes/YourDisk/gnommo") + return 1 + + if not cache_base.exists(): + print(f"Error: Cache path not accessible: {cache_base}") + print("Make sure the external drive is connected.") + return 1 + + # Build destination path + dest_path = cache_base / project_path.name + print(f" Source: {project_path}") + print(f" Destination: {dest_path}") + + # Create destination if needed + if not dry_run: + dest_path.mkdir(parents=True, exist_ok=True) + + # Use rsync to sync media files + # -a: archive mode (preserves permissions, timestamps, etc.) + # -v: verbose + # --progress: show progress + # --exclude: skip files we don't want to sync + rsync_cmd = [ + "rsync", + "-av", + "--progress", + "--exclude=*.py", + "--exclude=__pycache__", + "--exclude=.git", + "--exclude=.DS_Store", + f"{project_path}/", + f"{dest_path}/", + ] + + if dry_run: + rsync_cmd.insert(1, "--dry-run") + print("\n [DRY RUN] Would execute:") + print(f" {' '.join(rsync_cmd)}") + else: + print("\n Syncing files...") + + if verbose: + print(f" Command: {' '.join(rsync_cmd)}") + + result = subprocess.run(rsync_cmd) + if result.returncode != 0: + print(f"Error: rsync failed with code {result.returncode}") + return 1 + + # Update project.json with synced_time + if not dry_run: + project_json_path = project_path / "project.json" + if project_json_path.exists(): + try: + data = json.loads(project_json_path.read_text(encoding="utf-8")) + data["synced_time"] = datetime.now().isoformat() + project_json_path.write_text( + json.dumps(data, indent=2, ensure_ascii=False) + "\n", + encoding="utf-8", + ) + print(f"\n Updated project.json with synced_time: {data['synced_time']}") + except (json.JSONDecodeError, IOError) as e: + print(f"Warning: Could not update project.json: {e}") + + print("\nDone.") + return 0 + + +# ============================================================================= +# Extract Audio Command +# 
============================================================================= + + +def _extract_audio_file( + source_path: Path, + output_dir: Path, + name: str, + channel: str, + verbose: bool, +) -> int: + """ + Extract audio from a single video file to WAV. + + Args: + source_path: Path to the source video file + output_dir: Directory to save the WAV file + name: Base name for the output file (without extension) + channel: "left", "right", or "both" + verbose: Print verbose output + + Returns: + 0 on success, 1 on error + """ + # Build output filename + if channel == "both": + output_name = f"{name}.wav" + else: + output_name = f"{name}_{channel}.wav" + output_path = output_dir / output_name + + print(f" Channel: {channel}") + print(f" Source: {source_path}") + print(f" Output: {output_path}") + + # Build ffmpeg command + cmd = [ + "ffmpeg", + "-y", # Overwrite + "-i", str(source_path), + "-vn", # No video + ] + + # Channel selection + if channel == "left": + cmd.extend(["-af", "pan=mono|c0=c0"]) + elif channel == "right": + cmd.extend(["-af", "pan=mono|c0=c1"]) + # "both" keeps stereo, no filter needed + + # Output format: 48kHz 16-bit WAV (standard for audio editing) + cmd.extend([ + "-ar", "48000", # 48kHz sample rate + "-acodec", "pcm_s16le", # 16-bit PCM + str(output_path), + ]) + + if verbose: + print(f" Command: {' '.join(cmd)}") + + print(f" Extracting...", end=" ", flush=True) + result = subprocess.run(cmd, capture_output=True, text=True) + if result.returncode != 0: + print(f"Error!") + print(f" {result.stderr}", file=sys.stderr) + return 1 + + # Get duration info + duration_cmd = [ + "ffprobe", "-v", "error", + "-show_entries", "format=duration", + "-of", "default=noprint_wrappers=1:nokey=1", + str(output_path), + ] + duration_result = subprocess.run(duration_cmd, capture_output=True, text=True) + duration_str = "" + if duration_result.returncode == 0: + try: + duration = float(duration_result.stdout.strip()) + duration_str = f" ({duration:.1f}s)" + 
except ValueError: + pass + + print(f"Done{duration_str}") + + print(f"\n Open in Audition to experiment with:") + print(f" - Effect > Noise Reduction") + print(f" - Effect > Compressor") + print(f" - Effect > Filter Curve EQ") + print(f" - Effect > Loudness Normalization") + print(f"\n Once you find good settings, update narration.json with matching filter config.") + + return 0 + + +def cmd_extract_audio( + project_path: Path, + verbose: bool, + segment: Optional[str] = None, + channel: str = "both", + combined: bool = False, +) -> int: + """ + Extract audio from narration segments to WAV files for editing in Audacity. + + This allows you to experiment with audio processing settings (EQ, compression, + noise reduction) in external software before applying them in the pipeline. + + Args: + project_path: Path to the project directory + verbose: Enable verbose output + segment: Specific segment ID to extract, or None for all segments + channel: Which channel(s) to extract: "left", "right", or "both" + combined: If True, extract from narration_combined.mov instead of segments + """ + from .parser import parse_project_config, parse_narration, parse_videos + + print(f"Extracting audio: {project_path.name}") + + config = parse_project_config(project_path) + + # Handle --combined mode: extract from narration_combined.mov + if combined: + videos, videos_dir = parse_videos(project_path, config) + combined_path = videos_dir / "narration_combined.mov" + + if not combined_path.exists(): + print(f"Error: narration_combined.mov not found at {combined_path}", file=sys.stderr) + print("Run 'gnommo -p stitch' first.", file=sys.stderr) + return 1 + + # Output to project out/ directory + audio_dir = project_path / "out" + audio_dir.mkdir(parents=True, exist_ok=True) + + return _extract_audio_file( + combined_path, audio_dir, "narration_combined", channel, verbose + ) + + # Normal mode: extract from individual segments + narration, narration_dir = parse_narration(project_path, 
config) + + if not narration: + print(" No narration segments found in media/narration/narration.json") + print(" Run 'gnommo -p import' first to populate narration.json") + return 1 + + # Create output directory + audio_dir = narration_dir / "audio" + audio_dir.mkdir(parents=True, exist_ok=True) + + # Determine which segments to process + if segment: + if segment not in narration: + print(f"Error: Segment '{segment}' not found in narration.json", file=sys.stderr) + print(f"Available segments: {', '.join(sorted(narration.keys()))}", file=sys.stderr) + return 1 + segments_to_process = [(segment, narration[segment])] + else: + segments_to_process = sorted(narration.items()) + + print(f" Channel: {channel}") + print(f" Output: {audio_dir}/") + print(f" Segments: {len(segments_to_process)}") + + # Process each segment + for segment_id, segment_source in segments_to_process: + source_path = narration_dir / segment_source.source_file + if not source_path.exists(): + print(f" Warning: Source not found: {source_path.name}, skipping") + continue + + # Build output filename + if channel == "both": + output_name = f"{segment_id}.wav" + else: + output_name = f"{segment_id}_{channel}.wav" + output_path = audio_dir / output_name + + print(f"\n {segment_id}:") + print(f" Source: {source_path.name}") + print(f" Output: {output_name}") + + # Build ffmpeg command + cmd = [ + "ffmpeg", + "-y", # Overwrite + "-i", str(source_path), + "-vn", # No video + ] + + # Channel selection + if channel == "left": + cmd.extend(["-af", "pan=mono|c0=c0"]) + elif channel == "right": + cmd.extend(["-af", "pan=mono|c0=c1"]) + # "both" keeps stereo, no filter needed + + # Output format: 48kHz 16-bit WAV (standard for audio editing) + cmd.extend([ + "-ar", "48000", # 48kHz sample rate + "-acodec", "pcm_s16le", # 16-bit PCM + str(output_path), + ]) + + if verbose: + print(f" Command: {' '.join(cmd)}") + + result = subprocess.run(cmd, capture_output=True, text=True) + if result.returncode != 0: + print(f" 
Error: {result.stderr}", file=sys.stderr) + return 1 + + # Get duration info + duration_cmd = [ + "ffprobe", "-v", "error", + "-show_entries", "format=duration", + "-of", "default=noprint_wrappers=1:nokey=1", + str(output_path), + ] + duration_result = subprocess.run(duration_cmd, capture_output=True, text=True) + if duration_result.returncode == 0: + try: + duration = float(duration_result.stdout.strip()) + print(f" Duration: {duration:.1f}s") + except ValueError: + pass + + print(f" Done") + + print(f"\n Audio files saved to: {audio_dir}") + print(f"\n Open in Audacity to experiment with:") + print(f" - Effect > Noise Reduction") + print(f" - Effect > Compressor") + print(f" - Effect > Filter Curve EQ") + print(f" - Effect > Loudness Normalization") + print(f"\n Once you find good settings, update narration.json with matching filter config.") + + return 0 + + +# ============================================================================= +# Master Command (A/B audio comparison) +# ============================================================================= + + +def cmd_master( + project_path: Path, + verbose: bool, + channel: str = "both", +) -> int: + """ + Extract raw and processed audio from narration_combined for A/B comparison. + + Outputs: + out/narration_combined.wav - Raw audio (no processing) + out/narration_combined_processed.wav - With audio filters applied + + This lets you compare the effect of your audio processing chain. 
+ """ + from .parser import parse_project_config, parse_videos + from .preprocessor import parse_audio_normalize_config + + print(f"Audio mastering: {project_path.name}") + + config = parse_project_config(project_path) + videos, videos_dir = parse_videos(project_path, config) + + # Find narration_combined.mov + combined_path = videos_dir / "narration_combined.mov" + if not combined_path.exists(): + print(f"Error: narration_combined.mov not found at {combined_path}", file=sys.stderr) + print("Run 'gnommo -p stitch' first.", file=sys.stderr) + return 1 + + # Output directory + out_dir = project_path / "out" + out_dir.mkdir(parents=True, exist_ok=True) + + raw_output = out_dir / "narration_combined.wav" + processed_output = out_dir / "narration_combined_processed.wav" + + # Find audio_normalize config from default_filters + audio_config = None + if config.default_filters: + for preset_name, filters in config.default_filters.items(): + for f in filters: + if f.get("type") == "audio_normalize": + audio_config = f + print(f" Using audio config from: default_filters.{preset_name}") + break + if audio_config: + break + + if not audio_config: + print(" Warning: No audio_normalize filter found in default_filters") + print(" Will only extract raw audio.") + + # Build channel filter + channel_filter = "" + if channel == "left": + channel_filter = "pan=mono|c0=c0," + elif channel == "right": + channel_filter = "pan=mono|c0=c1," + + # Step 1: Extract raw audio + print(f"\n Extracting raw audio...") + raw_cmd = [ + "ffmpeg", "-y", + "-i", str(combined_path), + "-vn", + ] + if channel_filter: + raw_cmd.extend(["-af", channel_filter.rstrip(",")]) + raw_cmd.extend([ + "-ar", "48000", + "-acodec", "pcm_s16le", + str(raw_output), + ]) + + if verbose: + print(f" Command: {' '.join(raw_cmd)}") + + result = subprocess.run(raw_cmd, capture_output=True, text=True) + if result.returncode != 0: + print(f" Error extracting raw audio: {result.stderr}", file=sys.stderr) + return 1 + print(f" 
Saved: {raw_output.name}") + + # Step 2: Extract processed audio (if we have config) + if audio_config: + print(f"\n Applying audio filters...") + cfg = parse_audio_normalize_config(audio_config) + + # Build filter chain (same order as apply_audio_normalize) + audio_filters = [] + + # Channel mapping + if channel_filter: + audio_filters.append(channel_filter.rstrip(",")) + + # EQ bands + for band in cfg.eq_bands: + if band.type == "lowshelf": + audio_filters.append( + f"lowshelf=f={band.freq:.1f}:g={band.gain:.1f}:t=q:w={band.q:.2f}" + ) + elif band.type == "highshelf": + audio_filters.append( + f"highshelf=f={band.freq:.1f}:g={band.gain:.1f}:t=q:w={band.q:.2f}" + ) + else: + audio_filters.append( + f"equalizer=f={band.freq:.1f}:width_type=q:width={band.q:.2f}:g={band.gain:.1f}" + ) + + # High-pass + if cfg.highpass > 0: + audio_filters.append(f"highpass=f={cfg.highpass:.1f}") + + # Low-pass + if cfg.lowpass > 0: + audio_filters.append(f"lowpass=f={cfg.lowpass:.1f}") + + # Room EQ + if cfg.room_eq: + audio_filters.append( + f"equalizer=f={cfg.room_eq_freq:.1f}:width_type=q:width={cfg.room_eq_width:.2f}:g={cfg.room_eq_gain:.1f}" + ) + + # Denoise + if cfg.denoise: + audio_filters.append(f"afftdn=nf={cfg.noise_floor:.1f}") + + # Gate + if cfg.gate: + audio_filters.append( + f"agate=threshold={cfg.gate_threshold:.1f}dB" + f":range={cfg.gate_range:.1f}dB" + f":attack={cfg.gate_attack:.1f}" + f":release={cfg.gate_release:.1f}" + ) + + # Compressor + if cfg.compress: + audio_filters.append( + f"acompressor=threshold={cfg.threshold:.1f}dB" + f":ratio={cfg.ratio:.1f}" + f":attack={cfg.attack:.1f}" + f":release={cfg.release:.1f}" + f":makeup={cfg.makeup:.1f}dB" + ) + + # Loudness normalization + if cfg.normalize: + audio_filters.append( + f"loudnorm=I={cfg.target_lufs:.1f}" + f":LRA={cfg.target_lra:.1f}" + f":TP={cfg.target_tp:.1f}" + ) + + filter_chain = ",".join(audio_filters) + + if verbose: + print(f" Filter chain: {filter_chain}") + + # Print filter summary + print(f" 
Filters applied:") + if cfg.eq_bands: + print(f" - EQ: {len(cfg.eq_bands)} bands") + if cfg.highpass > 0: + print(f" - Highpass: {cfg.highpass}Hz") + if cfg.denoise: + print(f" - Denoise: floor={cfg.noise_floor}dB") + if cfg.gate: + print(f" - Gate: threshold={cfg.gate_threshold}dB") + if cfg.compress: + print(f" - Compressor: ratio={cfg.ratio}:1, attack={cfg.attack}ms") + if cfg.normalize: + print(f" - Loudnorm: target={cfg.target_lufs} LUFS") + + processed_cmd = [ + "ffmpeg", "-y", + "-i", str(combined_path), + "-vn", + "-af", filter_chain, + "-ar", "48000", + "-acodec", "pcm_s16le", + str(processed_output), + ] + + if verbose: + print(f" Command: {' '.join(processed_cmd)}") + + result = subprocess.run(processed_cmd, capture_output=True, text=True) + if result.returncode != 0: + print(f" Error applying filters: {result.stderr}", file=sys.stderr) + return 1 + print(f" Saved: {processed_output.name}") + + # Probe the raw WAV for its length; report 0 when ffprobe prints nothing parseable + def get_duration(path): + cmd = ["ffprobe", "-v", "error", "-show_entries", "format=duration", + "-of", "default=noprint_wrappers=1:nokey=1", str(path)] + r = subprocess.run(cmd, capture_output=True, text=True) + try: + return float(r.stdout.strip()) + except ValueError: + return 0 + + duration = get_duration(raw_output) + + # Only list the processed file when the filter pass actually ran in this invocation + written = [raw_output] + ([processed_output] if audio_config else []) + print(f"\n Output files ({duration:.1f}s):") + print("\n".join(f" {out_path}" for out_path in written)) + print(f"\n Open both in Audition to A/B compare the processing.") + + return 0 + + if __name__ == "__main__": sys.exit(main()) diff --git a/gnommo/description.py b/gnommo/description.py index 1c88576..a4cf73c 100644 --- a/gnommo/description.py +++ b/gnommo/description.py @@ -163,7 +163,9 @@ def generate_chapters( chapters = [] # Build timing lookup - timing_lookup = {t.marker_id: t.timestamp for t in marker_timings if t.timestamp >= 0} + timing_lookup = { + t.marker_id: t.timestamp for t in marker_timings if t.timestamp >= 0 + } # Process slides in order slide_ids = sorted( diff --git a/gnommo/handoff.py b/gnommo/handoff.py new file
mode 100644 index 0000000..bbe9d0a --- /dev/null +++ b/gnommo/handoff.py @@ -0,0 +1,165 @@ +"""Hand off a finished video to the gnommoweb server. + +Works for any gnommo project type: parent videos and shorts alike. + +Usage: + gnommo handoff -p video1 + gnommo handoff -p short_pixelated_universe + gnommo handoff -p video1 --file /path/to/render.mp4 + +Reads project.json for the 'output_video' field (path relative to the +project directory). Override with --file. + +On success: + - Uploads the video to MinIO via POST /api/projects/:handle/handoff + - For shorts: server auto-advances status to 'processed' + - Bumps video_version on every upload + - Updates .gnommo_sync.json with new video_version + +Configuration (from .env or environment): + GNOMMOWEB_URL Base URL (e.g. http://localhost:3001) + GNOMMOWEB_API_KEY Bearer token (CONTENT_API_KEY from gnommoweb) +""" + +import json +import os +import sys +from datetime import datetime, timezone +from pathlib import Path + +try: + import requests +except ImportError: + print("Error: 'requests' package is required. 
Run: pip install requests", file=sys.stderr) + sys.exit(1) + +SYNC_FILE = ".gnommo_sync.json" + + +def _load_env_file(): + env_path = Path(__file__).parent.parent / ".env" + if not env_path.exists(): + return + with open(env_path) as f: + for line in f: + line = line.strip() + if not line or line.startswith("#") or "=" not in line: + continue + key, _, value = line.partition("=") + key = key.strip() + value = value.strip().strip('"').strip("'") + if key not in os.environ: + os.environ[key] = value + + +def _read_sync(project_path: Path) -> dict: + sync_file = project_path / SYNC_FILE + if sync_file.exists(): + with open(sync_file) as f: + return json.load(f) + return {} + + +def _write_sync(project_path: Path, data: dict): + with open(project_path / SYNC_FILE, "w") as f: + json.dump(data, f, indent=2) + + +def cmd_handoff(project_path: Path, verbose: bool = False, file_override: str | None = None) -> int: + _load_env_file() + + api_url = os.environ.get("GNOMMOWEB_URL", "").rstrip("/") + api_key = os.environ.get("GNOMMOWEB_API_KEY", "") + + if not api_url: + print("Error: GNOMMOWEB_URL is not set.", file=sys.stderr) + return 1 + if not api_key: + print("Error: GNOMMOWEB_API_KEY is not set.", file=sys.stderr) + return 1 + + project_file = project_path / "project.json" + if not project_file.exists(): + print(f"Error: {project_file} not found", file=sys.stderr) + return 1 + + with open(project_file) as f: + project = json.load(f) + + project_id = project.get("id") + if not project_id: + print("Error: project.json must have an 'id' field.", file=sys.stderr) + return 1 + + # ── Resolve video file ───────────────────────────────────────────────────── + if file_override: + video_path = Path(file_override) + else: + output_video = project.get("output_video") + if not output_video: + print( + "Error: no 'output_video' field in project.json and no --file provided.", + file=sys.stderr, + ) + return 1 + video_path = project_path / output_video + + if not video_path.exists(): + 
print(f"Error: video file not found: {video_path}", file=sys.stderr) + return 1 + + file_size_mb = video_path.stat().st_size / (1024 * 1024) + if verbose: + print(f"Handing off {project_id} → {api_url}") + print(f" File: {video_path} ({file_size_mb:.1f} MB)") + + # ── Upload ───────────────────────────────────────────────────────────────── + try: + with open(video_path, "rb") as vf: + r = requests.post( + f"{api_url}/api/projects/{project_id}/handoff", + files={"video": (video_path.name, vf, _mime_type(video_path))}, + headers={"Authorization": f"Bearer {api_key}"}, + timeout=None, # large files may take a while + ) + r.raise_for_status() + except requests.exceptions.ConnectionError: + print(f"Error: Could not connect to {api_url}", file=sys.stderr) + return 1 + except requests.exceptions.HTTPError as e: + print(f"Error: Server returned {e.response.status_code}", file=sys.stderr) + try: + print(f" {e.response.json()}", file=sys.stderr) + except Exception: + pass + return 1 + except requests.exceptions.RequestException as e: + # Any other transport failure (timeout, broken stream, ...) is reported like the cases above instead of a traceback + print(f"Error: Upload failed: {e}", file=sys.stderr) + return 1 + + result = r.json() + video_version = result.get("video_version", "?") + video_url = result.get("video_url", "") + + # ── Write sync state ─────────────────────────────────────────────────────── + now_iso = datetime.now(tz=timezone.utc).isoformat(timespec="seconds") + existing_sync = _read_sync(project_path) + # "asset" may be absent or null in the response; fall back to the previously stored timestamp either way + _write_sync(project_path, { + **existing_sync, + "last_handoff_at": now_iso, + "video_version": video_version, + "server_updated_at": (result.get("asset") or {}).get("updated_at", existing_sync.get("server_updated_at")), + }) + + print(f"✓ {project_id} → v{video_version} [processed]") + if video_url: + print(f" {video_url}") + + return 0 + + +def _mime_type(path: Path) -> str: + ext = path.suffix.lower() + return { + ".mp4": "video/mp4", + ".mov": "video/quicktime", + ".webm": "video/webm", + ".mkv": "video/x-matroska", + }.get(ext, "application/octet-stream") diff --git a/gnommo/models.py b/gnommo/models.py index 50e0ee4..a5f9375 100644 --- a/gnommo/models.py +++ b/gnommo/models.py @@ -2,7
+2,7 @@ from dataclasses import dataclass, field from pathlib import Path -from typing import Optional +from typing import Optional, Union @dataclass @@ -41,13 +41,18 @@ class ProjectConfig: cutouts: dict[str, CutoutDefinition] = field( default_factory=dict ) # Named zones for video placement + default_filters: dict[str, list[dict]] = field( + default_factory=dict + ) # Named filter presets that can be referenced in videos.json background: str = "" # Background image or video path (in shared_assets/) background_video: str = "" # Deprecated: use background instead slides_path: str = "slides.json" # path to slides.json relative to project videos_path: str = "videos.json" # path to videos.json relative to project audio_path: str = "audio.json" # path to audio.json relative to project audio_source: Optional[str] = None # defaults to talking head - main_video: Optional[str] = None # ID of main video (e.g., talking head) + main_video: Optional[ + Union[str, list] + ] = None # ID(s) of main video(s) - array for multi-segment narration gnommo_scratch: Optional[ str ] = None # directory for intermediate files (e.g., external SSD) @@ -165,6 +170,16 @@ class ColorGradeConfig: curves_master: str = "" # Master (luminance) curve +@dataclass +class EQBand: + """A single parametric EQ band.""" + + freq: float # Center frequency in Hz + gain: float # Gain in dB (negative = cut, positive = boost) + q: float = 1.0 # Q factor (bandwidth), higher = narrower + type: str = "peak" # "peak", "lowshelf", or "highshelf" + + @dataclass class AudioNormalizeConfig: """Configuration for audio normalization filter. @@ -173,9 +188,43 @@ class AudioNormalizeConfig: to improve audio quality and consistency. 
""" + # Parametric EQ bands (applied before other processing) + eq_bands: list[EQBand] = field(default_factory=list) + + # High-pass filter (remove room rumble) + highpass: float = ( + 0.0 # High-pass frequency in Hz (0 = disabled, try 80-120 for voice) + ) + + # Low-pass filter (remove harsh highs) + lowpass: float = ( + 0.0 # Low-pass frequency in Hz (0 = disabled, try 12000-16000 if needed) + ) + + # Room resonance EQ cut (reduce muddy room buildup) + room_eq: bool = False # Enable room resonance cut + room_eq_freq: float = 300.0 # Center frequency for room cut (Hz, typically 200-400) + room_eq_gain: float = -4.0 # Gain in dB (negative = cut) + room_eq_width: float = 1.5 # Q/bandwidth (higher = narrower cut) + + # Noise gate (reduce reverb tails during pauses) + gate: bool = False # Enable noise gate + gate_threshold: float = -35.0 # Threshold in dB (signal below this gets attenuated) + gate_range: float = -20.0 # Attenuation amount in dB when gate is closed + gate_attack: float = 10.0 # Attack time in ms + gate_release: float = 150.0 # Release time in ms + + # Neural de-reverb (arnndn filter - very effective but needs model file) + dereverb_model: str = "" # Path to RNNoise model file (empty = disabled) + dereverb_mix: float = ( + 0.8 # Mix ratio 0.0-1.0 (1.0 = full effect, 0.8 = preserve some natural room) + ) + # Noise reduction (afftdn filter) denoise: bool = True # Enable noise reduction - noise_floor: float = -25.0 # Noise floor in dB (default -25, lower = more aggressive) + noise_floor: float = ( + -25.0 + ) # Noise floor in dB (default -25, lower = more aggressive) # Compression (acompressor filter) compress: bool = True # Enable dynamic range compression @@ -187,7 +236,9 @@ class AudioNormalizeConfig: # Loudness normalization (loudnorm filter - EBU R128) normalize: bool = True # Enable loudness normalization - target_lufs: float = -16.0 # Target integrated loudness (YouTube recommends -14 to -16) + target_lufs: float = ( + -16.0 + ) # Target integrated 
loudness (YouTube recommends -14 to -16) target_lra: float = 11.0 # Target loudness range target_tp: float = -1.5 # Target true peak in dB @@ -234,7 +285,13 @@ class VideoSource: 0.0 # Seconds to pause narration during this video (0 = no pause) ) attribution: Optional[Attribution] = None # Attribution for stock footage - use_audio_channels: str = "both" # Audio channel selection: "both", "left", or "right" + use_audio_channels: str = ( + "both" # Audio channel selection: "both", "left", or "right" + ) + defer_loudnorm: bool = ( + False # If True, skip loudnorm during preprocessing (apply after concatenation) + ) + volume: float = 1.0 # Volume multiplier (1.0=full, >1.0=boost, <1.0=reduce) @dataclass @@ -270,7 +327,10 @@ class AudioDefinition: file: str # Audio filename (relative to audio.json location) volume: float = 1.0 # Volume multiplier (0.0-1.0) loop: bool = False # If True, loop for entire duration from trigger point - ignore_pauses: bool = False # If True, audio continues playing during narration pauses + overlap: Optional[float] = None # Crossfade overlap in seconds when looping + ignore_pauses: bool = ( + False # If True, audio continues playing during narration pauses + ) @dataclass @@ -441,6 +501,10 @@ class RenderPlan: default_factory=list ) # Videos that play after narration ends narration_end_time: float = 0.0 # When narration ends (before outro starts) + # GnommoCache support + cached_files: set = field( + default_factory=set + ) # Video IDs loaded from external cache (show 📁 indicator) # Slide layout configurations (hardcoded for POC) diff --git a/gnommo/parser.py b/gnommo/parser.py index 5ef05bb..63abcc5 100644 --- a/gnommo/parser.py +++ b/gnommo/parser.py @@ -5,6 +5,7 @@ import re from pathlib import Path from typing import Any, Optional +from .cache import resolve_with_cache from .errors import ParseError from .models import ( Attribution, @@ -24,8 +25,9 @@ def parse_manuscript( """ Parse manuscript.txt and extract text content and slide 
markers. - Strips [cite:...] markers from the returned text so they never pollute - alignment contexts. Citations are extracted and returned separately. + Strips [cite:...] and [marker:...] markers from the returned text so they + never pollute alignment contexts. Citations are extracted and returned + separately. Marker cues are personal recording notes and are simply discarded. Returns: Tuple of (full text, list of marker IDs found, list of malformed markers, list of citations) @@ -43,6 +45,10 @@ def parse_manuscript( # Strip [cite:...] markers from text so they don't pollute alignment text = re.sub(r"\[cite:[^\]]+\]", "", text) + # Strip [marker:...] and [cue:...] markers (personal recording cues, ignored by pipeline) + text = re.sub(r"\[marker:[^\]]+\]", "", text) + text = re.sub(r"\[cue:[^\]]+\]", "", text) + # Extract all valid markers like [S1], [video:demo], [Zoom2], etc. # Include . in pattern to catch markers with file extensions (so validator can warn about them) markers = re.findall(r"\[([A-Za-z0-9_:.]+)\]", text) @@ -118,10 +124,7 @@ def parse_citations(manuscript_text: str) -> list[Citation]: def save_citations(citations: list[Citation], path: Path) -> None: """Save citations to a JSON file.""" - data = [ - {"reference": c.reference, "context": c.context} - for c in citations - ] + data = [{"reference": c.reference, "context": c.context} for c in citations] path.write_text(json.dumps(data, indent=2), encoding="utf-8") @@ -179,11 +182,15 @@ def parse_project_config(project_path: Path) -> ProjectConfig: if not isinstance(resolution, list) or len(resolution) != 2: raise ParseError("resolution must be [width, height]", config_path) + # Parse default_filters (named filter presets) + default_filters: dict[str, list[dict]] = data.get("default_filters", {}) + return ProjectConfig( resolution=tuple(resolution), fps=data.get("fps", 30), default_slide_type=data.get("defaultSlideType", "square"), cutouts=cutouts, + default_filters=default_filters, 
background=data.get("background", ""), background_video=data.get("background_video", ""), # Deprecated slides_path=data.get("slides", "slides.json"), @@ -220,12 +227,14 @@ def parse_slides( ) -> dict[str, SlideDefinition]: """Parse slides.json into slide definitions.""" if config and config.slides_path: - slides_path = project_path / config.slides_path + local_slides_path = project_path / config.slides_path else: - slides_path = project_path / "slides.json" + local_slides_path = project_path / "slides.json" + # Try cache fallback for reading JSON + slides_path, _ = resolve_with_cache(local_slides_path, project_path) if not slides_path.exists(): - raise ParseError(f"slides file not found: {slides_path}", slides_path) + raise ParseError(f"slides file not found: {local_slides_path}", local_slides_path) try: data = json.loads(slides_path.read_text(encoding="utf-8")) @@ -257,15 +266,19 @@ def parse_audio( containing audio.json (for resolving relative file paths). """ if config and config.audio_path: - audio_path = project_path / config.audio_path + local_audio_path = project_path / config.audio_path else: - audio_path = project_path / "audio.json" + local_audio_path = project_path / "audio.json" + + # Keep local directory for file lookups (cache fallback handles resolution) + audio_dir = local_audio_path.parent + + # Try cache fallback for reading JSON + audio_path, _ = resolve_with_cache(local_audio_path, project_path) # Audio is optional - return empty dict if not found if not audio_path.exists(): - return {}, project_path - - audio_dir = audio_path.parent + return {}, audio_dir try: data = json.loads(audio_path.read_text(encoding="utf-8")) @@ -278,41 +291,102 @@ def parse_audio( raise ParseError( f"Audio '{audio_id}' missing required field 'file'", audio_path ) + # Parse overlap if specified (timestamp string like "10s") + overlap = None + if "overlap" in audio_data and audio_data["overlap"]: + overlap = parse_timestamp(audio_data["overlap"]) + audio[audio_id] = 
AudioDefinition( file=audio_data["file"], volume=float(audio_data.get("volume", 1.0)), loop=bool(audio_data.get("loop", False)), + overlap=overlap, ignore_pauses=bool(audio_data.get("ignore_pauses", False)), ) return audio, audio_dir +def parse_timestamp(value: str) -> float: + """ + Parse a timestamp string into seconds. + + Supported formats: + - "3.5s" or "3.5" → 3.5 seconds + - "2:54" → 2 minutes 54 seconds (174.0) + - "1:23:45" → 1 hour 23 minutes 45 seconds + - "2:54.5" → 2 minutes 54.5 seconds + - 3.5 (bare JSON number) → 3.5 seconds + + Empty or falsy input (None, "", 0) yields 0.0. + + Returns: + Time in seconds as a float. + + Raises: + ParseError: if a colon-separated value has more than three parts. + ValueError: if a component is not numeric. + """ + if not value: + return 0.0 + + # JSON authors may write a bare number instead of a string; coerce so both forms parse + value = str(value).strip() + + # Remove trailing 's' if present (e.g., "3.5s") + if value.endswith("s"): + value = value[:-1] + + # Check for colon-separated format (MM:SS or HH:MM:SS) + if ":" in value: + parts = value.split(":") + if len(parts) == 2: + # MM:SS format + minutes, seconds = parts + return float(minutes) * 60 + float(seconds) + elif len(parts) == 3: + # HH:MM:SS format + hours, minutes, seconds = parts + return float(hours) * 3600 + float(minutes) * 60 + float(seconds) + else: + raise ParseError(f"Invalid timestamp format: {value}", None) + + # Plain number (seconds) + return float(value) + + def parse_videos( project_path: Path, config: Optional[ProjectConfig] = None ) -> tuple[dict[str, VideoSource], Path]: """ Parse videos.json into video source definitions. + Filter can be specified as: + - A list of filter configs (inline definition) + - A string referencing a named preset in config.default_filters + + Trim points can be specified as: + - skip/take: raw values in seconds (traditional) + - begin/end: timestamp strings like "3.5s", "2:54", "1:23:45" (user-friendly) + These are converted to skip/take internally. + + Returns: Tuple of (videos dict, videos_dir) where videos_dir is the directory containing videos.json (for resolving relative file paths).
""" if config and config.videos_path: - videos_path = project_path / config.videos_path + local_videos_path = project_path / config.videos_path else: - videos_path = project_path / "videos.json" + local_videos_path = project_path / "videos.json" + # Keep local directory for file lookups (cache fallback handles resolution) + videos_dir = local_videos_path.parent + + # Try cache fallback for reading JSON + videos_path, _ = resolve_with_cache(local_videos_path, project_path) if not videos_path.exists(): - raise ParseError(f"videos.json not found: {videos_path}", videos_path) - - videos_dir = videos_path.parent + raise ParseError(f"videos.json not found: {local_videos_path}", local_videos_path) try: data = json.loads(videos_path.read_text(encoding="utf-8")) except json.JSONDecodeError as e: raise ParseError(f"Invalid JSON: {e}", videos_path) + # Get default_filters from config for resolving references + default_filters = config.default_filters if config else {} + videos = {} for video_id, video_data in data.items(): if "source_file" not in video_data: @@ -330,12 +404,39 @@ def parse_videos( url=attr_data.get("url"), ) + # Resolve filter - can be a list or a string reference to default_filters + filter_value = video_data.get("filter", []) + if isinstance(filter_value, str): + # It's a reference to a named filter preset + if filter_value not in default_filters: + raise ParseError( + f"Video '{video_id}' references unknown filter preset '{filter_value}'. 
" + f"Available presets: {list(default_filters.keys())}", + videos_path, + ) + filter_list = default_filters[filter_value] + else: + # It's an inline filter definition + filter_list = filter_value + + # Handle skip/take - can use begin/end as user-friendly alternatives + skip = video_data.get("skip", 0.0) + take = video_data.get("take") + + # Convert begin/end to skip/take if provided + if "begin" in video_data and video_data["begin"]: + skip = parse_timestamp(video_data["begin"]) + if "end" in video_data and video_data["end"]: + end_time = parse_timestamp(video_data["end"]) + # take = end - begin (duration from begin to end) + take = end_time - skip + videos[video_id] = VideoSource( source_file=video_data["source_file"], - filter=video_data.get("filter", []), + filter=filter_list, output_file=video_data.get("output_file"), - take=video_data.get("take"), - skip=video_data.get("skip", 0.0), + take=take, + skip=skip, zoom=video_data.get("zoom", 1.0), cutout=video_data.get("cutout"), always_visible=video_data.get("always_visible", False), @@ -343,11 +444,108 @@ def parse_videos( pause_narration=float(video_data.get("pause_narration", 0)), attribution=attribution, use_audio_channels=video_data.get("use_audio_channels", "both"), + defer_loudnorm=video_data.get("defer_loudnorm", False), + volume=float(video_data.get("volume", 1.0)), ) return videos, videos_dir +def parse_narration( + project_path: Path, config: Optional[ProjectConfig] = None +) -> tuple[dict[str, VideoSource], Path]: + """ + Parse narration.json into narration segment definitions. + + Narration segments are stored in media/narration/ and are processed + separately from videos. Each segment can have filters, begin/end trim + points, and other properties similar to videos. 
+ + Filter can be specified as: + - A list of filter configs (inline definition) + - A string referencing a named preset in config.default_filters + + Trim points can be specified as: + - skip/take: raw values in seconds (traditional) + - begin/end: timestamp strings like "3.5s", "2:54", "1:23:45" (user-friendly) + These are converted to skip/take internally. + + Returns: + Tuple of (narration dict, narration_dir) where narration_dir is the directory + containing narration.json (for resolving relative file paths). + """ + # Narration is always in media/narration/ + # Keep local directory for file lookups (cache fallback handles resolution) + narration_dir = project_path / "media" / "narration" + local_narration_path = narration_dir / "narration.json" + + # Try cache fallback for reading JSON + narration_path, _ = resolve_with_cache(local_narration_path, project_path) + + # Narration is optional - return empty dict if not found + if not narration_path.exists(): + return {}, narration_dir + + try: + data = json.loads(narration_path.read_text(encoding="utf-8")) + except json.JSONDecodeError as e: + raise ParseError(f"Invalid JSON: {e}", narration_path) + + # Get default_filters from config for resolving references + default_filters = config.default_filters if config else {} + + narration = {} + for segment_id, segment_data in data.items(): + if "source_file" not in segment_data: + raise ParseError( + f"Narration segment '{segment_id}' missing required field 'source_file'", + narration_path, + ) + + # Resolve filter - can be a list or a string reference to default_filters + filter_value = segment_data.get("filter", []) + if isinstance(filter_value, str): + # It's a reference to a named filter preset + if filter_value not in default_filters: + raise ParseError( + f"Narration segment '{segment_id}' references unknown filter preset '{filter_value}'. 
" + f"Available presets: {list(default_filters.keys())}", + narration_path, + ) + filter_list = default_filters[filter_value] + else: + # It's an inline filter definition + filter_list = filter_value + + # Handle skip/take - can use begin/end as user-friendly alternatives + skip = segment_data.get("skip", 0.0) + take = segment_data.get("take") + + # Convert begin/end to skip/take if provided + if "begin" in segment_data and segment_data["begin"]: + skip = parse_timestamp(segment_data["begin"]) + if "end" in segment_data and segment_data["end"]: + end_time = parse_timestamp(segment_data["end"]) + # take = end - begin (duration from begin to end) + take = end_time - skip + + narration[segment_id] = VideoSource( + source_file=segment_data["source_file"], + filter=filter_list, + output_file=segment_data.get("output_file"), + take=take, + skip=skip, + zoom=segment_data.get("zoom", 1.0), + cutout=segment_data.get("cutout"), + always_visible=segment_data.get("always_visible", False), + use_audio_channels=segment_data.get("use_audio_channels", "both"), + defer_loudnorm=segment_data.get("defer_loudnorm", False), + volume=float(segment_data.get("volume", 1.0)), + ) + + return narration, narration_dir + + def get_video_duration(video_path: Path) -> float: """Get duration of a video file using ffprobe.""" import subprocess diff --git a/gnommo/preprocessor.py b/gnommo/preprocessor.py index 155e9b8..282e185 100644 --- a/gnommo/preprocessor.py +++ b/gnommo/preprocessor.py @@ -8,14 +8,25 @@ from pathlib import Path from typing import Any, Optional import shutil from .errors import PreprocessError -from .models import VideoSource, ChromaKeyConfig, ColorGradeConfig, GnommoKeyConfig, AudioNormalizeConfig +from .models import ( + VideoSource, + ChromaKeyConfig, + ColorGradeConfig, + GnommoKeyConfig, + AudioNormalizeConfig, + EQBand, +) from typing import Union, Optional -# Number of parallel workers for segment processing -DEFAULT_SEGMENT_WORKERS = 4 +# Number of parallel workers for 
# Number of parallel workers for chunk processing
DEFAULT_CHUNK_WORKERS = 4

# Chunk duration in seconds for parallel filter processing (avoids huge intermediate files)
CHUNK_DURATION = 60

# Proxy resolution for fast preview workflow
PROXY_WIDTH = 320
PROXY_HEIGHT = 180

# Substrings identifying FFmpeg pixel formats that carry an alpha plane
# (planar/packed YUVA, RGBA variants, planar GBRAP, packed AYUV/VUYA).
_ALPHA_PIX_FMT_MARKERS = (
    "yuva",
    "rgba",
    "bgra",
    "argb",
    "abgr",
    "gbrap",
    "ayuv",
    "vuya",
)


def _pix_fmt_has_alpha(pix_fmt: str) -> bool:
    """Return True if an FFmpeg pixel-format name denotes a format with alpha."""
    fmt = pix_fmt.strip().lower()
    if not fmt:
        return False
    # "ya8" / "ya16le" / "ya16be" are gray+alpha and match no marker above.
    return fmt.startswith("ya") or any(m in fmt for m in _ALPHA_PIX_FMT_MARKERS)


def _video_has_alpha(video_path: Path) -> bool:
    """
    Check if the first video stream of a file has an alpha channel.

    Args:
        video_path: Path to the video file to probe with ffprobe

    Returns:
        True when ffprobe reports a pixel format with alpha. False when the
        format has no alpha, when ffprobe exits non-zero, or when ffprobe
        cannot be launched at all (best-effort probe).
    """
    cmd = [
        "ffprobe",
        "-v", "error",
        "-select_streams", "v:0",
        "-show_entries", "stream=pix_fmt",
        "-of", "default=noprint_wrappers=1:nokey=1",
        str(video_path),
    ]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
    except OSError:
        # ffprobe missing or not executable: treat as "no alpha" instead of crashing.
        return False
    if result.returncode != 0:
        return False
    # Check every reported line; some containers print the value more than once.
    return any(_pix_fmt_has_alpha(line) for line in result.stdout.splitlines())


def create_proxy_video(
    source_path: Path,
    proxy_dir: Path,
    force: bool = False,
) -> Path:
    """
    Create a low-resolution proxy of a video for fast preview workflow.

    Args:
        source_path: Path to the source video file
        proxy_dir: Directory to store proxy files
        force: Overwrite existing proxy if True

    Returns:
        Path to the proxy video file (same file name as the source)

    Raises:
        PreprocessError: If ffmpeg exits with a non-zero status. Any partially
            written proxy is removed first so it is not mistaken for a valid
            proxy on the next run.
    """
    proxy_dir.mkdir(parents=True, exist_ok=True)
    proxy_path = proxy_dir / source_path.name

    if proxy_path.exists() and not force:
        return proxy_path

    # Downsample to proxy resolution, preserving audio quality
    # NOTE(review): the proxy keeps the source extension while the video codec
    # is always libx264; containers such as .webm may reject that - confirm.
    cmd = [
        "ffmpeg",
        "-y",
        "-i",
        str(source_path),
        "-vf",
        f"scale={PROXY_WIDTH}:{PROXY_HEIGHT}",
        "-c:v",
        "libx264",
        "-preset",
        "ultrafast",
        "-crf",
        "28",
        "-c:a",
        "copy",  # Keep original audio for transcription
        str(proxy_path),
    ]

    result = subprocess.run(cmd, capture_output=True, text=True)

    if result.returncode != 0:
        # A truncated file would satisfy the exists() check above next time.
        proxy_path.unlink(missing_ok=True)
        raise PreprocessError(
            f"Failed to create proxy for {source_path.name}",
            filter_type="proxy",
            command=" ".join(cmd),
            stderr=result.stderr,
        )

    return proxy_path


def create_proxies_for_videos(
    videos_dir: Path,
    videos: dict[str, VideoSource],
    force: bool = False,
    verbose: bool = False,
) -> Path:
    """
    Create proxy versions of all source videos.

    Args:
        videos_dir: Directory containing source videos
        videos: Dict of video ID -> VideoSource
        force: Overwrite existing proxies if True
        verbose: Print progress

    Returns:
        Path to the proxy directory
    """
    proxy_dir = videos_dir / "proxy"
    proxy_dir.mkdir(parents=True, exist_ok=True)

    # Several video IDs may share one source file; build each proxy once.
    source_files = {video_source.source_file for video_source in videos.values()}

    print(f" Creating proxies ({PROXY_WIDTH}x{PROXY_HEIGHT})...")

    for source_file in sorted(source_files):
        source_path = videos_dir / source_file
        if not source_path.exists():
            if verbose:
                print(f" Skipping {source_file} (not found)")
            continue

        # Same location create_proxy_video() writes to (file name only).
        proxy_path = proxy_dir / source_path.name
        if proxy_path.exists() and not force:
            if verbose:
                print(f" {source_file}: exists, skipping")
            continue

        print(f" {source_file}...", end=" ", flush=True)
        create_proxy_video(source_path, proxy_dir, force)
        print("done")

    return proxy_dir


def ensure_proxy_files_exist(
    source_dir: Path,
    force: bool = False,
    verbose: bool = False,
) -> Path:
    """
    Ensure proxy files exist for all videos in source_dir, creating them on-the-fly if needed.

    This is used when running commands with --proxy to automatically create
    missing proxy files without requiring a separate 'pre --proxy' step.

    Args:
        source_dir: Directory containing source videos (e.g., media/videos or media/narration)
        force: Overwrite existing proxy files if True
        verbose: Print progress

    Returns:
        Path to the proxy directory
    """
    video_extensions = {".mov", ".mp4", ".webm", ".avi", ".mkv", ".m4v"}

    proxy_dir = source_dir / "proxy"
    proxy_dir.mkdir(parents=True, exist_ok=True)

    # Only direct children count (proxy/ and other subdirectories are not
    # descended into); sorted so progress output is deterministic.
    video_files = sorted(
        f
        for f in source_dir.iterdir()
        if f.is_file()
        and f.suffix.lower() in video_extensions
        and "_processed" not in f.stem
        and not f.name.startswith(".")
    )

    if not video_files:
        if verbose:
            print(f" No video files found in {source_dir}")
        return proxy_dir

    # With force=True every video is rebuilt, otherwise only missing proxies.
    missing_proxies = [
        video_file
        for video_file in video_files
        if force or not (proxy_dir / video_file.name).exists()
    ]

    if not missing_proxies:
        if verbose:
            print(f" All proxies exist in {proxy_dir}")
        return proxy_dir

    print(f" Creating {len(missing_proxies)} proxy file(s) on-the-fly...")
    for video_file in missing_proxies:
        print(f" {video_file.name}...", end=" ", flush=True)
        create_proxy_video(video_file, proxy_dir, force=True)
        print("done")

    return proxy_dir
def apply_combined_video_filters_chunked(
    input_path: Path,
    output_path: Path,
    filters: list[dict],
    verbose: bool = False,
    take: float = None,
    scratch_dir: Path = None,
) -> None:
    """
    Apply video filters using chunk-based processing for large files.

    For videos longer than CHUNK_DURATION:
    1. Split into chunks
    2. Process each chunk with filters (in parallel)
    3. Encode to ProRes 4444 with alpha
    4. Concatenate chunks into final output

    Chunking allows parallel processing and avoids huge intermediate files.

    Args:
        input_path: Source video to filter
        output_path: Destination of the filtered video
        filters: Filter definitions forwarded unchanged to the chunk encoder
        verbose: Forwarded to the encoder (short videos) and the concatenation
            step; per-chunk encodes always run non-verbose
        take: Total duration in seconds to process; the duration of
            input_path is probed when None
        scratch_dir: Directory for temporary chunk files
            (default: "chunks" next to output_path)

    Raises:
        PreprocessError: Propagated from chunk encoding or concatenation.
    """
    duration = take if take is not None else get_video_duration(input_path)

    # Short video: process directly without chunking
    if duration <= CHUNK_DURATION:
        _process_chunk_to_prores4444(
            input_path, output_path, filters, 0, duration, verbose, take, True
        )
        return

    # Long video: process in chunks (parallel)
    if scratch_dir is None:
        scratch_dir = output_path.parent / "chunks"
    scratch_dir.mkdir(parents=True, exist_ok=True)

    num_chunks = int(duration / CHUNK_DURATION) + 1
    chunk_files: list[Path] = []
    chunk_tasks: list[tuple] = []  # (index, chunk_path, start_time, chunk_duration)

    # Build list of chunk tasks
    for i in range(num_chunks):
        start_time = i * CHUNK_DURATION
        chunk_duration = min(CHUNK_DURATION, duration - start_time)

        # duration is an exact multiple of CHUNK_DURATION: no trailing chunk
        if chunk_duration <= 0:
            break

        chunk_path = scratch_dir / f"chunk_{i:04d}.mov"
        chunk_files.append(chunk_path)
        chunk_tasks.append((i, chunk_path, start_time, chunk_duration))

    num_workers = min(DEFAULT_CHUNK_WORKERS, len(chunk_tasks))
    print(
        f" Processing {len(chunk_tasks)} chunks in parallel ({num_workers} workers)"
    )

    # Process chunks in parallel
    def process_chunk_task(task):
        i, chunk_path, start_time, chunk_dur = task
        _process_chunk_to_prores4444(
            input_path,
            chunk_path,
            filters,
            start_time,
            chunk_dur,
            verbose=False,  # Suppress verbose in parallel mode
            take=chunk_dur,
        )
        return i, chunk_path

    try:
        completed = 0
        with ThreadPoolExecutor(max_workers=num_workers) as executor:
            futures = {
                executor.submit(process_chunk_task, task): task
                for task in chunk_tasks
            }
            for future in as_completed(futures):
                i, chunk_path = future.result()
                completed += 1
                print(
                    f" Completed chunk {i+1}/{len(chunk_tasks)} ({completed}/{len(chunk_tasks)} done)"
                )

        # Join the chunks in index order. Without this step output_path is
        # never written for videos longer than CHUNK_DURATION.
        print(f" Concatenating {len(chunk_files)} chunks...")
        _concatenate_prores4444_chunks(
            chunk_files, output_path, verbose, keep_audio=True
        )
    finally:
        # Chunk files are large intermediates; remove them on success and failure.
        for chunk_file in chunk_files:
            if chunk_file.exists():
                chunk_file.unlink()

        # Remove chunks directory if empty
        try:
            scratch_dir.rmdir()
        except OSError:
            pass
This is intended as an intermediate format for compositing: - true alpha channel (non-binary edges) @@ -893,7 +1083,7 @@ def _process_segment_to_prores4444( cmd.extend(["-i", str(input_path)]) # Limit duration - actual_take = take if take is not None else segment_duration + actual_take = take if take is not None else chunk_duration if actual_take is not None: cmd.extend(["-t", str(actual_take)]) @@ -929,28 +1119,28 @@ def _process_segment_to_prores4444( print(f" Filter: {video_filter}") print(f" Command: {' '.join(cmd)}") - result = run_ffmpeg_with_progress(cmd, actual_take or segment_duration, "Encoding") + result = run_ffmpeg_with_progress(cmd, actual_take or chunk_duration, "Encoding") if result.returncode != 0: raise PreprocessError( - "Segment processing failed", - filter_type="segment", + "Chunk processing failed", + filter_type="chunk", command=" ".join(cmd), stderr=result.stderr, ) -def _process_segment_to_webm( +def _process_chunk_to_webm( input_path: Path, output_path: Path, filters: list[dict], start_time: float, - segment_duration: float, + chunk_duration: float, verbose: bool = False, take: float = None, ) -> None: """ - Process a video segment with filters and encode to VP9/WebM with alpha. + Process a video chunk with filters and encode to VP9/WebM with alpha. VP9 with alpha uses ~10-20% of ProRes 4444 file size while maintaining good quality for compositing. 
@@ -983,7 +1173,7 @@ def _process_segment_to_webm( cmd.extend(["-i", str(input_path)]) # Limit duration - actual_take = take if take is not None else segment_duration + actual_take = take if take is not None else chunk_duration if actual_take is not None: cmd.extend(["-t", str(actual_take)]) @@ -1017,36 +1207,36 @@ def _process_segment_to_webm( print(f" Filter: {video_filter}") print(f" Command: {' '.join(cmd)}") - result = run_ffmpeg_with_progress(cmd, actual_take or segment_duration, "Encoding") + result = run_ffmpeg_with_progress(cmd, actual_take or chunk_duration, "Encoding") if result.returncode != 0: raise PreprocessError( - "Segment processing failed", - filter_type="segment", + "Chunk processing failed", + filter_type="chunk", command=" ".join(cmd), stderr=result.stderr, ) -def _concatenate_prores4444_segments( - segment_files: list[Path], +def _concatenate_prores4444_chunks( + chunk_files: list[Path], output_path: Path, verbose: bool = False, keep_audio: bool = False, ) -> None: """ - Concatenate ProRes 4444 (MOV) segments into a single ProRes 4444 output. + Concatenate ProRes 4444 (MOV) chunks into a single ProRes 4444 output. Uses FFmpeg concat demuxer, then re-encodes once to ensure alpha and - stream consistency across segments. + stream consistency across chunks. 
""" concat_list = output_path.parent / "concat_list.txt" output_path.parent.mkdir(parents=True, exist_ok=True) with open(concat_list, "w", encoding="utf-8") as f: - for segment in segment_files: - f.write(f"file '{segment.resolve()}'\n") + for chunk in chunk_files: + f.write(f"file '{chunk.resolve()}'\n") cmd: list[str] = [ "ffmpeg", @@ -1071,7 +1261,7 @@ def _concatenate_prores4444_segments( ] if keep_audio: - # safest for intermediates; alternatively "-c:a copy" if identical across segments + # safest for intermediates; alternatively "-c:a copy" if identical across chunks cmd += ["-c:a", "pcm_s16le"] else: cmd += ["-an"] @@ -1086,20 +1276,20 @@ def _concatenate_prores4444_segments( if result.returncode != 0: raise PreprocessError( - "Segment concatenation failed", + "Chunk concatenation failed", filter_type="concat", command=" ".join(cmd), stderr=result.stderr, ) -def _concatenate_webm_segments( - segment_files: list[Path], +def _concatenate_webm_chunks( + chunk_files: list[Path], output_path: Path, verbose: bool = False, ) -> None: """ - Concatenate WebM segments into a single output file. + Concatenate WebM chunks into a single output file. Uses FFmpeg's concat demuxer for lossless concatenation. 
""" @@ -1107,9 +1297,9 @@ def _concatenate_webm_segments( concat_list = output_path.parent / "concat_list.txt" with open(concat_list, "w") as f: - for segment in segment_files: + for chunk in chunk_files: # FFmpeg concat format: file 'path' - f.write(f"file '{segment.resolve()}'\n") + f.write(f"file '{chunk.resolve()}'\n") cmd = [ "ffmpeg", @@ -1138,7 +1328,7 @@ def _concatenate_webm_segments( if result.returncode != 0: raise PreprocessError( - "Segment concatenation failed", + "Chunk concatenation failed", filter_type="concat", command=" ".join(cmd), stderr=result.stderr, @@ -1393,41 +1583,130 @@ def apply_audio_normalize( config: dict[str, Any], verbose: bool = False, take: float = None, + use_audio_channels: str = "both", + skip_loudnorm: bool = False, ) -> None: """ Apply audio normalization: denoise, compress, and loudness normalize. + If skip_loudnorm=True, the loudnorm filter is skipped. Use this for segments + that will be concatenated, then apply loudnorm once to the final output. 
+ Config options: + # Room treatment + highpass: float - High-pass filter frequency in Hz (0 = disabled, try 80-120) + lowpass: float - Low-pass filter frequency in Hz (0 = disabled) + room_eq: bool - Enable room resonance EQ cut + room_eq_freq: float - Center frequency for room cut (default: 300) + room_eq_gain: float - Gain in dB, negative = cut (default: -4) + room_eq_width: float - Q/bandwidth (default: 1.5) + + # Gate (reverb tail reduction) + gate: bool - Enable noise gate + gate_threshold: float - Threshold in dB (default: -35) + gate_range: float - Attenuation in dB when closed (default: -20) + gate_attack: float - Attack time in ms (default: 10) + gate_release: float - Release time in ms (default: 150) + + # Neural de-reverb + dereverb_model: str - Path to RNNoise model file (empty = disabled) + + # Noise reduction denoise: bool - Enable noise reduction (default: True) noise_floor: float - Noise floor in dB (default: -25) + + # Compression compress: bool - Enable compression (default: True) threshold: float - Compression threshold in dB (default: -20) ratio: float - Compression ratio (default: 4) attack: float - Attack time in ms (default: 5) release: float - Release time in ms (default: 50) makeup: float - Makeup gain in dB (default: 2) + + # Loudness normalization normalize: bool - Enable loudness normalization (default: True) target_lufs: float - Target loudness in LUFS (default: -16) target_lra: float - Target loudness range (default: 11) target_tp: float - Target true peak in dB (default: -1.5) - Uses FFmpeg filters: - - afftdn: Adaptive frequency-domain noise reduction - - acompressor: Dynamic range compression - - loudnorm: EBU R128 loudness normalization + Args: + use_audio_channels: "both", "left", or "right" - which channel(s) to use, + output is always stereo with sound in both channels + + Filter chain order: + channel_map -> eq_bands -> highpass -> lowpass -> room_eq -> dereverb -> denoise -> gate -> compress -> normalize """ cfg = 
parse_audio_normalize_config(config) - # Build audio filter chain + # Build audio filter chain (order matters!) audio_filters: list[str] = [] - # 1. Noise reduction (afftdn) + # 0. Channel mapping - take specified channel(s) and output stereo + if use_audio_channels == "left": + # Take left channel, duplicate to both stereo channels + audio_filters.append("pan=stereo|c0=c0|c1=c0") + elif use_audio_channels == "right": + # Take right channel, duplicate to both stereo channels + audio_filters.append("pan=stereo|c0=c1|c1=c1") + + # 0.5. Parametric EQ bands (applied early for tonal shaping) + for band in cfg.eq_bands: + if band.type == "lowshelf": + # Low shelf filter: boosts/cuts frequencies below the center + audio_filters.append( + f"lowshelf=f={band.freq:.1f}:g={band.gain:.1f}:t=q:w={band.q:.2f}" + ) + elif band.type == "highshelf": + # High shelf filter: boosts/cuts frequencies above the center + audio_filters.append( + f"highshelf=f={band.freq:.1f}:g={band.gain:.1f}:t=q:w={band.q:.2f}" + ) + else: + # Peak/parametric EQ band + audio_filters.append( + f"equalizer=f={band.freq:.1f}:width_type=q:width={band.q:.2f}:g={band.gain:.1f}" + ) + + # 1. High-pass filter (remove room rumble and low-frequency buildup) + if cfg.highpass > 0: + audio_filters.append(f"highpass=f={cfg.highpass:.1f}") + + # 2. Low-pass filter (remove harsh highs if needed) + if cfg.lowpass > 0: + audio_filters.append(f"lowpass=f={cfg.lowpass:.1f}") + + # 3. Room resonance EQ cut (reduce muddy frequencies from room modes) + if cfg.room_eq: + # equalizer filter: f=frequency, width_type=q, width=Q, g=gain + audio_filters.append( + f"equalizer=f={cfg.room_eq_freq:.1f}" + f":width_type=q:width={cfg.room_eq_width:.2f}" + f":g={cfg.room_eq_gain:.1f}" + ) + + # 4. 
Neural de-reverb (arnndn - very effective if model available) + if cfg.dereverb_model: + model_path = Path(cfg.dereverb_model) + if model_path.exists(): + audio_filters.append(f"arnndn=m={model_path}:mix={cfg.dereverb_mix:.2f}") + else: + print(f" Warning: dereverb model not found: {model_path}") + + # 5. Noise reduction (afftdn) if cfg.denoise: - # afftdn with adaptive noise floor - # nr = noise reduction amount, nf = noise floor audio_filters.append(f"afftdn=nf={cfg.noise_floor:.1f}") - # 2. Compression (acompressor) + # 6. Noise gate (reduce reverb tails during pauses) + if cfg.gate: + # agate: threshold, range (attenuation), attack, release + audio_filters.append( + f"agate=threshold={cfg.gate_threshold:.1f}dB" + f":range={cfg.gate_range:.1f}dB" + f":attack={cfg.gate_attack:.1f}" + f":release={cfg.gate_release:.1f}" + ) + + # 7. Compression (acompressor) if cfg.compress: audio_filters.append( f"acompressor=threshold={cfg.threshold:.1f}dB" @@ -1437,8 +1716,9 @@ def apply_audio_normalize( f":makeup={cfg.makeup:.1f}dB" ) - # 3. Loudness normalization (loudnorm - EBU R128) - if cfg.normalize: + # 8. 
Loudness normalization (loudnorm - EBU R128) + # Skip if skip_loudnorm=True (for segments that will be concatenated) + if cfg.normalize and not skip_loudnorm: audio_filters.append( f"loudnorm=I={cfg.target_lufs:.1f}" f":LRA={cfg.target_lra:.1f}" @@ -1448,6 +1728,7 @@ def apply_audio_normalize( if not audio_filters: # No filters enabled, just copy import shutil + shutil.copy2(input_path, output_path) return @@ -1459,13 +1740,19 @@ def apply_audio_normalize( if take is not None: cmd.extend(["-t", str(take)]) - cmd.extend([ - "-i", str(input_path), - "-c:v", "copy", # Copy video stream unchanged - "-af", audio_filter, - "-c:a", "pcm_s16le", # Lossless audio output - str(output_path), - ]) + cmd.extend( + [ + "-i", + str(input_path), + "-c:v", + "copy", # Copy video stream unchanged + "-af", + audio_filter, + "-c:a", + "pcm_s16le", # Lossless audio output + str(output_path), + ] + ) if verbose: print(f" Audio filter: {audio_filter}") @@ -1487,15 +1774,46 @@ def apply_audio_normalize( def parse_audio_normalize_config(config: dict[str, Any]) -> AudioNormalizeConfig: """Parse an audio normalize config dictionary into AudioNormalizeConfig.""" + # Parse EQ bands + eq_bands = [] + for band in config.get("eq_bands", []): + eq_bands.append(EQBand( + freq=float(band.get("freq", 1000)), + gain=float(band.get("gain", 0)), + q=float(band.get("q", 1.0)), + type=str(band.get("type", "peak")), + )) + return AudioNormalizeConfig( + # Parametric EQ + eq_bands=eq_bands, + # Room treatment + highpass=float(config.get("highpass", 0.0)), + lowpass=float(config.get("lowpass", 0.0)), + room_eq=bool(config.get("room_eq", False)), + room_eq_freq=float(config.get("room_eq_freq", 300.0)), + room_eq_gain=float(config.get("room_eq_gain", -4.0)), + room_eq_width=float(config.get("room_eq_width", 1.5)), + # Gate + gate=bool(config.get("gate", False)), + gate_threshold=float(config.get("gate_threshold", -35.0)), + gate_range=float(config.get("gate_range", -20.0)), + 
def get_preprocessed_path(videos_dir: Path, video_source: VideoSource) -> Path:
    """
    Get the path to the preprocessed video file.

    Returns output_file if specified, otherwise returns source_file.
    The returned path is not checked for existence.
    """
    file_name = video_source.output_file or video_source.source_file
    return videos_dir / file_name


def _concat_list_entry(path: Path) -> str:
    """Format one `file` line for FFmpeg's concat demuxer, escaping single quotes."""
    # Inside a single-quoted string a literal quote is written as '\''.
    escaped = str(path.resolve()).replace("'", "'\\''")
    return f"file '{escaped}'\n"


def _build_trim_command(
    source_path: Path,
    trimmed_path: Path,
    skip: float,
    take,
    has_alpha: bool,
) -> list[str]:
    """Build the ffmpeg command that trims one segment and re-encodes it at 30 fps."""
    cmd = ["ffmpeg", "-y"]
    if skip > 0:
        cmd.extend(["-ss", str(skip)])
    cmd.extend(["-i", str(source_path)])
    if take is not None:
        cmd.extend(["-t", str(take)])

    if has_alpha:
        # Preserve alpha with ProRes 4444
        cmd.extend(
            [
                "-vf", "fps=30,format=yuva444p10le",
                "-c:v", "prores_ks",
                "-profile:v", "4",
                "-pix_fmt", "yuva444p10le",
                "-c:a", "pcm_s16le",
                "-avoid_negative_ts", "make_zero",
                str(trimmed_path),
            ]
        )
    else:
        # No alpha - use fast h264 encoding
        cmd.extend(
            [
                "-vf", "fps=30",
                "-c:v", "libx264",
                "-preset", "fast",
                "-crf", "18",
                "-c:a", "aac",
                "-b:a", "192k",
                "-avoid_negative_ts", "make_zero",
                "-movflags", "+faststart",
                str(trimmed_path),
            ]
        )
    return cmd


def _normalize_stitched_loudness(output_path: Path) -> None:
    """Run EBU R128 loudnorm over the stitched file and replace it in place."""
    normalized_path = (
        output_path.parent / f"{output_path.stem}_normalized{output_path.suffix}"
    )

    # Use EBU R128 loudnorm targeting YouTube's recommended levels
    loudnorm_cmd = [
        "ffmpeg", "-y",
        "-i", str(output_path),
        "-c:v", "copy",
        "-af", "loudnorm=I=-14:LRA=11:TP=-1.5",
        "-c:a", "aac", "-b:a", "192k",
        "-movflags", "+faststart",
        str(normalized_path),
    ]

    result = subprocess.run(loudnorm_cmd, capture_output=True, text=True)
    if result.returncode != 0:
        normalized_path.unlink(missing_ok=True)
        raise PreprocessError(
            "Loudness normalization failed",
            filter_type="loudnorm",
            command=" ".join(loudnorm_cmd),
            stderr=result.stderr,
        )

    # Single replace step: output_path is never missing in between.
    normalized_path.replace(output_path)


def stitch_narration_segments(
    videos_dir: Path,
    segment_ids: list[str],
    videos: dict[str, VideoSource],
    output_path: Path,
    verbose: bool = False,
) -> Path:
    """
    Stitch multiple narration video segments into a single file.

    Each segment's skip and take values are applied to trim dead video at the
    start/end of each recording. The segments are concatenated in the order
    specified by segment_ids.

    A single segment is returned as its preprocessed path without trimming
    and without writing output_path.

    Args:
        videos_dir: Directory containing video files
        segment_ids: Ordered list of video IDs from videos.json
        videos: Dict of video ID -> VideoSource from videos.json
        output_path: Path for the concatenated output file
        verbose: Enable verbose output

    Returns:
        Path to the stitched video file.

    Raises:
        PreprocessError: If segment_ids is empty, an ID is unknown, a segment
            file is missing, or an ffmpeg step fails.
    """
    if not segment_ids:
        raise PreprocessError(
            "No narration segments to stitch",
            filter_type=None,
        )

    # Validate all IDs before any encoding work is started.
    for video_id in segment_ids:
        if video_id not in videos:
            raise PreprocessError(
                f"Narration segment '{video_id}' not found in videos.json",
                filter_type=None,
            )

    if len(segment_ids) == 1:
        # Single segment - just return its processed path
        return get_preprocessed_path(videos_dir, videos[segment_ids[0]])

    print(f" Concatenating {len(segment_ids)} narration segments...")

    # Create temp directory for trimmed segments
    temp_dir = output_path.parent / "concat_temp"
    temp_dir.mkdir(parents=True, exist_ok=True)
    concat_list = temp_dir / "concat_list.txt"

    trimmed_segments: list[Path] = []

    try:
        for i, video_id in enumerate(segment_ids):
            video_source = videos[video_id]
            source_path = get_preprocessed_path(videos_dir, video_source)

            if not source_path.exists():
                raise PreprocessError(
                    f"Narration segment not found: {source_path}",
                    filter_type=None,
                )

            skip = video_source.skip or 0.0
            take = video_source.take

            if verbose:
                full_duration = get_video_duration(source_path)
                if take is not None:
                    effective_duration = min(take, full_duration - skip)
                else:
                    effective_duration = full_duration - skip
                take_label = "all" if take is None else take
                print(f" Segment {i+1}: {video_id}")
                print(f" Source: {source_path.name}")
                print(
                    f" Skip: {skip}s, Take: {take_label}s, Duration: {effective_duration:.1f}s"
                )

            # If no trimming needed, use source directly
            # NOTE(review): untrimmed sources keep their own codec/framerate
            # while trimmed ones are re-encoded, and the join below uses
            # stream copy - confirm all segments end up stream-compatible.
            if skip == 0 and take is None:
                trimmed_segments.append(source_path)
                continue

            trimmed_path = temp_dir / f"segment_{i:03d}.mov"

            # Check if source has alpha channel (for ProRes 4444, etc.)
            has_alpha = _video_has_alpha(source_path)

            # Re-encode to normalize framerate and fix timestamps
            cmd = _build_trim_command(source_path, trimmed_path, skip, take, has_alpha)
            # Track the file before running so a failed encode is cleaned up too.
            trimmed_segments.append(trimmed_path)

            result = subprocess.run(cmd, capture_output=True, text=True)
            if result.returncode != 0:
                raise PreprocessError(
                    f"Failed to trim segment {video_id}",
                    filter_type="concat",
                    command=" ".join(cmd),
                    stderr=result.stderr,
                )

        # Build concat file list
        with open(concat_list, "w", encoding="utf-8") as f:
            for segment in trimmed_segments:
                f.write(_concat_list_entry(segment))

        # Concatenate all segments
        print(f" Stitching {len(trimmed_segments)} segments -> {output_path.name}")

        cmd = [
            "ffmpeg",
            "-y",
            "-f", "concat",
            "-safe", "0",
            "-i", str(concat_list),
            "-c:v", "copy",
            "-c:a", "copy",
            "-movflags", "+faststart",
            str(output_path),
        ]

        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            raise PreprocessError(
                "Segment concatenation failed",
                filter_type="concat",
                command=" ".join(cmd),
                stderr=result.stderr,
            )

        # Apply loudnorm if any segment had defer_loudnorm=True
        if any(videos[seg_id].defer_loudnorm for seg_id in segment_ids):
            print(" Applying loudness normalization to stitched output...")
            _normalize_stitched_loudness(output_path)
            print(" Loudness normalization complete.")
    finally:
        # Clean up temp files on success and failure; sources used directly
        # (outside temp_dir) are never touched.
        for segment in trimmed_segments:
            if segment.parent == temp_dir:
                segment.unlink(missing_ok=True)
        concat_list.unlink(missing_ok=True)
        try:
            temp_dir.rmdir()
        except OSError:
            pass

    total_duration = get_video_duration(output_path)
    print(f" Stitched duration: {format_time(total_duration)}")

    return output_path
Preserves local script path reference. + +Conflict detection: + - If local project.json mtime > last_pushed_at → local has unpushed changes + → warn and abort unless --force + +Configuration (from .env or environment): + GNOMMOWEB_URL Base URL (e.g. http://localhost:3001) + GNOMMOWEB_API_KEY Bearer token (CONTENT_API_KEY) +""" + +import json +import os +import sys +from datetime import datetime, timezone +from pathlib import Path + +try: + import requests +except ImportError: + print("Error: 'requests' package is required. Run: pip install requests", file=sys.stderr) + sys.exit(1) + +SYNC_FILE = ".gnommo_sync.json" + + +def _load_env_file(): + env_path = Path(__file__).parent.parent / ".env" + if not env_path.exists(): + return + with open(env_path) as f: + for line in f: + line = line.strip() + if not line or line.startswith("#") or "=" not in line: + continue + key, _, value = line.partition("=") + key = key.strip() + value = value.strip().strip('"').strip("'") + if key not in os.environ: + os.environ[key] = value + + +def _read_sync(project_path: Path) -> dict: + sync_file = project_path / SYNC_FILE + if sync_file.exists(): + with open(sync_file) as f: + return json.load(f) + return {} + + +def _write_sync(project_path: Path, data: dict): + with open(project_path / SYNC_FILE, "w") as f: + json.dump(data, f, indent=2) + + +def _parse_ts(ts_str) -> datetime | None: + if not ts_str: + return None + try: + return datetime.fromisoformat(ts_str.replace("Z", "+00:00")) + except ValueError: + return None + + +def cmd_pull(project_path: Path, verbose: bool = False, force: bool = False) -> int: + _load_env_file() + + api_url = os.environ.get("GNOMMOWEB_URL", "").rstrip("/") + api_key = os.environ.get("GNOMMOWEB_API_KEY", "") + + if not api_url: + print("Error: GNOMMOWEB_URL is not set.", file=sys.stderr) + return 1 + if not api_key: + print("Error: GNOMMOWEB_API_KEY is not set.", file=sys.stderr) + return 1 + + project_file = project_path / "project.json" + if not 
project_file.exists(): + print(f"Error: {project_file} not found", file=sys.stderr) + return 1 + + with open(project_file) as f: + local_project = json.load(f) + + project_id = local_project.get("id") + if not project_id: + print("Error: project.json missing 'id'.", file=sys.stderr) + return 1 + + # ── Conflict check ──────────────────────────────────────────────────────── + if not force: + sync = _read_sync(project_path) + last_pushed_at = _parse_ts(sync.get("last_pushed_at")) + local_mtime = datetime.fromtimestamp( + project_file.stat().st_mtime, tz=timezone.utc + ) + if last_pushed_at and local_mtime > last_pushed_at: + print( + f"⚠ project.json has local changes since last push " + f"({local_mtime.strftime('%Y-%m-%d %H:%M')} > " + f"{last_pushed_at.strftime('%Y-%m-%d %H:%M')}).", + file=sys.stderr, + ) + print( + " Push first with `gnommo push -p` or use `gnommo pull -p --force`.", + file=sys.stderr, + ) + return 1 + + # ── Fetch from server ───────────────────────────────────────────────────── + if verbose: + print(f"Pulling {project_id} from {api_url}…") + + try: + r = requests.get( + f"{api_url}/api/projects/{project_id}", + headers={"Authorization": f"Bearer {api_key}"}, + timeout=30, + ) + r.raise_for_status() + except requests.exceptions.ConnectionError: + print(f"Error: Could not connect to {api_url}", file=sys.stderr) + return 1 + except requests.exceptions.HTTPError as e: + if e.response.status_code == 404: + print(f"Error: Project '{project_id}' not found on server. 
Push it first.", file=sys.stderr) + else: + print(f"Error: Server returned {e.response.status_code}", file=sys.stderr) + return 1 + + server = r.json() + server_updated_at = server.get("updated_at") + project_type = server.get("type") + + # ── Merge into project.json ─────────────────────────────────────────────── + if project_type == "parent": + _merge_parent(local_project, server, verbose) + count = len(server.get("shorts", [])) + print(f"✓ Pulled {project_id} (parent video) — {count} short(s) in index") + elif project_type == "short": + _merge_short(local_project, server, verbose) + print(f"✓ Pulled {project_id} (short) — [{server.get('status')}]") + else: + print(f"Error: unexpected project type: {project_type}", file=sys.stderr) + return 1 + + # ── Write back ──────────────────────────────────────────────────────────── + with open(project_file, "w") as f: + json.dump(local_project, f, indent=2, ensure_ascii=False) + f.write("\n") + + now_iso = datetime.now(tz=timezone.utc).isoformat(timespec="seconds") + existing_sync = _read_sync(project_path) + _write_sync(project_path, { + **existing_sync, + "last_pulled_at": now_iso, + "server_updated_at": server_updated_at, + "last_pushed_at": existing_sync.get("last_pushed_at"), + }) + + return 0 + + +def _merge_parent(local: dict, server: dict, verbose: bool): + """Update parent project.json: name, description, shorts index (slugs).""" + local["name"] = server.get("title", local.get("name")) + local["description"] = server.get("description") or local.get("description") + # shorts is a list of slugs — update from server's shorts list + server_shorts = server.get("shorts", []) + local["shorts"] = [s["project_id"] for s in server_shorts] + if verbose: + print(f" shorts index: {local['shorts']}") + + +def _merge_short(local: dict, server: dict, verbose: bool): + """Update short project.json: name, hook, platform_targets, resolution, fps, duration.""" + local["name"] = server.get("title", local.get("name")) + if 
server.get("hook"): + local["hook"] = server["hook"] + if server.get("platform_targets"): + local["platform_targets"] = server["platform_targets"] + if server.get("resolution"): + local["resolution"] = server["resolution"] + if server.get("fps"): + local["fps"] = server["fps"] + if server.get("duration_seconds"): + local["duration_seconds"] = server["duration_seconds"] + if server.get("parent_project_id"): + local["parent_project"] = server["parent_project_id"] + # Never overwrite local script path — that stays local diff --git a/gnommo/push.py b/gnommo/push.py new file mode 100644 index 0000000..4556ae7 --- /dev/null +++ b/gnommo/push.py @@ -0,0 +1,247 @@ +"""Push project metadata to gnommoweb server. + +Usage: + gnommo push -p video1 # push parent video project + gnommo push -p short_pixelated_universe # push a short project + gnommo push -p myproject --force # force push, overwrite server + +Reads project.json and POSTs to POST /api/projects/push. + +If project.json contains a "parent_project" field, the project is pushed +as a short and registered under that parent. Otherwise it is pushed as a +parent video project. + +Parent project.json "shorts" field is a list of slugs (just an index): + "shorts": ["short_pixelated_universe", "short_planck_length"] + +Short project.json has its own full config plus a parent_project field: + { + "id": "short_pixelated_universe", + "parent_project": "Video1", + "resolution": [1080, 1920], + "fps": 30, + "duration_seconds": 60, + ... + } + +Conflict detection: + - If server.updated_at > our recorded server_updated_at → server has newer changes + → warn and abort unless --force + +Configuration (from .env or environment): + GNOMMOWEB_URL Base URL (e.g. 
http://localhost:3001) + GNOMMOWEB_API_KEY Bearer token (CONTENT_API_KEY from gnommoweb) +""" + +import json +import os +import sys +from datetime import datetime, timezone +from pathlib import Path + +try: + import requests +except ImportError: + print("Error: 'requests' package is required. Run: pip install requests", file=sys.stderr) + sys.exit(1) + +SYNC_FILE = ".gnommo_sync.json" + + +def _load_env_file(): + env_path = Path(__file__).parent.parent / ".env" + if not env_path.exists(): + return + with open(env_path) as f: + for line in f: + line = line.strip() + if not line or line.startswith("#") or "=" not in line: + continue + key, _, value = line.partition("=") + key = key.strip() + value = value.strip().strip('"').strip("'") + if key not in os.environ: + os.environ[key] = value + + +def _read_sync(project_path: Path) -> dict: + sync_file = project_path / SYNC_FILE + if sync_file.exists(): + with open(sync_file) as f: + return json.load(f) + return {} + + +def _write_sync(project_path: Path, data: dict): + with open(project_path / SYNC_FILE, "w") as f: + json.dump(data, f, indent=2) + + +def _parse_ts(ts_str) -> datetime | None: + if not ts_str: + return None + try: + return datetime.fromisoformat(ts_str.replace("Z", "+00:00")) + except ValueError: + return None + + +def cmd_push(project_path: Path, verbose: bool = False, force: bool = False) -> int: + _load_env_file() + + api_url = os.environ.get("GNOMMOWEB_URL", "").rstrip("/") + api_key = os.environ.get("GNOMMOWEB_API_KEY", "") + + if not api_url: + print("Error: GNOMMOWEB_URL is not set.", file=sys.stderr) + return 1 + if not api_key: + print("Error: GNOMMOWEB_API_KEY is not set.", file=sys.stderr) + return 1 + + project_file = project_path / "project.json" + if not project_file.exists(): + print(f"Error: {project_file} not found", file=sys.stderr) + return 1 + + with open(project_file) as f: + project = json.load(f) + + project_id = project.get("id") + name = project.get("name") + if not project_id or 
not name: + print("Error: project.json must have 'id' and 'name' fields.", file=sys.stderr) + return 1 + + parent_project = project.get("parent_project") + + # ── Conflict check ──────────────────────────────────────────────────────── + if not force: + sync = _read_sync(project_path) + recorded_server_ts = _parse_ts(sync.get("server_updated_at")) + if recorded_server_ts: + try: + r_check = requests.get( + f"{api_url}/api/projects/{project_id}", + headers={"Authorization": f"Bearer {api_key}"}, + timeout=10, + ) + if r_check.status_code == 200: + current_server_ts = _parse_ts(r_check.json().get("updated_at")) + if current_server_ts and current_server_ts > recorded_server_ts: + print( + f"⚠ Server has changes since your last sync " + f"({current_server_ts.strftime('%Y-%m-%d %H:%M')} > " + f"{recorded_server_ts.strftime('%Y-%m-%d %H:%M')}).", + file=sys.stderr, + ) + print( + " Pull first with `gnommo pull -p` or use `gnommo push -p --force`.", + file=sys.stderr, + ) + return 1 + except requests.exceptions.ConnectionError: + pass + + # ── Build payload ───────────────────────────────────────────────────────── + if parent_project: + payload = _build_short_payload(project, project_path, verbose) + else: + payload = _build_parent_payload(project, project_path, verbose) + + if verbose: + kind = "short" if parent_project else "parent video" + print(f"Pushing {project_id} ({kind}) to {api_url}") + + # ── POST ────────────────────────────────────────────────────────────────── + try: + r = requests.post( + f"{api_url}/api/projects/push", + json=payload, + headers={"Authorization": f"Bearer {api_key}"}, + timeout=30, + ) + r.raise_for_status() + except requests.exceptions.ConnectionError: + print(f"Error: Could not connect to {api_url}", file=sys.stderr) + return 1 + except requests.exceptions.HTTPError as e: + print(f"Error: Server returned {e.response.status_code}", file=sys.stderr) + try: + print(f" {e.response.json()}", file=sys.stderr) + except Exception: + pass + return 
1 + + result = r.json() + server_updated_at = result.get("server_updated_at") + + # ── Write sync state ────────────────────────────────────────────────────── + now_iso = datetime.now(tz=timezone.utc).isoformat(timespec="seconds") + existing_sync = _read_sync(project_path) + _write_sync(project_path, { + **existing_sync, + "last_pushed_at": now_iso, + "server_updated_at": server_updated_at, + }) + + # ── Print summary ───────────────────────────────────────────────────────── + asset = result.get("asset", {}) + if result.get("type") == "short": + print(f"✓ {project_id} → gn_asset #{asset.get('id')} [{asset.get('status')}]") + if result.get("task_created"): + print(f" task #{result['task_id']} created") + else: + print(f"✓ {project_id} → gn_asset #{asset.get('id')} ({asset.get('name')})") + + return 0 + + +def _build_parent_payload(project: dict, project_path: Path, verbose: bool) -> dict: + # Read the manuscript file if one is specified + script_content = None + manuscript_str = project.get("manuscript") + if manuscript_str: + manuscript_path = project_path / manuscript_str + if manuscript_path.exists(): + script_content = manuscript_path.read_text() + if verbose: + print(f" Read manuscript: {manuscript_path} ({len(script_content)} chars)") + else: + print(f" Warning: manuscript file not found: {manuscript_path}", file=sys.stderr) + + return { + "project_id": project["id"], + "name": project["name"], + "description": project.get("description"), + "coursecode": project.get("coursecode"), + "script_content": script_content, + "shorts": project.get("shorts", []), # list of slugs, not objects + } + + +def _build_short_payload(project: dict, project_path: Path, verbose: bool) -> dict: + # Read the script file if one is specified + script_content = None + script_path_str = project.get("script") + if script_path_str: + script_path = project_path / script_path_str + if script_path.exists(): + script_content = script_path.read_text() + if verbose: + print(f" Read script: 
{script_path} ({len(script_content)} chars)") + else: + print(f" Warning: script file not found: {script_path}", file=sys.stderr) + + return { + "project_id": project["id"], + "name": project["name"], + "description": project.get("description"), + "parent_project": project["parent_project"], + "hook": project.get("hook"), + "script_content": script_content, + "platform_targets": project.get("platform_targets", ["youtube"]), + "resolution": project.get("resolution"), + "fps": project.get("fps"), + "duration_seconds": project.get("duration_seconds"), + } diff --git a/gnommo/renderer.py b/gnommo/renderer.py index 67c1b76..18fe05a 100644 --- a/gnommo/renderer.py +++ b/gnommo/renderer.py @@ -19,6 +19,110 @@ from .models import ( from .preprocessor import run_ffmpeg_with_progress +def _get_audio_duration(audio_path: Path) -> float: + """Get duration of an audio file using ffprobe.""" + cmd = [ + "ffprobe", + "-v", "error", + "-show_entries", "format=duration", + "-of", "default=noprint_wrappers=1:nokey=1", + str(audio_path), + ] + result = subprocess.run(cmd, capture_output=True, text=True) + if result.returncode != 0: + raise RenderError(f"Failed to get duration for {audio_path}: {result.stderr}") + return float(result.stdout.strip()) + + +def _build_crossfade_loop_filter( + input_label: str, + output_label: str, + audio_duration: float, + overlap: float, + needed_duration: float, + volume: float, + delay_ms: int, +) -> list[str]: + """ + Build FFmpeg filter chain for crossfade looping. + + Creates a seamless loop by overlapping copies of the audio with fade in/out. + Each loop iteration crossfades with the next for `overlap` seconds. 
+ + Args: + input_label: Input stream label (e.g., "[0:a]") + output_label: Output stream label (e.g., "[aud0]") + audio_duration: Duration of the source audio in seconds + overlap: Crossfade overlap duration in seconds + needed_duration: Total duration needed + volume: Volume multiplier + delay_ms: Initial delay in milliseconds + + Returns: + List of filter strings to append to the filter_complex + """ + filters = [] + loop_len = audio_duration - overlap + + # Calculate number of loop iterations needed (add 1 extra for safety) + n_loops = math.ceil(needed_duration / loop_len) + 1 + + # Limit to reasonable number of loops to avoid filter complexity explosion + n_loops = min(n_loops, 100) + + if n_loops <= 1: + # Single play, no looping needed + filters.append( + f"{input_label}atrim=0:{needed_duration:.3f}," + f"asetpts=PTS-STARTPTS," + f"adelay={delay_ms}|{delay_ms}," + f"volume={volume:.2f}{output_label}" + ) + return filters + + # Split input into n_loops copies + split_labels = [f"[xfloop_{output_label[1:-1]}_{i}]" for i in range(n_loops)] + filters.append(f"{input_label}asplit={n_loops}{''.join(split_labels)}") + + # Process each copy with appropriate delay and fades + mix_labels = [] + for i in range(n_loops): + copy_label = split_labels[i] + out_label = f"[xfl_{output_label[1:-1]}_{i}]" + mix_labels.append(out_label) + + loop_delay = i * loop_len + total_delay_ms = delay_ms + int(loop_delay * 1000) + + # Build filter chain for this copy + chain_parts = [] + + # Fade in at start (except first copy) + if i > 0: + chain_parts.append(f"afade=t=in:d={overlap:.3f}") + + # Fade out at end (for overlap with next copy) + # Calculate fade start time + fade_out_start = audio_duration - overlap + if fade_out_start > 0: + chain_parts.append(f"afade=t=out:st={fade_out_start:.3f}:d={overlap:.3f}") + + chain_parts.append(f"adelay={total_delay_ms}|{total_delay_ms}") + chain_parts.append(f"volume={volume:.2f}") + + filter_chain = ",".join(chain_parts) + 
filters.append(f"{copy_label}{filter_chain}{out_label}") + + # Mix all copies together, then trim to needed duration + filters.append( + f"{''.join(mix_labels)}amix=inputs={n_loops}:duration=longest:normalize=0," + f"atrim=0:{needed_duration + delay_ms/1000:.3f}," + f"asetpts=PTS-STARTPTS{output_label}" + ) + + return filters + + def render(plan: RenderPlan, output_path: Path, verbose: bool = False) -> None: """ Render the final video using FFmpeg. @@ -56,6 +160,7 @@ def _resolve_video_path( videos_dir: Path, video_source: VideoSource, shared_assets_dir: Path = None, + project_path: Path = None, ) -> Path: """Resolve the actual video file path (output_file if exists, else source_file). @@ -63,7 +168,10 @@ def _resolve_video_path( compressed alpha channel support. If video_source.is_shared is True, looks in shared_assets_dir instead of videos_dir. + Uses gnommocache fallback if configured and project_path is provided. """ + from .cache import resolve_with_cache + # Determine base directory based on is_shared flag if video_source.is_shared and shared_assets_dir: base_dir = shared_assets_dir @@ -72,26 +180,47 @@ def _resolve_video_path( if video_source.output_file: video_path = base_dir / video_source.output_file - if video_path.exists(): + # Check with cache fallback + if project_path: + resolved, _ = resolve_with_cache(video_path, project_path) + if resolved.exists(): + return resolved + elif video_path.exists(): return video_path # Check for WebM variant (preprocessing outputs compressed WebM instead of ProRes) webm_path = video_path.with_suffix(".mov") - if webm_path.exists(): + if project_path: + resolved, _ = resolve_with_cache(webm_path, project_path) + if resolved.exists(): + return resolved + elif webm_path.exists(): return webm_path - return base_dir / video_source.source_file + + # Fall back to source_file with cache fallback + source_path = base_dir / video_source.source_file + if project_path: + resolved, _ = resolve_with_cache(source_path, project_path) 
+ return resolved + return source_path def _has_audio_stream(video_path: Path) -> bool: """Check if a video file contains an audio stream using ffprobe.""" result = subprocess.run( [ - "ffprobe", "-v", "error", - "-select_streams", "a", - "-show_entries", "stream=index", - "-of", "csv=p=0", + "ffprobe", + "-v", + "error", + "-select_streams", + "a", + "-show_entries", + "stream=index", + "-of", + "csv=p=0", str(video_path), ], - capture_output=True, text=True, + capture_output=True, + text=True, ) return bool(result.stdout.strip()) @@ -131,7 +260,7 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]: # Add -ss seek BEFORE -i for skip parameter and/or partial rendering always_visible_inputs: list[int] = [] for video_id, video_source, cutout in plan.narration_videos: - video_path = _resolve_video_path(videos_dir, video_source, shared_assets_dir) + video_path = _resolve_video_path(videos_dir, video_source, shared_assets_dir, project_path) # Combine video skip setting with partial render offset total_seek = video_source.skip + plan.input_seek_time if total_seek > 0: @@ -141,12 +270,14 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]: input_idx += 1 # Input: background image/video (if specified) + from .cache import resolve_with_cache bg_file = plan.config.background or plan.config.background_video has_background = bool(bg_file) bg_idx = None bg_is_image = False if has_background: bg_path = project_path / bg_file + bg_path, _ = resolve_with_cache(bg_path, project_path) if not bg_path.exists(): bg_path = project_path.parent / bg_file image_extensions = {".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".webp"} @@ -169,6 +300,7 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]: for event in plan.slide_events: if event.slide_id not in slide_inputs: image_path = slides_dir / event.slide_def.image + image_path, _ = resolve_with_cache(image_path, project_path) cmd.extend(["-i", 
str(image_path)]) slide_inputs[event.slide_id] = input_idx input_idx += 1 @@ -181,7 +313,7 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]: for i, event in enumerate(plan.video_events): video_path = _resolve_video_path( - videos_dir, event.video_source, shared_assets_dir + videos_dir, event.video_source, shared_assets_dir, project_path ) # Seek to skip point before loading input skip = event.video_source.skip @@ -199,7 +331,7 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]: for i, event in enumerate(plan.outro_events): video_path = _resolve_video_path( - videos_dir, event.video_source, shared_assets_dir + videos_dir, event.video_source, shared_assets_dir, project_path ) # Seek to skip point before loading input skip = event.video_source.skip @@ -217,13 +349,18 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]: # Input: audio files audio_dir = plan.audio_dir.resolve() if plan.audio_dir else project_path audio_inputs: dict[str, int] = {} # audio_id -> input_idx + audio_durations: dict[str, float] = {} # audio_id -> duration (for crossfade loops) for event in plan.audio_events: if event.audio_id not in audio_inputs: audio_path = audio_dir / event.audio_def.file + audio_path, _ = resolve_with_cache(audio_path, project_path) cmd.extend(["-i", str(audio_path)]) audio_inputs[event.audio_id] = input_idx input_idx += 1 + # Cache duration if this audio uses crossfade looping + if event.audio_def.loop and event.audio_def.overlap: + audio_durations[event.audio_id] = _get_audio_duration(audio_path) # Build filter_complex filter_complex = build_filter_complex( @@ -236,6 +373,7 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]: video_inputs, num_inputs_before_audio, audio_inputs, + audio_durations, video_events_with_audio, outro_inputs, outro_events_with_audio, @@ -541,6 +679,7 @@ def build_filter_complex( video_inputs: dict[int, int], # event_index -> input_idx 
num_inputs_before_audio: int, audio_inputs: dict[str, int], + audio_durations: dict[str, float], # audio_id -> duration (for crossfade loops) video_events_with_audio: set[int] = None, outro_inputs: dict[int, int] = None, # outro event_index -> input_idx outro_events_with_audio: set[int] = None, @@ -790,48 +929,65 @@ def build_filter_complex( main_audio_idx = always_visible_inputs[0] audio_labels_to_mix = [] - # Get audio channel setting from first narration video + # Get audio channel setting and volume from first narration video channel_filter = "" + narration_volume = 1.0 if plan.narration_videos: _, first_video_source, _ = plan.narration_videos[0] channel_filter = _build_audio_channel_filter( first_video_source.use_audio_channels ) + narration_volume = first_video_source.volume + + # Build volume filter if not 1.0 + volume_filter = f"volume={narration_volume:.2f}" if narration_volume != 1.0 else "" # Use narration_end_time to stop audio before outro (if outro exists) - audio_end_time = plan.narration_end_time if plan.outro_events else plan.total_duration + audio_end_time = ( + plan.narration_end_time if plan.outro_events else plan.total_duration + ) if not plan.narration_pauses: - # Simple case: trim main audio to end before outro (with optional channel filter) + # Simple case: trim main audio to end before outro (with optional channel and volume filters) + filter_parts = [] + if channel_filter: + filter_parts.append(channel_filter) + if volume_filter: + filter_parts.append(volume_filter) + if plan.outro_events: # Trim narration audio to stop before outro - if channel_filter: - filters.append(f"[{main_audio_idx}:a]{channel_filter}atrim=0:{audio_end_time:.3f},asetpts=PTS-STARTPTS[main_aud]") - else: - filters.append(f"[{main_audio_idx}:a]atrim=0:{audio_end_time:.3f},asetpts=PTS-STARTPTS[main_aud]") + filter_parts.append(f"atrim=0:{audio_end_time:.3f}") + filter_parts.append("asetpts=PTS-STARTPTS") + filters.append( + 
f"[{main_audio_idx}:a]{','.join(filter_parts)}[main_aud]" + ) audio_labels_to_mix.append("[main_aud]") - elif channel_filter: - filters.append(f"[{main_audio_idx}:a]{channel_filter}[main_aud]") + elif filter_parts: + filters.append(f"[{main_audio_idx}:a]{','.join(filter_parts)}[main_aud]") audio_labels_to_mix.append("[main_aud]") else: audio_labels_to_mix.append(f"[{main_audio_idx}:a]") else: # Complex case: segment the narration audio for pauses - segments = _build_narration_segments( - plan.narration_pauses, audio_end_time - ) + segments = _build_narration_segments(plan.narration_pauses, audio_end_time) for seg_idx, (src_start, src_end, out_start, out_end) in enumerate( segments ): seg_label = f"narr_aud{seg_idx}" delay_ms = int(out_start * 1000) # Trim audio to source range, then delay to output position - # Apply channel filter if needed - channel_part = f"{channel_filter}," if channel_filter else "" + # Apply channel filter, volume filter if needed + filter_parts = [] + if channel_filter: + filter_parts.append(channel_filter) + filter_parts.append(f"atrim={src_start:.3f}:{src_end:.3f}") + filter_parts.append("asetpts=PTS-STARTPTS") + filter_parts.append(f"adelay={delay_ms}|{delay_ms}") + if volume_filter: + filter_parts.append(volume_filter) filters.append( - f"[{main_audio_idx}:a]{channel_part}atrim={src_start:.3f}:{src_end:.3f}," - f"asetpts=PTS-STARTPTS," - f"adelay={delay_ms}|{delay_ms}[{seg_label}]" + f"[{main_audio_idx}:a]{','.join(filter_parts)}[{seg_label}]" ) audio_labels_to_mix.append(f"[{seg_label}]") @@ -850,7 +1006,8 @@ def build_filter_complex( if plan.narration_pauses and not event.audio_def.ignore_pauses: # Build segments that skip narration pauses (pauses by default) relevant_pauses = [ - p for p in plan.narration_pauses + p + for p in plan.narration_pauses if p.output_time > event.start_time ] src_pos = 0.0 @@ -892,13 +1049,29 @@ def build_filter_complex( # Simple loop: no pauses or ignore_pauses=True label = f"aud{i}" delay_ms = 
int(event.start_time * 1000) - filters.append( - f"[{audio_idx}:a]aloop=loop=-1:size=2e+09," - f"atrim=0:{remaining:.3f}," - f"asetpts=PTS-STARTPTS," - f"adelay={delay_ms}|{delay_ms}," - f"volume={volume:.2f}[{label}]" - ) + + if event.audio_def.overlap and event.audio_id in audio_durations: + # Crossfade loop: overlap copies with fade in/out + audio_dur = audio_durations[event.audio_id] + crossfade_filters = _build_crossfade_loop_filter( + input_label=f"[{audio_idx}:a]", + output_label=f"[{label}]", + audio_duration=audio_dur, + overlap=event.audio_def.overlap, + needed_duration=remaining, + volume=volume, + delay_ms=delay_ms, + ) + filters.extend(crossfade_filters) + else: + # Standard loop without crossfade + filters.append( + f"[{audio_idx}:a]aloop=loop=-1:size=2e+09," + f"atrim=0:{remaining:.3f}," + f"asetpts=PTS-STARTPTS," + f"adelay={delay_ms}|{delay_ms}," + f"volume={volume:.2f}[{label}]" + ) audio_labels_to_mix.append(f"[{label}]") else: # One-shot audio: delay to trigger time @@ -952,8 +1125,9 @@ def build_filter_complex( if len(audio_labels_to_mix) > 1: num_audio_tracks = len(audio_labels_to_mix) audio_mix_inputs = "".join(audio_labels_to_mix) + # normalize=0 prevents amix from dividing volume by number of inputs filters.append( - f"{audio_mix_inputs}amix=inputs={num_audio_tracks}:duration=longest:dropout_transition=0[aout]" + f"{audio_mix_inputs}amix=inputs={num_audio_tracks}:duration=longest:dropout_transition=0:normalize=0[aout]" ) elif len(audio_labels_to_mix) == 1: # Single audio track, just copy it diff --git a/gnommo/transcriber.py b/gnommo/transcriber.py index 405ae1f..391a983 100644 --- a/gnommo/transcriber.py +++ b/gnommo/transcriber.py @@ -5,7 +5,9 @@ import subprocess from dataclasses import dataclass from pathlib import Path +from .cache import resolve_with_cache from .errors import GnommoError +from typing import Optional @dataclass @@ -78,8 +80,19 @@ def save_transcript(words: list[TranscribedWord], output_path: Path) -> None: 
json.dump(data, f, indent=2) -def load_transcript(transcript_path: Path) -> list[TranscribedWord]: - """Load transcribed words from a JSON file.""" +def load_transcript( + transcript_path: Path, project_path: Optional[Path] = None +) -> list[TranscribedWord]: + """Load transcribed words from a JSON file. + + Args: + transcript_path: Path to the transcript JSON file + project_path: Optional project path for cache fallback + """ + # Try cache fallback if project_path provided + if project_path: + transcript_path, _ = resolve_with_cache(transcript_path, project_path) + if not transcript_path.exists(): raise TranscriptionError(f"Transcript file not found: {transcript_path}") @@ -89,3 +102,86 @@ def load_transcript(transcript_path: Path) -> list[TranscribedWord]: return [ TranscribedWord(word=w["word"], start=w["start"], end=w["end"]) for w in data ] + + +def _format_srt_timestamp(seconds: float) -> str: + """Format seconds as SRT timestamp: HH:MM:SS,mmm""" + hours = int(seconds // 3600) + minutes = int((seconds % 3600) // 60) + secs = int(seconds % 60) + millis = int((seconds % 1) * 1000) + return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}" + + +def words_to_srt( + words: list[TranscribedWord], + max_words_per_line: int = 10, + max_duration: float = 5.0, + gap_threshold: float = 1.0, +) -> str: + """ + Convert word-level timestamps to SRT caption format. 
+ + Groups words into readable caption segments based on: + - Maximum words per line (default: 10) + - Maximum segment duration (default: 5 seconds) + - Natural gaps between words (default: 1 second pause triggers new segment) + + Args: + words: List of TranscribedWord with timestamps + max_words_per_line: Maximum words before splitting to new segment + max_duration: Maximum duration of a single caption segment + gap_threshold: Pause duration that triggers a new segment + + Returns: + SRT formatted string ready for YouTube upload + """ + if not words: + return "" + + segments: list[tuple[float, float, str]] = [] # (start, end, text) + current_words: list[str] = [] + segment_start: float = words[0].start + segment_end: float = words[0].end + + for i, word in enumerate(words): + # Check if we should start a new segment + start_new_segment = False + + # Gap between words + if current_words and (word.start - segment_end) > gap_threshold: + start_new_segment = True + + # Too many words + if len(current_words) >= max_words_per_line: + start_new_segment = True + + # Segment too long + if current_words and (word.end - segment_start) > max_duration: + start_new_segment = True + + if start_new_segment and current_words: + # Save current segment + text = " ".join(current_words) + segments.append((segment_start, segment_end, text)) + # Start new segment + current_words = [] + segment_start = word.start + + current_words.append(word.word) + segment_end = word.end + + # Don't forget the last segment + if current_words: + text = " ".join(current_words) + segments.append((segment_start, segment_end, text)) + + # Format as SRT + srt_lines = [] + for idx, (start, end, text) in enumerate(segments, 1): + srt_lines.append(str(idx)) + srt_lines.append(f"{_format_srt_timestamp(start)} --> {_format_srt_timestamp(end)}") + srt_lines.append(text) + srt_lines.append("") # Blank line between entries + + return "\n".join(srt_lines) diff --git a/gnommo/transformer.py b/gnommo/transformer.py 
index 187a8f8..a5d1237 100644 --- a/gnommo/transformer.py +++ b/gnommo/transformer.py @@ -442,6 +442,7 @@ def build_render_plan( audio: Optional[dict[str, AudioDefinition]] = None, audio_dir: Optional[Path] = None, slide_range: Optional[tuple[str, Optional[str]]] = None, + proxy: Optional[bool] = False, ) -> tuple[RenderPlan, list[MarkerTiming]]: """ Build a complete render plan from manuscript and transcription. @@ -461,9 +462,15 @@ def build_render_plan( audio_dir = audio_dir or project_path # Find the main narration video first (need skip value for timing adjustment) - narration_video_id = config.main_video + narration_video_id = "narration_combined.mov" # Default narration video ID + # Handle legacy list format - use first element + if isinstance(narration_video_id, list): + narration_video_id = narration_video_id[0] if narration_video_id else None if not (narration_video_id and narration_video_id in videos): - raise ValueError("Main video not specified or not found in videos.") + raise ValueError( + f"Main video '{narration_video_id}' not specified or not found in videos. 
" + f"Available: {list(videos.keys())}" + ) narration_video = videos[narration_video_id] # Align markers to transcription timestamps @@ -495,8 +502,13 @@ def build_render_plan( narration_video = videos[narration_video_id] cutout = config.cutouts[narration_video.cutout] + # Track which files are loaded from external cache + cached_files: set[str] = set() + narration_videos: list[tuple[str, VideoSource, CutoutDefinition]] = [] - video_path = _resolve_video_path(videos_dir, narration_video, shared_assets_dir) + video_path, is_cached = _resolve_video_path(videos_dir, narration_video, shared_assets_dir, project_path) + if is_cached: + cached_files.add(narration_video_id) full_duration = get_video_duration(video_path) # Adjust duration for skip (content starts at skip, so effective duration is less) effective_duration = full_duration - narration_skip @@ -536,6 +548,14 @@ def build_render_plan( time_range=(time_offset, render_end_time) if slide_range else None, ) + # Track cached files for triggered videos + for event in video_events: + _, is_cached = _resolve_video_path( + videos_dir, event.video_source, shared_assets_dir, project_path + ) + if is_cached: + cached_files.add(event.video_id) + audio_events = _extract_audio_events( marker_timings, audio, @@ -622,6 +642,8 @@ def build_render_plan( total_duration, videos_dir, shared_assets_dir, + project_path, + cached_files, ) # Update total duration to include outro @@ -654,6 +676,7 @@ def build_render_plan( narration_pauses=narration_pauses, outro_events=outro_events, narration_end_time=narration_end_time, + cached_files=cached_files, ) return plan, marker_timings @@ -663,8 +686,16 @@ def _resolve_video_path( videos_dir: Path, video_source: VideoSource, shared_assets_dir: Path = None, -) -> Path: - """Resolve the actual video file path.""" + project_path: Path = None, +) -> tuple[Path, bool]: + """Resolve the actual video file path with cache fallback. 
+ + Returns: + Tuple of (resolved_path, is_cached) where is_cached=True if + the file was found in the external cache. + """ + from .cache import resolve_with_cache + if video_source.is_shared and shared_assets_dir: base_dir = shared_assets_dir else: @@ -672,12 +703,24 @@ def _resolve_video_path( if video_source.output_file: video_path = base_dir / video_source.output_file - if video_path.exists(): - return video_path + if project_path: + resolved, is_cached = resolve_with_cache(video_path, project_path) + if resolved.exists(): + return resolved, is_cached + elif video_path.exists(): + return video_path, False webm_path = video_path.with_suffix(".mov") - if webm_path.exists(): - return webm_path - return base_dir / video_source.source_file + if project_path: + resolved, is_cached = resolve_with_cache(webm_path, project_path) + if resolved.exists(): + return resolved, is_cached + elif webm_path.exists(): + return webm_path, False + + source_path = base_dir / video_source.source_file + if project_path: + return resolve_with_cache(source_path, project_path) + return source_path, False def _extract_slide_events( @@ -932,6 +975,8 @@ def _extract_outro_events( narration_end_time: float, videos_dir: Path, shared_assets_dir: Path = None, + project_path: Path = None, + cached_files: set = None, ) -> list[OutroEvent]: """ Extract outro events that play after the narration ends. 
@@ -949,7 +994,9 @@ def _extract_outro_events( video_source = videos[video_id] # Get the video duration - video_path = _resolve_video_path(videos_dir, video_source, shared_assets_dir) + video_path, is_cached = _resolve_video_path(videos_dir, video_source, shared_assets_dir, project_path) + if is_cached and cached_files is not None: + cached_files.add(video_id) if video_path.exists(): full_duration = get_video_duration(video_path) else: diff --git a/gnommo/validator.py b/gnommo/validator.py index 106f62d..cc35a55 100644 --- a/gnommo/validator.py +++ b/gnommo/validator.py @@ -2,6 +2,7 @@ from pathlib import Path +from .cache import resolve_with_cache from .errors import ValidationError, ValidationIssue from .models import ( ProjectConfig, @@ -98,6 +99,7 @@ def validate_project( for slide_id, slide_def in slides.items(): image_path = slides_dir / slide_def.image + image_path, _ = resolve_with_cache(image_path, project_path) if not image_path.exists(): issues.append( ValidationIssue( @@ -142,6 +144,7 @@ def validate_project( base_dir = videos_dir video_path = base_dir / video_source.source_file + video_path, _ = resolve_with_cache(video_path, project_path) if not video_path.exists(): issues.append( ValidationIssue( @@ -153,6 +156,7 @@ def validate_project( # Check preprocessed output exists if filters are defined if video_source.filter and video_source.output_file: output_path = base_dir / video_source.output_file + output_path, _ = resolve_with_cache(output_path, project_path) if not output_path.exists(): issues.append( ValidationIssue( @@ -168,9 +172,11 @@ def validate_project( if bg_file: # Check in project folder first, then parent (for shared_assets) bg_path = project_path / bg_file + bg_path, _ = resolve_with_cache(bg_path, project_path) if not bg_path.exists(): # Try parent directory (shared_assets at repo root) bg_path = project_path.parent / bg_file + bg_path, _ = resolve_with_cache(bg_path, project_path.parent) if not bg_path.exists(): issues.append( 
ValidationIssue( @@ -188,7 +194,7 @@ def validate_project( # Check resolution is reasonable width, height = config.resolution - if width < 100 or height < 100: + if width < 50 or height < 50: issues.append( ValidationIssue( f"Resolution too small: {width}x{height}", project_path / "project.json" diff --git a/jensauthorkey b/jensauthorkey new file mode 100644 index 0000000..29b85f0 --- /dev/null +++ b/jensauthorkey @@ -0,0 +1,7 @@ +-----BEGIN OPENSSH PRIVATE KEY----- +b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW +QyNTUxOQAAACCDr3tCxUf7HC+9s9N0TF9EECMshm6/Epcr6kZzaZGv0AAAAKC+5OiPvuTo +jwAAAAtzc2gtZWQyNTUxOQAAACCDr3tCxUf7HC+9s9N0TF9EECMshm6/Epcr6kZzaZGv0A +AAAEBKyC2/ZfItNXIf/UcSTYaV/eWjX6uKIrvliO+sdFJUV4Ove0LFR/scL72z03RMX0QQ +IyyGbr8SlyvqRnNpka/QAAAAHGplbnMudGFuZHN0YWRAZWFnbGVjb25kb3Iubm8B +-----END OPENSSH PRIVATE KEY----- diff --git a/jensauthorkey.pub b/jensauthorkey.pub new file mode 100644 index 0000000..8a99724 --- /dev/null +++ b/jensauthorkey.pub @@ -0,0 +1 @@ +ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIOve0LFR/scL72z03RMX0QQIyyGbr8SlyvqRnNpka/Q jens.tandstad@eaglecondor.no diff --git a/short_is_universe_pixelated/.gnommo_sync.json b/short_is_universe_pixelated/.gnommo_sync.json new file mode 100644 index 0000000..2634cd0 --- /dev/null +++ b/short_is_universe_pixelated/.gnommo_sync.json @@ -0,0 +1,5 @@ +{ + "last_pushed_at": "2026-03-13T09:44:12+00:00", + "server_updated_at": "2026-03-13T09:44:12.934Z", + "last_pulled_at": "2026-03-13T09:35:00+00:00" +} \ No newline at end of file diff --git a/short_is_universe_pixelated/project.json b/short_is_universe_pixelated/project.json new file mode 100644 index 0000000..e91f7b3 --- /dev/null +++ b/short_is_universe_pixelated/project.json @@ -0,0 +1,34 @@ +{ + "id": "short_is_universe_pixelated", + "name": "Is the universe pixelated?", + "description": "What if space is made of tiny blocks? 
A 60-second take on discrete physics.", + "parent_project": "Video1", + "hook": "What if reality is fundamentally blocky — like Minecraft, but smaller?", + "platform_targets": [ + "youtube" + ], + "resolution": [ + 1080, + 1920 + ], + "fps": 30, + "duration_seconds": 60, + "script": "script.md", + "output_video": "export/final.mp4", + "keynote_file": "../video1/media/video1.key", + "background": "../video1/shared_assets/BlackBackground.mp4", + "slides": "../video1/media/slides/Video1/slides.json", + "defaultSlideType": "fullscreen", + "cutouts": { + "talkinghead": { + "x": "-23%", + "y": "10%", + "height": "90%" + }, + "fullscreen": { + "x": "0%", + "y": "0%", + "height": "100%" + } + } +} diff --git a/short_is_universe_pixelated/script.md b/short_is_universe_pixelated/script.md new file mode 100644 index 0000000..c81cd40 --- /dev/null +++ b/short_is_universe_pixelated/script.md @@ -0,0 +1,31 @@ +# Short: Is the universe pixelated? + +**HOOK**: What if reality is fundamentally blocky — like Minecraft, but smaller? + +[SLIDE: title_card] + +Everyone assumes space is smooth and continuous. + +[SLIDE: smooth_space] + +But what if it isn't? + +[SLIDE: pixelated_space] + +What if there's a *smallest* unit of space — and below that, nothing exists? + +[SLIDE: planck_length] + +This isn't new-age woo. The Planck length has been sitting in physics for a century. + +[SLIDE: planck_formula] + +The question is: is it a minimum, or just a measurement limit? + +[SLIDE: question_mark] + +That's what we're exploring at Glitch University. + +[SLIDE: outro] + +Link in description. The physics rabbit hole goes deep. 
diff --git a/skills/.env b/skills/.env new file mode 100644 index 0000000..bd0e15b --- /dev/null +++ b/skills/.env @@ -0,0 +1,4 @@ + + +API_URL="${GNOMMO_API_URL:-https://glitch.university}" +CONTENT_API_KEY=782y497821y491y3981212 diff --git a/skills/CLAUDE.deglitch.md b/skills/CLAUDE.deglitch.md new file mode 100644 index 0000000..5c0156a --- /dev/null +++ b/skills/CLAUDE.deglitch.md @@ -0,0 +1,110 @@ +# Gnommo Content Skills + +Skills for generating content for the Gnommo/Glitch.University learning platform. + +## Available Skills + +| Skill | File | Purpose | +|-------|------|---------| +| DEGLITCH Gates | `deglitch-gate-generator.md` | Generate quiz questions from manuscripts | +| Slide Content | `slide-content-generator.md` | Generate image prompts & text for slides | + +--- + +# DEGLITCH Gate Generator + +Generate quiz questions from manuscript content for the Gnommo learning platform. + +## Quick Start + +1. Read `manuscript.txt` (or specified file) +2. Identify 3-7 key concepts +3. Create 1-2 questions per concept +4. 
Output JSON or submit via API + +## Project Structure + +Each video project has: +- `manuscript.txt` - The narration script with `[SX]` slide markers +- `project.json` - Contains `coursecode` to identify the tech on the server + +## API Configuration + +``` +Base URL: ${GNOMMO_API_URL:-http://localhost:3001} +Auth: Authorization: Bearer ${CONTENT_API_KEY} +``` + +## Endpoints + +- `GET /api/content/techs/available` - Find tech_id to link +- `POST /api/content/deglitch-gates` - Create gate +- `GET /api/content/deglitch-gates` - List gates +- `PUT /api/content/deglitch-gates/:id` - Update gate + +## Question JSON Structure + +```json +{ + "tech_id": null, + "title": "Gate Title", + "description": "What this tests", + "passing_score": 0.8, + "shuffle_questions": true, + "shuffle_options": true, + "questions": [ + { + "question_type": "radio", + "text": "Question?", + "sort_order": 0, + "options": { + "a": { "answer": "Wrong", "correct": false, "why": "Explanation" }, + "b": { "answer": "Right", "correct": true, "why": "Explanation" }, + "c": { "answer": "Wrong", "correct": false, "why": "Explanation" }, + "d": { "answer": "Wrong", "correct": false, "why": "Explanation" } + } + } + ] +} +``` + +## Question Types +- `radio` - Single answer (most common) +- `checkbox` - Multiple answers +- `llm` - Free text (AI evaluated) + +## Quality Guidelines + +- Test understanding, not memorization +- One clear correct answer per radio question +- Plausible wrong answers with educational "why" +- Concise questions, avoid trick questions +- Vary difficulty across questions + +## Workflow with API Key + +```bash +# 1. Read project.json to get coursecode +cat /path/to/video/project.json | jq '.coursecode' + +# 2. Find tech_id by matching coursecode +curl -H "Authorization: Bearer $CONTENT_API_KEY" \ + $GNOMMO_API_URL/api/content/techs + +# 3. 
Create gate with matched tech_id +curl -X POST -H "Authorization: Bearer $CONTENT_API_KEY" \ + -H "Content-Type: application/json" \ + $GNOMMO_API_URL/api/content/deglitch-gates \ + -d '{"tech_id": 1, "title":"...","questions":[...]}' +``` + +## Matching Coursecode to Tech + +The `coursecode` in `project.json` matches the `code` field in the server's tech list: +- `♟️_#1.0` → Lightlane series, Video 1 +- `♟️_#2.0` → Lightlane series, Video 2 +- `WTF_#1` → What is Glitch University series, Video 1 + +## Workflow without API Key + +Output the complete JSON for manual entry or later API submission. diff --git a/skills/deglitch-api.sh b/skills/deglitch-api.sh new file mode 100644 index 0000000..f134857 --- /dev/null +++ b/skills/deglitch-api.sh @@ -0,0 +1,141 @@ +#!/bin/bash +# glitch gate API Helper Script +# Usage: source this file, then use the functions + +# Configuration - set these or export before sourcing +GNOMMO_API_URL="${GNOMMO_API_URL:-http://localhost:3001}" +# CONTENT_API_KEY should be set in environment + +# Check if API key is set +check_api_key() { + if [ -z "$CONTENT_API_KEY" ]; then + echo "Error: CONTENT_API_KEY not set" + echo "Run: export CONTENT_API_KEY=your-key-here" + return 1 + fi +} + +# List all techs +list_techs() { + check_api_key || return 1 + curl -s -H "Authorization: Bearer $CONTENT_API_KEY" \ + "$GNOMMO_API_URL/api/content/techs" | jq +} + +# List techs without gates (available for linking) +list_available_techs() { + check_api_key || return 1 + curl -s -H "Authorization: Bearer $CONTENT_API_KEY" \ + "$GNOMMO_API_URL/api/content/techs/available" | jq +} + +# List all glitch gates +list_gates() { + check_api_key || return 1 + curl -s -H "Authorization: Bearer $CONTENT_API_KEY" \ + "$GNOMMO_API_URL/api/content/deglitch-gates" | jq +} + +# Get a specific gate by ID +get_gate() { + check_api_key || return 1 + local gate_id=$1 + if [ -z "$gate_id" ]; then + echo "Usage: get_gate " + return 1 + fi + curl -s -H "Authorization: Bearer 
$CONTENT_API_KEY" \
+        "$GNOMMO_API_URL/api/content/deglitch-gates/$gate_id" | jq
+}
+
+# Create a gate from a JSON file.
+#   $1 - path to a file containing the gate JSON (sent as the request body)
+# Prints the server response through jq.
+# Returns 1 if the API key, the argument, or the file is missing.
+create_gate() {
+    check_api_key || return 1
+    local json_file=$1
+    if [ -z "$json_file" ]; then
+        echo "Usage: create_gate <json_file>" >&2
+        return 1
+    fi
+    if [ ! -f "$json_file" ]; then
+        echo "Error: File not found: $json_file" >&2
+        return 1
+    fi
+    # -sS: suppress the progress meter but still report transport errors
+    curl -sS -X POST \
+        -H "Authorization: Bearer $CONTENT_API_KEY" \
+        -H "Content-Type: application/json" \
+        -d @"$json_file" \
+        "$GNOMMO_API_URL/api/content/deglitch-gates" | jq
+}
+
+# Create a gate from a JSON string.
+#   $1 - the gate JSON itself (quote it so the shell passes it as one word)
+# Prints the server response through jq.
+# Returns 1 if the API key or the argument is missing.
+create_gate_json() {
+    check_api_key || return 1
+    local json_data=$1
+    if [ -z "$json_data" ]; then
+        echo "Usage: create_gate_json '<json_string>'" >&2
+        return 1
+    fi
+    curl -sS -X POST \
+        -H "Authorization: Bearer $CONTENT_API_KEY" \
+        -H "Content-Type: application/json" \
+        -d "$json_data" \
+        "$GNOMMO_API_URL/api/content/deglitch-gates" | jq
+}
+
+# Update an existing gate from a JSON file.
+#   $1 - id of the gate to update
+#   $2 - path to a file containing the replacement gate JSON
+# Prints the server response through jq.
+# Returns 1 if the API key, an argument, or the file is missing.
+update_gate() {
+    check_api_key || return 1
+    local gate_id=$1
+    local json_file=$2
+    if [ -z "$gate_id" ] || [ -z "$json_file" ]; then
+        echo "Usage: update_gate <gate_id> <json_file>" >&2
+        return 1
+    fi
+    if [ ! -f "$json_file" ]; then
+        echo "Error: File not found: $json_file" >&2
+        return 1
+    fi
+    curl -sS -X PUT \
+        -H "Authorization: Bearer $CONTENT_API_KEY" \
+        -H "Content-Type: application/json" \
+        -d @"$json_file" \
+        "$GNOMMO_API_URL/api/content/deglitch-gates/$gate_id" | jq
+}
+
+# Delete a gate after an interactive y/N confirmation.
+#   $1 - id of the gate to delete
+delete_gate() {
+    check_api_key || return 1
+    local gate_id=$1
+    if [ -z "$gate_id" ]; then
+        echo "Usage: delete_gate <gate_id>" >&2
+        return 1
+    fi
+    read -r -p "Delete gate $gate_id? 
(y/N) " confirm + if [ "$confirm" = "y" ] || [ "$confirm" = "Y" ]; then + curl -s -X DELETE \ + -H "Authorization: Bearer $CONTENT_API_KEY" \ + "$GNOMMO_API_URL/api/content/deglitch-gates/$gate_id" | jq + else + echo "Cancelled" + fi +} + +# Print available commands +deglitch_help() { + echo "glitch gate API Commands:" + echo "" + echo " list_techs - List all techs" + echo " list_available_techs - List techs without gates" + echo " list_gates - List all glitch gates" + echo " get_gate - Get gate details" + echo " create_gate - Create gate from JSON file" + echo " create_gate_json '' - Create gate from JSON string" + echo " update_gate - Update gate from JSON file" + echo " delete_gate - Delete a gate" + echo "" + echo "Configuration:" + echo " GNOMMO_API_URL=$GNOMMO_API_URL" + echo " CONTENT_API_KEY=$([ -n "$CONTENT_API_KEY" ] && echo "[set]" || echo "[not set]")" +} + +echo "DEGLITCH API helper loaded. Run 'deglitch_help' for commands." diff --git a/skills/deglitch-gate-generator.md b/skills/deglitch-gate-generator.md new file mode 100644 index 0000000..7e47d01 --- /dev/null +++ b/skills/deglitch-gate-generator.md @@ -0,0 +1,251 @@ +# glitch gate Generator Skill + +You are a quiz/assessment generator for the Gnommo learning platform. Your job is to read educational manuscript content and create glitch gate questions that test understanding. + +## What is a glitch gate? + +A glitch gate is a quiz that learners must pass to demonstrate mastery of a tech (lesson). 
Gates have: +- A title and description +- A passing score (default 80%) +- Multiple questions with explanations for why each answer is correct/incorrect + +## Question Philosophy + +Create questions that test **understanding**, not memorization: + +- **Intuition questions**: Test pattern recognition and conceptual understanding +- **Grit questions**: Present tricky scenarios requiring careful thinking +- **Craft questions**: Test precise technical knowledge and attention to detail + +### Good Question Characteristics +- Tests a single concept clearly +- Has one unambiguously correct answer +- Wrong answers are plausible (not obviously wrong) +- Each answer has a "why" explanation +- Avoids trick questions or gotchas + +## Workflow + +### Step 1: Read the Manuscript + +First, read the manuscript file to understand the content: + +``` +Read the file: manuscript.txt +``` + +Or if given a specific path: +``` +Read the file: /path/to/manuscript.txt +``` + +### Step 2: Identify Key Concepts + +After reading, identify 3-7 key concepts that learners should understand. Consider: +- Core principles explained in the text +- Common misconceptions to address +- Practical applications mentioned +- Relationships between concepts + +### Step 3: Generate Questions + +For each key concept, create 1-2 questions. 
Use this JSON structure: + +```json +{ + "tech_id": null, + "title": "Gate Title Based on Content", + "description": "Brief description of what this gate tests", + "passing_score": 0.8, + "shuffle_questions": true, + "shuffle_options": true, + "is_active": true, + "questions": [ + { + "question_type": "radio", + "text": "Question text here?", + "sort_order": 0, + "options": { + "a": { + "answer": "First option", + "correct": false, + "why": "Explanation of why this is incorrect" + }, + "b": { + "answer": "Second option (correct)", + "correct": true, + "why": "Explanation of why this is correct" + }, + "c": { + "answer": "Third option", + "correct": false, + "why": "Explanation of why this is incorrect" + }, + "d": { + "answer": "Fourth option", + "correct": false, + "why": "Explanation of why this is incorrect" + } + } + } + ] +} +``` + +### Question Types + +- `radio` - Single correct answer (most common) +- `checkbox` - Multiple correct answers +- `llm` - Free text evaluated by AI (use sparingly) + +### Step 4: Submit to API (if API key available) + +If you have the Content API key, you can directly create the gate: + +```bash +curl -X POST https://your-domain.com/api/content/deglitch-gates \ + -H "Authorization: Bearer YOUR_API_KEY" \ + -H "Content-Type: application/json" \ + -d 'YOUR_JSON_HERE' +``` + +### API Endpoints + +| Method | Endpoint | Description | +|--------|----------|-------------| +| GET | `/api/content/techs` | List all techs to find tech_id | +| GET | `/api/content/techs/available` | Techs without gates | +| GET | `/api/content/deglitch-gates` | List existing gates | +| POST | `/api/content/deglitch-gates` | Create new gate | +| PUT | `/api/content/deglitch-gates/:id` | Update gate | +| DELETE | `/api/content/deglitch-gates/:id` | Delete gate | + +### Finding the Right Tech ID + +Before creating a gate, list available techs to find the correct `tech_id`: + +```bash +curl -X GET https://your-domain.com/api/content/techs/available \ + -H 
"Authorization: Bearer YOUR_API_KEY" +``` + +## Example: Complete Question Set + +Here's an example of a well-structured gate for an "Atomic Structure" lesson: + +```json +{ + "tech_id": 1, + "title": "Atomic Structure Fundamentals", + "description": "Test your understanding of basic atomic structure and the components of atoms.", + "passing_score": 0.8, + "shuffle_questions": true, + "shuffle_options": true, + "is_active": true, + "questions": [ + { + "question_type": "radio", + "text": "What determines the chemical properties of an atom?", + "sort_order": 0, + "options": { + "a": { + "answer": "The number of neutrons", + "correct": false, + "why": "Neutrons affect atomic mass and stability, but not chemical properties directly." + }, + "b": { + "answer": "The number of protons", + "correct": false, + "why": "Protons determine the element, but electrons determine how it bonds." + }, + "c": { + "answer": "The number of electrons in the outer shell", + "correct": true, + "why": "Valence electrons determine how an atom bonds with others, defining its chemical behavior." + }, + "d": { + "answer": "The total atomic mass", + "correct": false, + "why": "Atomic mass affects physical properties like density, not chemical reactivity." + } + } + }, + { + "question_type": "radio", + "text": "An atom has 6 protons and 8 neutrons. What element is it?", + "sort_order": 1, + "options": { + "a": { + "answer": "Oxygen", + "correct": false, + "why": "Oxygen has 8 protons. The number of protons defines the element." + }, + "b": { + "answer": "Carbon", + "correct": true, + "why": "Carbon has 6 protons. This is carbon-14, an isotope with 8 neutrons." + }, + "c": { + "answer": "Nitrogen", + "correct": false, + "why": "Nitrogen has 7 protons." + }, + "d": { + "answer": "Carbon-14 is not carbon", + "correct": false, + "why": "Isotopes are variants of the same element. Carbon-14 is still carbon." 
+ } + } + }, + { + "question_type": "radio", + "text": "Why are noble gases chemically inert?", + "sort_order": 2, + "options": { + "a": { + "answer": "They have no electrons", + "correct": false, + "why": "Noble gases have electrons; helium has 2, neon has 10, etc." + }, + "b": { + "answer": "Their outer electron shell is full", + "correct": true, + "why": "A full valence shell means no tendency to gain, lose, or share electrons." + }, + "c": { + "answer": "They are too heavy to react", + "correct": false, + "why": "Mass doesn't determine reactivity. Francium is heavy but highly reactive." + }, + "d": { + "answer": "They only exist at very low temperatures", + "correct": false, + "why": "Noble gases exist at all temperatures; they're gases at room temperature." + } + } + } + ] +} +``` + +## Tips for Quality Questions + +1. **Start with the concept**, then craft the question around it +2. **Make wrong answers educational** - the "why" should teach something +3. **Vary difficulty** - include some easier and some harder questions +4. **Avoid "all of the above"** or "none of the above" options +5. **Keep questions concise** - if it needs a lot of context, split it +6. **Test understanding, not recall** - ask "why" and "how", not just "what" + +## Environment Variables + +If using the API programmatically, you need: +- `CONTENT_API_KEY` - Your API key for authentication +- API base URL (e.g., `https://gnommo.com` or `http://localhost:3001`) + +## Output Format + +When generating questions without API access, output: +1. A summary of key concepts identified +2. The complete JSON structure ready to copy +3. 
Any notes about the questions or suggestions for the tech linking diff --git a/skills/slide-content-generator.md b/skills/slide-content-generator.md new file mode 100644 index 0000000..93bb5f8 --- /dev/null +++ b/skills/slide-content-generator.md @@ -0,0 +1,165 @@ +# Slide Content Generator Skill + +Generate slide content (image prompts or text) from Gnommo manuscript files. + +## Context + +Gnommo presentations use a **square slide area next to a talking head**. Slides should be: +- Visually impactful but not cluttered +- Timed to appear with the first word after the `[SX]` marker +- Either **image-based** (generated via AI) or **text-based** (minimal, punchy text) + +## Manuscript Format + +Manuscripts use slide markers like `[S1]`, `[S2]`, etc. The content following each marker is what the presenter says while that slide is displayed. + +``` +[S1] +Welcome to the course... + +[S2] +What if the universe is discrete? +``` + +## Workflow + +### Step 1: Read the Manuscript + +``` +Read the file: /path/to/manuscript.txt +``` + +### Step 2: Analyze Each Slide + +For each `[SX]` marker, determine: +1. **What is the core message?** - The key idea being communicated +2. **Visual or text?** - Would an image or text better support the message? +3. **Emotional tone?** - Dramatic, contemplative, humorous, technical? + +### Step 3: Generate Content + +For each slide, output one of: + +#### IMAGE PROMPT +For conceptual, emotional, or complex ideas that benefit from visualization. + +``` +**[SX]** - "First few words..." +**IMAGE PROMPT:** +`Detailed description for AI image generation, style, mood, composition, lighting, specific elements to include` +``` + +#### TEXT SLIDE +For lists, key terms, definitions, or when words ARE the point. + +``` +**[SX]** - "First few words..." 
+**TEXT SLIDE:** +``` +HEADLINE + +• Bullet point +• Another point +``` +``` + +## Guidelines + +### When to Use IMAGE PROMPTS + +- Abstract concepts (e.g., "the fabric of spacetime") +- Metaphors and analogies (e.g., "like changing engines while driving") +- Emotional moments (e.g., "this sounds insane") +- Scene-setting (e.g., "imagine a Minecraft universe") + +### When to Use TEXT SLIDES + +- Lists of items being enumerated +- Technical terms being defined +- Key questions or frameworks +- Course titles, section headers +- Quotes or key phrases + +### Image Prompt Best Practices + +1. **Be specific about style**: "isometric illustration", "cinematic lighting", "minimal vector style" +2. **Include mood/tone**: "mysterious", "hopeful", "dramatic contrast" +3. **Describe composition**: "split image", "centered subject", "deep space background" +4. **Avoid text in images**: AI image generators struggle with text - use text slides instead +5. **Keep it achievable**: Don't describe impossibly complex scenes + +### Text Slide Best Practices + +1. **Minimal words**: 3-7 words per line, 1-5 lines max +2. **Use hierarchy**: HEADLINES in caps, details below +3. **Bullets for lists**: Keep them short and scannable +4. **Leave breathing room**: Don't fill the entire square + +## Output Format + +Output slides in order, with clear separation: + +```markdown +--- + +**[S1]** - "First words of narration..." +**TYPE:** (IMAGE PROMPT or TEXT SLIDE) +Content here + +--- + +**[S2]** - "First words of narration..." +... +``` + +## Example Output + +--- + +**[S1]** - "Welcome to Glitch.University..." +**TEXT SLIDE:** +``` +GLITCH.UNIVERSITY +WTF_#1 + +What is Glitch University? +``` + +--- + +**[S2]** - "What if the universe is fundamentally discrete..." 
+**IMAGE PROMPT:** +`A hyper-detailed Minecraft-style voxel universe, showing galaxies and stars rendered as tiny glowing cubes, deep space background with blocky nebulae, cosmic scale but pixelated, dark background with vibrant cube-shaped stars, cinematic lighting` + +--- + +## Customization Options + +### Style Presets + +You can request specific visual styles: +- **Tech/Corporate**: Clean vectors, isometric, blues and whites +- **Cosmic/Physics**: Deep space, nebulae, particle effects +- **Playful/Minecraft**: Voxels, bright colors, blocky +- **Philosophical**: Abstract, minimal, contemplative +- **Dramatic**: High contrast, cinematic, intense lighting + +### Text Tone + +- **Academic**: Formal terminology, structured +- **Casual**: Conversational, approachable +- **Punchy**: Short, impactful, memorable + +## Integration with Gnommo + +The generated content can be used to: +1. Create slides in Keynote/PowerPoint +2. Generate images via Midjourney/DALL-E/Stable Diffusion +3. Populate the `slides.json` file in the project's media folder + +## Tips + +- Read the ENTIRE manuscript first to understand the arc +- Match slide density to pacing - fast sections need simpler slides +- Create visual continuity - recurring metaphors should have consistent imagery +- Consider what the talking head is doing - slides complement, not compete diff --git a/transcode.sh b/transcode.sh new file mode 100755 index 0000000..7519cf5 --- /dev/null +++ b/transcode.sh @@ -0,0 +1,300 @@ +#!/bin/zsh +# +# Video Transcoding Script +# Converts video files to H.265/HEVC at 1080p for significant size reduction +# +# Usage: ./transcode.sh [options] +# +# Options: +# --replace Delete original files after successful transcoding +# --dry-run Show what would be transcoded without doing it +# --crf Quality level (default: 23, lower=better quality, 18-28 typical) +# + +set -e + +# Configuration +DEFAULT_CRF=23 +EXTENSIONS=("mov" "mp4" "m4v" "avi" "mkv" "mxf") + +usage() { + cat << EOF +Video 
Transcoding Script
+
+Converts video files to H.265/HEVC at 1080p for significant size reduction.
+Typically achieves 80-95% size reduction from uncompressed 4K footage.
+
+Usage: $(basename "$0") <path> [options]
+
+Options:
+  --replace      Delete original files after successful transcoding
+  --dry-run      Show what would be transcoded without doing it
+  --crf <value>  Quality level (default: 23)
+                 Lower = better quality, larger files
+                 18 = visually lossless, 23 = default, 28 = smaller
+  --help         Show this help message
+
+Output:
+  Files are saved alongside originals with '_compressed.mp4' suffix.
+  With --replace, originals are deleted after successful transcode.
+  When processing a folder, files are sorted smallest-first.
+
+Examples:
+  $(basename "$0") ./video.mov                 # Transcode single file
+  $(basename "$0") ./media/videos              # Transcode folder (smallest first)
+  $(basename "$0") ./media/videos --dry-run    # Preview only
+  $(basename "$0") ./media/videos --replace    # Transcode and delete originals
+  $(basename "$0") ./media/videos --crf 20     # Higher quality
+
+EOF
+    # Optional $1 is the exit status, so error paths can print help and still fail.
+    exit "${1:-0}"
+}
+
+# Parse arguments
+FOLDER=""
+REPLACE=false
+DRY_RUN=false
+CRF=$DEFAULT_CRF
+# Held in a variable so the parentheses need no shell-specific quoting inside [[ =~ ]].
+CRF_RE='^[0-9]+(\.[0-9]+)?$'
+
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --replace)
+            REPLACE=true
+            shift
+            ;;
+        --dry-run)
+            DRY_RUN=true
+            shift
+            ;;
+        --crf)
+            # Reject a missing or non-numeric value here instead of failing inside `shift 2`.
+            if [[ $# -lt 2 || ! "$2" =~ $CRF_RE ]]; then
+                echo "Error: --crf requires a numeric value" >&2
+                exit 1
+            fi
+            CRF="$2"
+            shift 2
+            ;;
+        --help|-h)
+            usage
+            ;;
+        -*)
+            echo "Unknown option: $1" >&2
+            usage 1
+            ;;
+        *)
+            # Only the first positional argument is used as the input path.
+            if [[ -z "$FOLDER" ]]; then
+                FOLDER="$1"
+            fi
+            shift
+            ;;
+    esac
+done
+
+# Validate arguments
+if [[ -z "$FOLDER" ]]; then
+    echo "Error: Folder path is required" >&2
+    echo "" >&2
+    usage 1
+fi
+
+if [[ ! -d "$FOLDER" && ! -f "$FOLDER" ]]; then
+    echo "Error: Path not found: $FOLDER" >&2
+    exit 1
+fi
+
+# Check for ffmpeg
+if ! command -v ffmpeg &> /dev/null; then
+    echo "Error: ffmpeg is not installed" >&2
+    echo "Install with: brew install ffmpeg" >&2
+    exit 1
+fi
+
+# Build the find(1) name tests for all video extensions into the FIND_EXPR
+# array: -iname '*.mov' -o -iname '*.mp4' ...
+# An argument array replaces the former string + eval, so paths containing
+# quotes, spaces or $ are passed to find verbatim.
+build_find_pattern() {
+    local ext
+    FIND_EXPR=()
+    for ext in "${EXTENSIONS[@]}"; do
+        if (( ${#FIND_EXPR[@]} > 0 )); then
+            FIND_EXPR+=(-o)
+        fi
+        FIND_EXPR+=(-iname "*.$ext")
+    done
+}
+
+# Format a byte count for display (one decimal, K/M/G suffix).
+#   $1 - size in bytes
+format_size() {
+    local bytes=$1
+    if (( bytes >= 1073741824 )); then
+        printf "%.1fG" "$(echo "scale=1; $bytes / 1073741824" | bc)"
+    elif (( bytes >= 1048576 )); then
+        printf "%.1fM" "$(echo "scale=1; $bytes / 1048576" | bc)"
+    else
+        printf "%.1fK" "$(echo "scale=1; $bytes / 1024" | bc)"
+    fi
+}
+
+# Print a file's size in bytes, or 0 if it cannot be determined.
+#   $1 - file path
+get_size() {
+    # BSD/macOS stat syntax first, then GNU coreutils syntax.
+    stat -f%z "$1" 2>/dev/null || stat -c%s "$1" 2>/dev/null || echo 0
+}
+
+echo "========================================"
+echo "Video Transcoder"
+echo "========================================"
+echo "Folder: $FOLDER"
+echo "Codec: H.265/HEVC"
+echo "Resolution: 1080p (scaled down)"
+echo "Quality: CRF $CRF"
+echo "Replace: $REPLACE"
+if [[ "$DRY_RUN" == true ]]; then
+    echo "DRY RUN: Yes"
+fi
+echo "========================================"
+echo ""
+
+# Check if input is a file or folder
+IS_SINGLE_FILE=false
+if [[ -f "$FOLDER" ]]; then
+    IS_SINGLE_FILE=true
+    VIDEO_FILES=("$FOLDER")
+    echo "Processing single file"
+    echo ""
+else
+    # Find all video files (excluding already compressed ones), sorted by size (smallest first)
+    build_find_pattern
+
+    # Use Python for robust sorting by size (handles spaces in paths correctly)
+    VIDEO_FILES=()
+    while IFS= read -r file; do
+        VIDEO_FILES+=("$file")
+    done < <(find "$FOLDER" -type f \( "${FIND_EXPR[@]}" \) 2>/dev/null | python3 -c "
+import sys
+import os
+
+files = []
+for line in sys.stdin:
+    path = line.rstrip('\n')
+    # Test the file name only, so a directory named '*_compressed.*' does not hide its contents
+    if '_compressed.' in os.path.basename(path):
+        continue
+    try:
+        size = os.path.getsize(path)
+        files.append((size, path))
+    except OSError:
+        # File vanished or is unreadable: leave it out of the work list
+        pass
+
+files.sort(key=lambda x: x[0])
+for size, path in files:
+    print(path)
+")
+
+    if [[ ${#VIDEO_FILES[@]} -eq 0 ]]; then
+        echo "No video files found in $FOLDER"
+        exit 0
+    fi
+
+    echo "Found ${#VIDEO_FILES[@]} video file(s) to process (smallest first)"
+    echo ""
+fi
+
+# Track totals
+TOTAL_ORIGINAL=0
+TOTAL_COMPRESSED=0
+SUCCESS_COUNT=0
+FAIL_COUNT=0
+
+# Process each file
+for input_file in "${VIDEO_FILES[@]}"; do
+    # Generate output filename
+    dir=$(dirname "$input_file")
+    basename=$(basename "$input_file")
+    name="${basename%.*}"
+    output_file="$dir/${name}_compressed.mp4"
+
+    # Get original size
+    original_size=$(get_size "$input_file")
+    original_size_fmt=$(format_size "$original_size")
+
+    echo "----------------------------------------"
+    echo "Input: $input_file ($original_size_fmt)"
+    echo "Output: $output_file"
+
+    if [[ "$DRY_RUN" == true ]]; then
+        echo "Action: [DRY RUN] Would transcode"
+        continue
+    fi
+
+    # Skip if output already exists
+    if [[ -f "$output_file" ]]; then
+        echo "Action: Skipped (output already exists)"
+        continue
+    fi
+
+    # Transcode with ffmpeg
+    # -vf scale=-2:1080 = scale to 1080p height, auto width (divisible by 2)
+    # -c:v libx265 = H.265/HEVC codec
+    # -crf <n> = quality (lower = better)
+    # -preset medium = encoding speed/compression tradeoff
+    # -c:a aac -b:a 128k = audio to AAC at 128kbps
+    # -tag:v hvc1 = compatibility tag for Apple devices
+    echo "Action: Transcoding..."
+
+    if ffmpeg -i "$input_file" \
+        -vf "scale=-2:1080" \
+        -c:v libx265 \
+        -crf "$CRF" \
+        -preset medium \
+        -c:a aac -b:a 128k \
+        -tag:v hvc1 \
+        -y \
+        "$output_file" \
+        -loglevel warning -stats 2>&1; then
+
+        # Get compressed size
+        compressed_size=$(get_size "$output_file")
+        compressed_size_fmt=$(format_size "$compressed_size")
+
+        # Calculate reduction
+        if (( original_size > 0 )); then
+            reduction=$(echo "scale=1; 100 - ($compressed_size * 100 / $original_size)" | bc)
+        else
+            reduction=0
+        fi
+
+        echo "Result: $original_size_fmt → $compressed_size_fmt (${reduction}% reduction)"
+
+        TOTAL_ORIGINAL=$((TOTAL_ORIGINAL + original_size))
+        TOTAL_COMPRESSED=$((TOTAL_COMPRESSED + compressed_size))
+        # Not ((SUCCESS_COUNT++)): the post-increment evaluates to 0 on the first
+        # success, which gives exit status 1 and stops the script under `set -e`.
+        SUCCESS_COUNT=$((SUCCESS_COUNT + 1))
+
+        # Delete original if --replace is set; only when a non-empty output exists
+        if [[ "$REPLACE" == true && -s "$output_file" ]]; then
+            rm -- "$input_file"
+            echo "Deleted: $input_file"
+        fi
+    else
+        echo "Result: FAILED"
+        # Same `set -e` pitfall as above, so use a plain assignment.
+        FAIL_COUNT=$((FAIL_COUNT + 1))
+        # Remove partial output file if it exists (-f: no error when it does not)
+        rm -f -- "$output_file"
+    fi
+done
+
+echo ""
+echo "========================================"
+echo "Summary"
+echo "========================================"
+if [[ "$DRY_RUN" == true ]]; then
+    echo "DRY RUN - no files were transcoded"
+else
+    echo "Processed: $SUCCESS_COUNT succeeded, $FAIL_COUNT failed"
+    if (( SUCCESS_COUNT > 0 )); then
+        total_orig_fmt=$(format_size "$TOTAL_ORIGINAL")
+        total_comp_fmt=$(format_size "$TOTAL_COMPRESSED")
+        if (( TOTAL_ORIGINAL > 0 )); then
+            total_reduction=$(echo "scale=1; 100 - ($TOTAL_COMPRESSED * 100 / $TOTAL_ORIGINAL)" | bc)
+        else
+            total_reduction=0
+        fi
+        echo "Total: $total_orig_fmt → $total_comp_fmt (${total_reduction}% reduction)"
+
+        if [[ "$REPLACE" == true ]]; then
+            saved=$(format_size $((TOTAL_ORIGINAL - TOTAL_COMPRESSED)))
+            echo "Freed: $saved"
+        fi
+    fi
+fi
+echo "========================================"
+
+# Report failure to the caller if any file could not be transcoded
+if (( FAIL_COUNT > 0 )); then
+    exit 1
+fi