diff --git a/GlitchTrailer/project.json b/GlitchTrailer/project.json index 9c3f5ed..7f1a339 100644 --- a/GlitchTrailer/project.json +++ b/GlitchTrailer/project.json @@ -14,6 +14,7 @@ "audioonly": [ { "type": "audio_normalize", + "compress": false, "normalize": true, "target_lufs": -14, "target_lra": 11, diff --git a/gnommo/cli.py b/gnommo/cli.py index 8182545..99cfd15 100644 --- a/gnommo/cli.py +++ b/gnommo/cli.py @@ -4,6 +4,7 @@ import argparse import json from logging import config import re +import time import shutil import subprocess import sys @@ -42,6 +43,13 @@ Examples: gnommo -p video1 trim --force Redo trim even for segments that already have skip/take gnommo -p video1 trim --threshold -25 Raise threshold to ignore clothing/room noise gnommo -p video1 trim -v Show detected silence periods for debugging + gnommo -p video1 transcode Transcode narration folder to H.265 (1st pass, before preprocess) + gnommo -p video1 transcode --replace Delete originals after successful transcode + gnommo -p video1 transcode --crf 28 Lower quality / smaller files (default CRF: 23) + gnommo -p video1 transcode --processed Compress _processed.mov files to HEVC+alpha (2nd pass, after preprocess) + gnommo -p video1 transcode --processed --alpha-quality 0.5 More aggressive alpha compression + gnommo -p video1 transcode --processed --dry-run Preview what would be compressed + gnommo -p video1 transcode --force Re-transcode even if output already exists gnommo -p video1 all Full pipeline: transcribe → align → render gnommo -p video1 render --dry-run Show FFmpeg command without running gnommo -p video1 description Generate YouTube description file @@ -92,6 +100,7 @@ Examples: "push", "pull", "handoff", + "transcode", ], help="Action to perform (default: render)", ) @@ -146,7 +155,7 @@ Examples: parser.add_argument( "--channel", type=str, - choices=["left", "right", "both"], + choices=["auto", "left", "right", "both"], default="both", help="For extract-audio: which audio channel(s) to 
extract (default: both)", ) @@ -171,6 +180,29 @@ Examples: default=-40.0, help="For trim: silence threshold in dB (default: -40). Raise (e.g. -25) to ignore clothing/room noise.", ) + parser.add_argument( + "--crf", + type=int, + default=23, + help="For transcode: H.265 quality (CRF, default: 23; lower=better quality, larger file)", + ) + parser.add_argument( + "--replace", + action="store_true", + help="For transcode: delete original files after successful transcode", + ) + parser.add_argument( + "--processed", + action="store_true", + help="For transcode: compress _processed.mov files (with alpha) using HEVC+alpha instead of narration files", + ) + parser.add_argument( + "--alpha-quality", + type=float, + default=1.0, + dest="alpha_quality", + help="For transcode --processed: HEVC alpha quality 0.0-1.0 (default: 1.0; lower=smaller file)", + ) args = parser.parse_args() @@ -198,6 +230,17 @@ Examples: ) elif action == "trim": return cmd_trim(project_path, args.verbose, args.force, args.threshold) + elif action == "transcode": + return cmd_transcode( + project_path, + args.verbose, + args.dry_run, + args.replace, + args.crf, + args.force, + args.processed, + args.alpha_quality, + ) elif action in ("stitch"): return cmd_stitch( project_path, @@ -234,13 +277,18 @@ Examples: return cmd_master(project_path, args.verbose, args.channel) elif action == "push": from .push import cmd_push + return cmd_push(project_path, args.verbose, args.force, args.prod) elif action == "pull": from .pull import cmd_pull + return cmd_pull(project_path, args.verbose, args.force, args.prod) elif action == "handoff": from .handoff import cmd_handoff - return cmd_handoff(project_path, args.verbose, args.file, args.prod, args.res) + + return cmd_handoff( + project_path, args.verbose, args.file, args.prod, args.res + ) except GnommoError as e: print(f"Error: {e}", file=sys.stderr) @@ -313,11 +361,229 @@ def cmd_import(project_path: Path, force: bool, verbose: bool) -> int: shared_assets_dir = 
_find_shared_assets(project_path) if shared_assets_dir: _import_shared_assets(shared_assets_dir, verbose) + _sync_shared_videos_to_local(project_path, config, shared_assets_dir, verbose) + + # Probe and cache audio file durations into audio.json + _probe_audio_durations(project_path, config, force, verbose) + + # Probe and cache video metadata (duration, has_audio) into videos.json + _probe_video_metadata(project_path, config, shared_assets_dir, force, verbose) print("Import complete.") return 0 +def _probe_audio_durations( + project_path: Path, config, force: bool, verbose: bool +) -> None: + """Probe and cache audio file durations into audio.json. + + Runs once at import time so the render stage never needs to scan audio files. + Skips entries that already have a duration unless --force is set. + """ + from .renderer import _get_audio_duration + + if config and config.audio_path: + audio_json_path = project_path / config.audio_path + else: + audio_json_path = project_path / "audio.json" + + if not audio_json_path.exists(): + return + + audio_dir = audio_json_path.parent + data = _read_json(audio_json_path) + updated = False + + for audio_id, audio_data in data.items(): + if "file" not in audio_data: + continue + if "duration" in audio_data and not force: + if verbose: + print(f" Audio '{audio_id}': cached ({audio_data['duration']:.1f}s)") + continue + audio_path = audio_dir / audio_data["file"] + if not audio_path.exists(): + if verbose: + print(f" Audio '{audio_id}': file not found, skipping") + continue + print(f" Probing audio '{audio_id}' ({audio_path.name})...", end=" ", flush=True) + try: + duration = _get_audio_duration(audio_path) + data[audio_id]["duration"] = round(duration, 3) + updated = True + print(f"{duration:.1f}s") + except Exception as e: + print(f"failed ({e})") + + if updated: + with open(audio_json_path, "w") as f: + json.dump(data, f, indent=4) + print(f" Saved durations to {audio_json_path.name}") + + +def _probe_video_metadata( + 
project_path: Path, config, shared_assets_dir: Optional[Path], force: bool, verbose: bool +) -> None: + """Probe and cache video file duration and audio presence into videos.json. + + Runs once at import time so the render stage never needs to probe video files. + Shared entries are written back to shared_assets/videos.json (canonical source). + Local entries are written to the project's videos.json. + Skips entries that already have both fields unless --force is set. + """ + from .preprocessor import get_video_duration + from .renderer import _has_audio_stream + + if config and config.videos_path: + videos_json_path = project_path / config.videos_path + else: + videos_json_path = project_path / "media" / "videos" / "videos.json" + + if not videos_json_path.exists(): + return + + videos_dir = videos_json_path.parent + local_data = _read_json(videos_json_path) + + # Load shared_assets/videos.json separately — shared probes write there + shared_json_path = shared_assets_dir / "videos.json" if shared_assets_dir else None + shared_data = _read_json(shared_json_path) if shared_json_path and shared_json_path.exists() else {} + + local_updated = False + shared_updated = False + + for video_id, video_data in local_data.items(): + if "source_file" not in video_data: + continue + + is_shared = video_data.get("is_shared", False) + + # For shared entries, check the shared_assets/videos.json for cached values + if is_shared and video_id in shared_data: + canonical = shared_data[video_id] + else: + canonical = video_data + + if not force and "duration" in canonical and "has_audio" in canonical: + if verbose: + print(f" Video '{video_id}': cached ({canonical['duration']:.1f}s, audio={canonical['has_audio']})") + continue + + base_dir = shared_assets_dir if (is_shared and shared_assets_dir) else videos_dir + + # Mirror renderer._resolve_video_path: try output_file first, then source_file + video_path = None + output_file = video_data.get("output_file") + if output_file: + for 
candidate_dir in [base_dir, base_dir.parent]: + candidate = candidate_dir / output_file + if candidate.exists(): + video_path = candidate + break + mov_candidate = candidate.with_suffix(".mov") + if mov_candidate.exists(): + video_path = mov_candidate + break + + if video_path is None: + source_candidate = base_dir / video_data["source_file"] + if source_candidate.exists(): + video_path = source_candidate + + if video_path is None: + if verbose: + print(f" Video '{video_id}': file not found, skipping") + continue + + print(f" Probing video '{video_id}' ({video_path.name})...", end=" ", flush=True) + try: + duration = get_video_duration(video_path) + has_audio = _has_audio_stream(video_path) + result = {"duration": round(duration, 3), "has_audio": has_audio} + print(f"{duration:.1f}s, audio={has_audio}") + + if is_shared and video_id in shared_data: + # Write back to shared_assets/videos.json — canonical source for shared assets + shared_data[video_id].update(result) + shared_updated = True + else: + local_data[video_id].update(result) + local_updated = True + except Exception as e: + print(f"failed ({e})") + + if local_updated: + with open(videos_json_path, "w") as f: + json.dump(local_data, f, indent=4) + print(f" Saved metadata to {videos_json_path.name}") + + if shared_updated and shared_json_path: + with open(shared_json_path, "w") as f: + json.dump(shared_data, f, indent=4) + print(f" Saved shared metadata to {shared_json_path.name}") + + +def _sync_shared_videos_to_local( + project_path: Path, config, shared_assets_dir: Path, verbose: bool +) -> None: + """Append entries from shared_assets/videos.json into the project's local videos.json. + + Each new entry gets is_shared=true so the renderer looks in shared_assets_dir. + Existing local entries are never overwritten (preserves cutout, layer, filters, etc.). 
+ """ + shared_videos_json = shared_assets_dir / "videos.json" + if not shared_videos_json.exists(): + return + + shared_videos = _read_json(shared_videos_json) + if not shared_videos: + return + + if config and config.videos_path: + local_json_path = project_path / config.videos_path + else: + local_json_path = project_path / "media" / "videos" / "videos.json" + + local_videos: dict = {} + if local_json_path.exists(): + local_videos = _read_json(local_json_path) + + _METADATA_FIELDS = ("duration", "has_audio") + + added = [] + metadata_updated = [] + for video_id, shared_entry in shared_videos.items(): + if video_id in local_videos: + # Propagate any metadata fields that were probed into shared_assets/videos.json + changed = False + for field in _METADATA_FIELDS: + if field in shared_entry and local_videos[video_id].get(field) != shared_entry[field]: + local_videos[video_id][field] = shared_entry[field] + changed = True + if changed: + metadata_updated.append(video_id) + elif verbose: + print(f" Shared '{video_id}': already in local videos.json, skipping") + continue + # New entry — copy from shared and mark it as shared + local_entry = dict(shared_entry) + local_entry["is_shared"] = True + local_videos[video_id] = local_entry + added.append(video_id) + + if added or metadata_updated: + local_json_path.parent.mkdir(parents=True, exist_ok=True) + with open(local_json_path, "w", encoding="utf-8") as f: + json.dump(local_videos, f, indent=4) + if added: + print(f" Synced {len(added)} shared asset(s) to local videos.json: {', '.join(added)}") + if metadata_updated: + print(f" Updated metadata for {len(metadata_updated)} shared asset(s): {', '.join(metadata_updated)}") + elif verbose: + print(" No new shared assets to sync to local videos.json") + + def _find_shared_assets(project_path: Path) -> Optional[Path]: """Find the shared_assets directory. 
@@ -481,13 +747,22 @@ def _import_videos(videos_dir: Path, config, verbose: bool) -> None: # Also exclude files in subdirectories (proxy/, intermediate/, etc.) video_files = [f for f in video_files if f.parent == videos_dir] + # Ensure videos.json exists even if there are no video files yet + videos_json_path = videos_dir / "videos.json" + if not videos_json_path.exists(): + videos_dir.mkdir(parents=True, exist_ok=True) + with open(videos_json_path, "w", encoding="utf-8") as f: + json.dump({}, f, indent=2) + print( + f" Created empty {videos_json_path.relative_to(videos_dir.parent.parent)}" + ) + if not video_files: if verbose: print(f" No new video files found in {videos_dir}") return - # Load existing videos.json if it exists - videos_json_path = videos_dir / "videos.json" + # Load existing videos.json existing_videos: dict = {} if videos_json_path.exists(): existing_videos = _read_json(videos_json_path) @@ -523,7 +798,7 @@ def _import_videos(videos_dir: Path, config, verbose: bool) -> None: print(f" Added talking head segment: {video_id}") else: # Regular video - + video_entry["output_file"] = video_file.name video_entry["cutout"] = "square" video_entry["filter"] = [] @@ -545,43 +820,81 @@ def _import_videos(videos_dir: Path, config, verbose: bool) -> None: def _import_narration_segments(narration_dir: Path, config, verbose: bool) -> None: """Import narration video files into narration.json. - Scans the narration directory for video files and adds them to narration.json. - Uses the filename (without extension) as the segment_id. - Does not overwrite existing entries - only adds new ones. + Folder structure: + media/narration/raw_mov/ ← raw recordings from iPhone/QuickTime + media/narration/compressed/ ← H.265 copies (transcode 1st pass) + media/narration/processed/ ← chroma-keyed output (preprocess) + media/narration/narration.json - If a 'talkinghead' filter preset exists in default_filters, it will be - applied automatically to all narration segments. 
+ Scans processed/ for ready-to-stitch files and raw_mov/ for any new raw + recordings not yet represented in narration.json. + + Priority: processed/ files define the segment catalogue. + Raw files discovered in raw_mov/ add new entries pointing at raw_mov/ with + output_file preset to processed/<name>_processed.mov. """ video_extensions = {".mov", ".mp4", ".webm", ".avi", ".mkv", ".m4v"} - # Find all video files (exclude processed outputs and combined files) - video_files = [ - f - for f in narration_dir.iterdir() - if f.is_file() - and f.suffix.lower() in video_extensions - and "_processed" not in f.stem # Exclude any _processed files - and not f.name.startswith("narration_combined") - ] + processed_dir = narration_dir / "processed" + raw_dir = narration_dir / "raw_mov" + processed_dir.mkdir(parents=True, exist_ok=True) + raw_dir.mkdir(parents=True, exist_ok=True) - if not video_files: - if verbose: - print(f" No narration files found in {narration_dir}") - return - - # Load existing narration.json if it exists + # Load / create narration.json narration_json_path = narration_dir / "narration.json" existing_narration: dict = {} if narration_json_path.exists(): existing_narration = _read_json(narration_json_path) - # Get available filter presets from config default_filters = config.default_filters if config else {} - - # Add new segments (don't overwrite existing) added_count = 0 - for video_file in sorted(video_files): - # Use filename without extension as segment_id + + def _scan(directory: Path) -> list[Path]: + if not directory.exists(): + return [] + return sorted( + f + for f in directory.iterdir() + if f.is_file() + and f.suffix.lower() in video_extensions + and not f.name.startswith(".") + ) + + # 1. Scan processed/ — only add entries when NO raw_mov equivalent exists. + # If raw_mov has the source, step 2 will create the entry pointing there + # (with the filter chain), which is better for re-processing later. 
+ _raw_video_exts = {".mov", ".mp4", ".avi", ".mkv", ".m4v"} + for video_file in _scan(processed_dir): + segment_id = video_file.stem + # Strip _processed suffix for cleaner segment IDs if present + if segment_id.endswith("_processed"): + segment_id = segment_id[:-10] + + if segment_id in existing_narration: + if verbose: + print(f" Skipping {segment_id} (already exists)") + continue + + # If a raw_mov equivalent exists, skip — step 2 will handle it + raw_mov_has_file = raw_dir.exists() and any( + (raw_dir / f"{segment_id}{ext}").exists() + for ext in _raw_video_exts + ) + if raw_mov_has_file: + continue + + narration_entry = { + "source_file": f"processed/{video_file.name}", + } + narration_entry["use_audio_channels"] = "auto" + narration_entry["defer_loudnorm"] = True + + existing_narration[segment_id] = narration_entry + added_count += 1 + print(f" Added narration segment: {segment_id} (from processed/)") + + # 2. Scan raw/ — add entries for raw files not yet in narration.json + for video_file in _scan(raw_dir): segment_id = video_file.stem if segment_id in existing_narration: @@ -589,34 +902,33 @@ def _import_narration_segments(narration_dir: Path, config, verbose: bool) -> No print(f" Skipping {segment_id} (already exists)") continue - # Build the narration entry narration_entry = { - "source_file": video_file.name, - "output_file": f"{video_file.stem}_processed.mov", + "source_file": f"raw_mov/{video_file.name}", + "output_file": f"processed/{video_file.stem}_processed.mov", } - # Apply talkinghead preset if available if "talkinghead" in default_filters: narration_entry["cutout"] = "talkinghead" - - if "talkinghead" in default_filters: narration_entry["filter"] = "talkinghead" - # Default audio settings for narration - narration_entry["use_audio_channels"] = "left" + narration_entry["use_audio_channels"] = "auto" narration_entry["defer_loudnorm"] = True existing_narration[segment_id] = narration_entry added_count += 1 - print(f" Added narration segment: 
{segment_id}") + print(f" Added narration segment: {segment_id} (from raw_mov)") + + # Always write narration.json (creates it if missing) + with open(narration_json_path, "w", encoding="utf-8") as f: + json.dump(existing_narration, f, indent=2) if added_count > 0: - # Write updated narration.json - with open(narration_json_path, "w", encoding="utf-8") as f: - json.dump(existing_narration, f, indent=2) print(f" Updated narration.json (+{added_count} segments)") else: - print(f" No new narration segments to add") + if not existing_narration: + print(f" narration.json created (empty — add files to processed/ or raw_mov/)") + else: + print(f" No new narration segments to add") def _import_presenter_notes( @@ -697,6 +1009,83 @@ def _import_presenter_notes( print(f" {non_empty} slides have presenter notes") +# ============================================================================= +# Tasks File +# ============================================================================= + +_TASKS_VIDEO_PREFIXES = { + "video:": 6, + "vft:": 4, + "vfb:": 4, + "vst:": 4, + "vsb:": 4, + "narration:": 10, +} + + +def _collect_missing_video_markers( + markers: list[str], videos: dict +) -> list[tuple[str, str]]: + """Return (marker_text, video_id) for video markers not defined in videos.json.""" + missing = [] + seen = set() + for marker in markers: + matched = next((p for p in _TASKS_VIDEO_PREFIXES if marker.startswith(p)), None) + if matched is None: + continue + video_id = marker[_TASKS_VIDEO_PREFIXES[matched] :] + if video_id not in videos and video_id not in seen: + seen.add(video_id) + missing.append((marker, video_id)) + return missing + + +def _write_tasks_file( + project_path: Path, + missing_videos: list[tuple[str, str]], + alignment_issues: list[tuple[str, str]], +) -> None: + """Write tasks.md to project_path with missing assets and alignment issues.""" + tasks_path = project_path / "tasks.md" + today = datetime.now().strftime("%Y-%m-%d") + + lines = [ + f"# Tasks: 
{project_path.name}", + f"_Generated: {today}_", + "", + ] + + if missing_videos: + lines += [ + f"## Missing Video Assets ({len(missing_videos)})", + "Referenced in manuscript.txt but not defined in videos.json.", + "", + ] + for marker, video_id in missing_videos: + lines.append(f"- [ ] `{video_id}` — referenced as `[{marker}]`") + lines.append("") + + if alignment_issues: + lines += [ + f"## Slide Alignment Issues ({len(alignment_issues)})", + "Slide markers that could not be matched to the spoken narration (likely adlibbed).", + "", + ] + for marker_id, context in alignment_issues: + lines.append(f'- [ ] `{marker_id}` — _"{context}"_') + lines.append("") + + if not missing_videos and not alignment_issues: + lines += ["_No outstanding tasks._", ""] + + tasks_path.write_text("\n".join(lines), encoding="utf-8") + print( + f" Tasks written → tasks.md" + + (f" ({len(missing_videos)} missing videos)" if missing_videos else "") + + (f" ({len(alignment_issues)} alignment issues)" if alignment_issues else "") + ) + + # ============================================================================= # Validate Command # ============================================================================= @@ -736,6 +1125,10 @@ def cmd_validate(project_path: Path, verbose: bool) -> int: for w in warnings: print(f" Warning: {w}") + # Write tasks file (missing assets only — no alignment data at validate time) + missing_videos = _collect_missing_video_markers(markers, videos) + _write_tasks_file(project_path, missing_videos, alignment_issues=[]) + print("Validation passed.") return 0 @@ -753,31 +1146,30 @@ def cmd_preprocess( workers: int = 1, res: str = "full", ) -> int: - """Run preprocessing pipeline on narration segments and videos.""" + """Run preprocessing pipeline on narration segments and videos. + + Discovers source files directly from raw_mov/ (preferred) or raw_mp4/ + (fallback when raw_mov/ is empty). 
Does NOT require narration.json to + exist — it writes/updates narration.json after processing. + """ from concurrent.futures import ThreadPoolExecutor, as_completed - from .parser import parse_project_config, parse_narration, parse_videos - from .preprocessor import ( - preprocess_video, - create_downscaled_videos, - RES_CONFIGS, - ) + from .parser import parse_project_config, parse_videos + from .preprocessor import preprocess_video + from .models import VideoSource as _VideoSource mode_str = f" ({res.upper()})" if res != "full" else "" print(f"Preprocessing narration: {project_path.name}{mode_str}") config = parse_project_config(project_path) - narration, narration_dir = parse_narration(project_path, config) - if not narration: - print(" No narration segments found in media/narration/narration.json") - print(" Run 'gnommo -p import' first to populate narration.json") - return 1 + # Narration directory — always media/narration/ + narration_dir = project_path / "media" / "narration" + narration_dir.mkdir(parents=True, exist_ok=True) - # Downscale source files first if a preview res was requested - if res != "full": - narration_dir = create_downscaled_videos(narration_dir, narration, res, force, verbose) - cfg = RES_CONFIGS[res] - print(f" Working from {res} dir ({cfg[0]}x{cfg[1]}): {narration_dir}") + raw_dir = narration_dir / "raw_mov" + compressed_dir = narration_dir / "raw_mp4" + processed_dir = narration_dir / "processed" + processed_dir.mkdir(parents=True, exist_ok=True) # Resolve intermediate directory gnommo_scratch = None @@ -787,71 +1179,163 @@ def cmd_preprocess( gnommo_scratch = project_path / gnommo_scratch print(f" Using intermediate dir: {gnommo_scratch}") - # Filter segments that need preprocessing - segments_to_process = [] - for segment_id, segment_source in narration.items(): - if not segment_source.filter: - if verbose: - print(f" {segment_id}: No filters defined, skipping.") + # --- Filter pipeline --- + talkinghead_filter = 
(config.default_filters or {}).get("talkinghead", []) + if not talkinghead_filter: + print(" ERROR: No 'talkinghead' filter defined in project.json default_filters.") + print(" Add a 'talkinghead' entry under 'default_filters' in project.json.") + return 1 + + # --- Source discovery --- + _video_exts = {".mov", ".mp4", ".avi", ".mkv", ".m4v"} + + def _scan_dir(d: Path) -> list[Path]: + if not d.exists(): + return [] + return sorted( + f for f in d.iterdir() + if f.is_file() and f.suffix.lower() in _video_exts and not f.name.startswith(".") + ) + + raw_mov_files = _scan_dir(raw_dir) + raw_mp4_files = _scan_dir(compressed_dir) + + if raw_mov_files: + source_files = raw_mov_files + using_compressed = False + elif raw_mp4_files: + source_files = raw_mp4_files + using_compressed = True + print(" WARNING: raw_mov/ is empty — using compressed files from raw_mp4/ instead. Quality may be reduced.") + else: + print(f" No source files found in raw_mov/ or raw_mp4/.") + print(f" Place .mov recordings in {raw_dir}") + return 1 + + # --- Load existing narration.json to preserve per-segment settings --- + narration_json_path = narration_dir / "narration.json" + existing_narration: dict = {} + if narration_json_path.exists(): + existing_narration = _read_json(narration_json_path) + + # --- Build segments list --- + segments_to_process: list[tuple[str, _VideoSource]] = [] + skipped_count = 0 + + for source_file in source_files: + segment_id = source_file.stem + # Strip _compressed suffix (raw_mp4 naming convention) + if using_compressed and segment_id.endswith("_compressed"): + segment_id = segment_id[: -len("_compressed")] + + output_file = f"processed/{segment_id}_processed.mov" + output_path = narration_dir / output_file + + if output_path.exists() and not force: + print(f" {segment_id}: output exists, skipping (use --force to reprocess)") + skipped_count += 1 continue - segments_to_process.append((segment_id, segment_source)) + + # Filter: from existing narration.json entry (if 
explicitly set), else talkinghead + existing_entry = existing_narration.get(segment_id, {}) + raw_filter = existing_entry.get("filter") + if raw_filter: + if isinstance(raw_filter, str): + filter_list = (config.default_filters or {}).get(raw_filter, talkinghead_filter) + else: + filter_list = raw_filter + else: + filter_list = talkinghead_filter + + video_source = _VideoSource( + source_file=source_file, + filter=filter_list, + output_file=output_file, + use_audio_channels=existing_entry.get("use_audio_channels", "auto"), + defer_loudnorm=existing_entry.get("defer_loudnorm", True), + ) + segments_to_process.append((segment_id, video_source)) if not segments_to_process: - print("\nNo narration segments to preprocess.") + if skipped_count: + print(f"\n All {skipped_count} segment(s) already preprocessed. Use --force to reprocess.") + else: + print("\n No segments to preprocess.") return 0 if dry_run: for segment_id, segment_source in segments_to_process: print(f"\n Would preprocess: {segment_id}") print(f" Source: {segment_source.source_file}") - print(f" Output: {segment_source.output_file or 'N/A'}") - for step in segment_source.filter: - print(f" - {step}") + print(f" Output: {segment_source.output_file}") + print(f" Filters: {len(segment_source.filter)} step(s)") return 0 - # Process segments + # --- Process segments --- + successfully_processed: list[tuple[str, _VideoSource]] = [] + if workers > 1 and len(segments_to_process) > 1: - # Parallel processing num_workers = min(workers, len(segments_to_process)) - print( - f"\n Processing {len(segments_to_process)} segments in parallel ({num_workers} workers)" - ) + print(f"\n Processing {len(segments_to_process)} segments in parallel ({num_workers} workers)") def process_segment_task(task): - segment_id, segment_source = task + seg_id, seg_source = task preprocess_video( - narration_dir, - segment_id, - segment_source, - verbose=False, - force=force, - custom_gnommo_scratch=gnommo_scratch, + narration_dir, seg_id, 
seg_source, + verbose=False, force=force, custom_gnommo_scratch=gnommo_scratch, ) - return segment_id + return task completed = 0 with ThreadPoolExecutor(max_workers=num_workers) as executor: - futures = { - executor.submit(process_segment_task, task): task - for task in segments_to_process - } + futures = {executor.submit(process_segment_task, t): t for t in segments_to_process} for future in as_completed(futures): - segment_id = future.result() + seg_id, seg_source = future.result() completed += 1 - print(f" Completed: {segment_id} ({completed}/{len(segments_to_process)})") + print(f" Completed: {seg_id} ({completed}/{len(segments_to_process)})") + output_path = narration_dir / seg_source.output_file + if output_path.exists(): + successfully_processed.append((seg_id, seg_source)) else: - # Sequential processing for segment_id, segment_source in segments_to_process: print(f"\n Processing: {segment_id}") - print(f" Source file: {segment_source.source_file}") - print(f" Output file: {segment_source.output_file or 'N/A'}") + print(f" Source: {segment_source.source_file}") + print(f" Output: {segment_source.output_file}") print(f" Filters: {len(segment_source.filter)} step(s)") - preprocess_video( - narration_dir, segment_id, segment_source, verbose, force, gnommo_scratch + narration_dir, segment_id, segment_source, + verbose, force, gnommo_scratch, ) + output_path = narration_dir / segment_source.output_file + if output_path.exists(): + successfully_processed.append((segment_id, segment_source)) - print(f"\n Run 'gnommo -p stitch' to stitch narration segments into one fulll length narration file.") + # --- Update narration.json --- + # Write processed segments; preserve any existing per-segment settings (skip/take/etc.) 
+ _PRESERVE_KEYS = ("skip", "take", "begin", "end", "cutout", "use_audio_channels", + "defer_loudnorm", "volume", "zoom") + for segment_id, segment_source in successfully_processed: + existing_entry = existing_narration.get(segment_id, {}) + entry: dict = {} + # Preserve settings the user may have set (trim points, cutout, etc.) + for key in _PRESERVE_KEYS: + if key in existing_entry: + entry[key] = existing_entry[key] + # Point source_file to the processed output + entry["source_file"] = segment_source.output_file + entry.setdefault("use_audio_channels", "auto") + entry.setdefault("defer_loudnorm", True) + existing_narration[segment_id] = entry + + with open(narration_json_path, "w", encoding="utf-8") as f: + json.dump(existing_narration, f, indent=2) + + if successfully_processed: + print(f"\n Updated narration.json ({len(successfully_processed)} segment(s))") + + print( + f"\n Run 'gnommo -p stitch' to stitch narration segments into one full length narration file." + ) # Also preprocess videos from videos.json (e.g. 
chroma key, color grade) videos, videos_dir = parse_videos(project_path, config) @@ -867,13 +1351,19 @@ def cmd_preprocess( if video_source.output_file: output_path = videos_dir / video_source.output_file if output_path.exists() and not force: - print(f" {video_id}: output exists, skipping (use --force to reprocess)") + print( + f" {video_id}: output exists, skipping (use --force to reprocess)" + ) continue if dry_run: - print(f" Would preprocess: {video_id} ({len(video_source.filter)} filter(s))") + print( + f" Would preprocess: {video_id} ({len(video_source.filter)} filter(s))" + ) continue print(f" Processing: {video_id}") - preprocess_video(videos_dir, video_id, video_source, verbose, force, gnommo_scratch) + preprocess_video( + videos_dir, video_id, video_source, verbose, force, gnommo_scratch + ) print("\nPreprocessing complete.") return 0 @@ -938,7 +1428,9 @@ def cmd_trim( continue print(f" {seg_id}: analysing...", end="", flush=True) - first_sound, last_sound = detect_silence_bounds(source_path, noise_threshold_db=threshold_db, verbose=verbose) + first_sound, last_sound = detect_silence_bounds( + source_path, noise_threshold_db=threshold_db, verbose=verbose + ) total_dur = get_video_duration(source_path) new_skip = max(0.0, round(first_sound - 0.5, 3)) @@ -964,6 +1456,368 @@ def cmd_trim( return 0 +# ============================================================================= +# Transcode Command — compress narration folder to H.265 +# ============================================================================= + + +def _get_video_codec(path: Path) -> str: + """Return the codec name of the first video stream (e.g. 
'hevc', 'prores', 'h264').""" + result = subprocess.run( + [ + "ffprobe", + "-v", + "error", + "-select_streams", + "v:0", + "-show_entries", + "stream=codec_name", + "-of", + "default=noprint_wrappers=1:nokey=1", + str(path), + ], + capture_output=True, + text=True, + ) + return result.stdout.strip().lower() + + +def _transcode_processed_files( + project_path: Path, + verbose: bool, + dry_run: bool, + replace: bool, + force: bool, + alpha_quality: float, +) -> int: + """ + Compress _processed.mov files (ProRes 4444 + alpha) to HEVC+alpha via + Apple VideoToolbox. + + For each _processed.mov: + 1. Transcode to a temp file using hevc_videotoolbox with alpha. + 2. Move the ProRes original into a prores/ subdirectory (never deleted). + 3. Rename the compressed file to the original _processed.mov name + so stitch/render find it unchanged. + + The prores/ subdirectory is never scanned — only top-level files are candidates. + If prores/ already exists the file has already been compressed — + skip unless --force. 
+ """ + from .parser import parse_project_config, parse_narration + + print(f"Transcoding processed files (HEVC+alpha): {project_path.name}") + + config = parse_project_config(project_path) + + # Resolve narration_dir and videos_dir — processed files live in both + _narration, narration_dir = parse_narration(project_path, config) + videos_json_path = project_path / config.videos_path + videos_dir = videos_json_path.parent + + # Glob both directories for *_processed.mov; skip any _prores.mov archives + search_dirs = [d for d in [narration_dir, videos_dir] if d.exists()] + candidates: list[Path] = [] + seen: set[Path] = set() + for d in search_dirs: + for p in d.glob("*_processed.mov"): + if p not in seen and "_prores" not in p.stem: + seen.add(p) + candidates.append(p) + + if not candidates: + print(" No _processed.mov files found.") + return 0 + + # Smallest first + candidates = [c for c in candidates if c.exists()] + candidates.sort(key=lambda f: f.stat().st_size) + + total_original = 0 + total_compressed = 0 + transcoded = 0 + skipped = 0 + + for src in candidates: + # Archive goes into prores/ subdirectory alongside the source file + prores_dir = src.parent / "prores" + archive = prores_dir / src.name + + # Always skip files already encoded as HEVC — regardless of --replace or --force + if _get_video_codec(src) == "hevc": + print(f" {src.name}: already HEVC, skipping") + skipped += 1 + continue + + # Without --replace, skip if the archive already exists in prores/ + if not replace and archive.exists() and not force: + size_mb = src.stat().st_size / 1_048_576 + print( + f" {src.name}: already compressed ({size_mb:.1f} MB), skipping (use --force to redo)" + ) + skipped += 1 + continue + + src_mb = src.stat().st_size / 1_048_576 + print(f" {src.name} ({src_mb:.1f} MB) → HEVC+alpha", end="") + + if dry_run: + print(" [dry-run]") + continue + + print(" ...", end="", flush=True) + + tmp_out = src.with_name(src.stem + "_hevc_tmp.mov") + + cmd = [ + "ffmpeg", + "-i", + 
str(src), + "-c:v", + "hevc_videotoolbox", + "-allow_sw", + "1", + "-alpha_quality", + str(alpha_quality), + "-tag:v", + "hvc1", + "-c:a", + "copy", + "-y", + str(tmp_out), + ] + + if verbose: + print() + print(" " + " ".join(cmd)) + + result = subprocess.run( + cmd, + capture_output=not verbose, + text=True, + ) + + if result.returncode != 0: + print(f"\n ERROR transcoding {src.name}") + if tmp_out.exists(): + tmp_out.unlink() + if not verbose and result.stderr: + last_lines = result.stderr.strip().splitlines()[-5:] + for line in last_lines: + print(f" {line}", file=sys.stderr) + continue + + out_mb = tmp_out.stat().st_size / 1_048_576 + ratio = (1.0 - tmp_out.stat().st_size / src.stat().st_size) * 100 + + if replace: + # Delete ProRes original, move compressed into its place + src.unlink() + tmp_out.rename(src) + print( + f"\r {src.name} ({src_mb:.1f} MB) → HEVC+alpha" + f" ({out_mb:.1f} MB, -{ratio:.0f}%)" + ) + else: + # Move ProRes original into prores/ subdirectory, compressed takes its place + prores_dir.mkdir(exist_ok=True) + src.rename(archive) + tmp_out.rename(src) + print( + f"\r {src.name} ({src_mb:.1f} MB) → HEVC+alpha" + f" ({out_mb:.1f} MB, -{ratio:.0f}%)" + f" [ProRes → prores/{archive.name}]" + ) + + total_original += int(src_mb * 1_048_576) + total_compressed += int(out_mb * 1_048_576) + transcoded += 1 + + print() + if dry_run: + print(f" [dry-run] Would compress {len(candidates) - skipped} file(s)") + return 0 + + if transcoded > 0: + orig_mb = total_original / 1_048_576 + comp_mb = total_compressed / 1_048_576 + saved_mb = orig_mb - comp_mb + ratio = (saved_mb / orig_mb * 100) if orig_mb else 0 + print( + f" Compressed {transcoded} file(s): {orig_mb:.1f} MB → {comp_mb:.1f} MB" + f" (saved {saved_mb:.1f} MB, -{ratio:.0f}%)" + ) + if skipped: + print(f" Skipped {skipped} already-compressed file(s)") + + return 0 + + +def cmd_transcode( + project_path: Path, + verbose: bool, + dry_run: bool = False, + replace: bool = False, + crf: int = 23, + 
force: bool = False, + processed: bool = False, + alpha_quality: float = 0.75, +) -> int: + """ + Transcode project video files to save disk space. + + Default (1st pass, before preprocess): + Compress raw narration recordings to H.265. Output: {stem}_compressed.mp4. + Skips files with '_compressed.' or '_processed.' in the name. + Use --replace to delete originals after success. + + With --processed (2nd pass, after preprocess): + Compress _processed.mov files (ProRes 4444 + alpha) to HEVC+alpha. + Archives the ProRes original as _prores.mov (never deleted). + The compressed file takes the original _processed.mov name so the + rest of the pipeline (stitch, render) finds it unchanged. + Uses Apple VideoToolbox (hevc_videotoolbox) with --alpha-quality. + """ + if processed: + return _transcode_processed_files( + project_path, verbose, dry_run, replace, force, alpha_quality + ) + + from .parser import parse_project_config, parse_narration + + print(f"Transcoding narration: {project_path.name}") + + config = parse_project_config(project_path) + _narration, narration_dir = parse_narration(project_path, config) + + raw_dir = narration_dir / "raw_mov" + compressed_dir = narration_dir / "raw_mp4" + + if not raw_dir.exists(): + print(f" raw/ directory not found: {raw_dir}", file=sys.stderr) + print(f" Place raw recordings in {raw_dir} and run 'import' first.") + return 1 + + compressed_dir.mkdir(parents=True, exist_ok=True) + + # Collect eligible video files from raw/ only + video_extensions = {".mp4", ".mov", ".avi", ".mkv", ".m4v", ".mts", ".webm"} + + candidates = [ + f + for f in raw_dir.iterdir() + if f.is_file() + and f.suffix.lower() in video_extensions + and not f.name.startswith(".") + ] + + if not candidates: + print(f" No video files found in {raw_dir}.") + return 0 + + # Process smallest files first + candidates.sort(key=lambda f: f.stat().st_size) + + total_original = 0 + total_compressed = 0 + transcoded = 0 + skipped = 0 + + for src in candidates: + # 
Output: compressed/.mp4 (clean name, no _compressed suffix) + output = compressed_dir / f"{src.stem}.mp4" + + if output.exists() and not force: + size_mb = output.stat().st_size / 1_048_576 + print( + f" {src.name}: already transcoded ({size_mb:.1f} MB), skipping (use --force to redo)" + ) + skipped += 1 + continue + + src_mb = src.stat().st_size / 1_048_576 + print( + f" raw/{src.name} ({src_mb:.1f} MB) → compressed/{output.name}", end="" + ) + + if dry_run: + print(" [dry-run]") + continue + + print(" ...", end="", flush=True) + + cmd = [ + "ffmpeg", + "-i", + str(src), + "-vf", + "scale=-2:1080", + "-c:v", + "libx265", + "-crf", + str(crf), + "-preset", + "medium", + "-c:a", + "aac", + "-b:a", + "128k", + "-tag:v", + "hvc1", + "-y", + str(output), + ] + + if verbose: + print() + print(" " + " ".join(cmd)) + + result = subprocess.run( + cmd, + capture_output=not verbose, + text=True, + ) + + if result.returncode != 0: + print(f"\n ERROR transcoding {src.name}") + if not verbose and result.stderr: + # Print last few lines of ffmpeg stderr for diagnosis + last_lines = result.stderr.strip().splitlines()[-5:] + for line in last_lines: + print(f" {line}", file=sys.stderr) + continue + + out_mb = output.stat().st_size / 1_048_576 + ratio = (1.0 - output.stat().st_size / src.stat().st_size) * 100 + print( + f"\r raw/{src.name} ({src_mb:.1f} MB) → compressed/{output.name} ({out_mb:.1f} MB, -{ratio:.0f}%)" + ) + + total_original += src.stat().st_size + total_compressed += output.stat().st_size + transcoded += 1 + + print() + if dry_run: + print(f" [dry-run] Would transcode {len(candidates) - skipped} file(s)") + return 0 + + if transcoded > 0: + orig_mb = total_original / 1_048_576 + comp_mb = total_compressed / 1_048_576 + saved_mb = orig_mb - comp_mb + ratio = (saved_mb / orig_mb * 100) if orig_mb else 0 + print( + f" Transcoded {transcoded} file(s): {orig_mb:.1f} MB → {comp_mb:.1f} MB (saved {saved_mb:.1f} MB, -{ratio:.0f}%)" + ) + if replace: + print(f" Originals 
deleted.") + if skipped: + print(f" Skipped {skipped} already-transcoded file(s)") + + return 0 + + # ============================================================================= # Stitch Command (fast iteration on narration segments) # ============================================================================= @@ -983,7 +1837,11 @@ def cmd_stitch( Also creates/updates an entry in videos.json with volume property. """ from .parser import parse_project_config, parse_narration, parse_videos - from .preprocessor import stitch_narration_segments, ensure_downscaled_files_exist, RES_CONFIGS + from .preprocessor import ( + stitch_narration_segments, + ensure_downscaled_files_exist, + RES_CONFIGS, + ) mode_str = f" ({res.upper()})" if res != "full" else "" print(f"Stitching narration: {project_path.name}{mode_str}") @@ -1006,7 +1864,9 @@ def cmd_stitch( # Use downscaled dirs for non-full res if res != "full": cfg = RES_CONFIGS[res] - narration_dir = ensure_downscaled_files_exist(narration_dir, res, force=False, verbose=verbose) + narration_dir = ensure_downscaled_files_exist( + narration_dir, res, force=False, verbose=verbose + ) videos_dir = videos_dir / cfg[2] videos_dir.mkdir(parents=True, exist_ok=True) print(f" Using {res} dirs: {narration_dir}, {videos_dir}") @@ -1052,7 +1912,9 @@ def cmd_stitch( # Get cutout from first narration segment first_seg = narration[segment_ids[0]] - cutout = first_seg.cutout or "talkinghead" # Default to audioonly if no cutout specified + cutout = ( + first_seg.cutout or "talkinghead" + ) # Create/update narration_combined entry existing_videos["narration_combined"] = { @@ -1121,9 +1983,21 @@ def _print_render_plan_details(plan, marker_timings, slides: dict) -> None: camera_events_by_time[t] = [] camera_events_by_time[t].append(event) + # Detect slide markers that share a timestamp with the adjacent slide marker. + # Two slides at the same time means alignment is ambiguous — treat as an error. 
+ slide_timings = [ + t for t in marker_timings if t.marker_id in slides and t.timestamp >= 0 + ] + collision_ids: set[str] = set() + for a, b in zip(slide_timings, slide_timings[1:]): + if abs(a.timestamp - b.timestamp) < 0.1: + collision_ids.add(a.marker_id) + collision_ids.add(b.marker_id) + # Print each marker timing aligned_count = 0 unaligned_count = 0 + collision_count = 0 for timing in marker_timings: marker_id = timing.marker_id @@ -1132,7 +2006,6 @@ def _print_render_plan_details(plan, marker_timings, slides: dict) -> None: context = context[:47] + "..." if timing.timestamp >= 0: - aligned_count += 1 time_str = _format_time(timing.timestamp) # Show confidence if fuzzy match @@ -1142,28 +2015,54 @@ def _print_render_plan_details(plan, marker_timings, slides: dict) -> None: # Determine marker type for display if marker_id in slides: - print(f' {marker_id:6} {time_str}{conf_str} "{context}"') - elif marker_id.startswith("video:"): - video_id = marker_id[6:] + if marker_id in collision_ids: + collision_count += 1 + print( + f' {marker_id:6} {time_str}{conf_str} COLLISION - same time as adjacent slide - "{context}"' + ) + else: + aligned_count += 1 + print(f' {marker_id:6} {time_str}{conf_str} "{context}"') + elif any( + marker_id.startswith(p) + for p in ("video:", "vft:", "vfb:", "vst:", "vsb:", "vft:", "vfbp:", "vstp:", "vsbp:") + ): + aligned_count += 1 + pfx_len = next( + len(p) + for p in ("video:", "vft:", "vfb:", "vst:", "vsb:", "vft:", "vfbp:", "vstp:", "vsbp:") + if marker_id.startswith(p) + ) + video_id = marker_id[pfx_len:] # Find corresponding event by video_id event = video_events_by_id.get(video_id) if event: - cutout = event.video_source.cutout - duration = event.end_time - event.start_time + cutout_name = event.cutout_name + end_on = event.video_source.end_on or "next_slide" + layer_tag = f" [{event.layer}]" else: - cutout = "?" - duration = 0 + cutout_name = "?" 
+ end_on = "next_slide" + layer_tag = "" + + cache_ind = " 📁" if video_id in plan.cached_files else "" - print(f" {marker_id:20} {time_str} in '{cutout}' ({duration:.1f}s){cache_ind}") + print( + f" {marker_id:20} {time_str} in '{cutout_name}' [{end_on}]{layer_tag}{cache_ind}" + ) elif marker_id.startswith("narration:"): + aligned_count += 1 video_id = marker_id[10:] cache_ind = " 📁" if video_id in plan.cached_files else "" print(f" {marker_id:20} {time_str} (continuous){cache_ind}") elif marker_id in CAMERA_PRESETS: + aligned_count += 1 print(f" {time_str} [{marker_id}]") - elif marker_id.startswith("A"): + elif marker_id.startswith("audio:"): + aligned_count += 1 print(f" {time_str} [audio:{marker_id[1:]}]") else: + aligned_count += 1 print(f' {marker_id:6} {time_str} "{context}"') else: unaligned_count += 1 @@ -1174,12 +2073,23 @@ def _print_render_plan_details(plan, marker_timings, slides: dict) -> None: # Summary total_markers = len(marker_timings) slide_markers = [t for t in marker_timings if t.marker_id in slides] - aligned_slides = len([t for t in slide_markers if t.timestamp >= 0]) + good_slides = len( + [ + t + for t in slide_markers + if t.timestamp >= 0 and t.marker_id not in collision_ids + ] + ) total_slides = len(slide_markers) - status = "OK" if unaligned_count == 0 else f"{unaligned_count} UNALIGNED" + issues = [] + if unaligned_count: + issues.append(f"{unaligned_count} UNALIGNED") + if collision_count: + issues.append(f"{collision_count} COLLISION") + status = "OK" if not issues else ", ".join(issues) print(f" Markers: {aligned_count}/{total_markers} aligned ({status})") - print(f" Slides: {aligned_slides}/{total_slides}") + print(f" Slides: {good_slides}/{total_slides}") print( f" Videos: {len(plan.video_events)} triggered, {len(plan.narration_videos)} always-visible" ) @@ -1211,6 +2121,61 @@ def _parse_slide_range(slides_arg: str) -> tuple[str, Optional[str]]: return start_slide, end_slide +def _writeback_video_metadata(plan, project_path, 
config) -> None: + """Write back cutout/layer derived from shorthand markers to videos.json. + + When a shorthand like [vfb:FARTSection1] is used and FARTSection1 has no + 'cutout' set in videos.json, this persists the resolved cutout (and layer if + the shorthand implies a non-default layer) back to the file. Once written, + subsequent renders read the value directly and no further write-back occurs. + """ + import json + + videos_json_path = project_path / config.videos_path + if not videos_json_path.exists(): + return + + # Collect field updates per video_id + writebacks: dict[str, dict] = {} + for event in plan.video_events: + video_id = event.video_id + source = event.video_source + if source.is_shared: + continue # shared videos live in their own file + + updates = {} + if source.cutout is None and event.cutout_name: + updates["cutout"] = event.cutout_name + if event.layer != source.layer: + updates["layer"] = event.layer + + if updates: + writebacks.setdefault(video_id, {}).update(updates) + + if not writebacks: + return + + with open(videos_json_path, "r", encoding="utf-8") as f: + raw = json.load(f) + + changed = False + for video_id, updates in writebacks.items(): + if video_id not in raw: + continue + for field, value in updates.items(): + if raw[video_id].get(field) != value: + raw[video_id][field] = value + changed = True + + if changed: + with open(videos_json_path, "w", encoding="utf-8") as f: + json.dump(raw, f, indent=2, ensure_ascii=False) + written = ", ".join( + f"{vid}({', '.join(upd)})" for vid, upd in writebacks.items() + ) + print(f" Updated videos.json: {written}") + + def cmd_render( project_path: Path, verbose: bool, @@ -1275,7 +2240,13 @@ def cmd_render( # Skip downscaling sources that have a preprocessed output_file — the # renderer will use the full-res processed version instead, saving disk space. 
sources_with_output = {v.source_file for v in videos.values() if v.output_file} - videos_dir = ensure_downscaled_files_exist(videos_dir, res, force=False, verbose=verbose, skip_sources=sources_with_output) + videos_dir = ensure_downscaled_files_exist( + videos_dir, + res, + force=False, + verbose=verbose, + skip_sources=sources_with_output, + ) if verbose: print(f" Using {res} dir: {videos_dir}") audio, audio_dir = parse_audio(project_path, config) @@ -1291,7 +2262,9 @@ def cmd_render( transcript_path = resolved_combined.with_suffix(".transcript.json") config.main_video = "narration_combined" if verbose: - print(f" Using combined narration: {resolved_combined.name} (volume={videos['narration_combined'].volume})") + print( + f" Using combined narration: {resolved_combined.name} (volume={videos['narration_combined'].volume})" + ) elif isinstance(config.main_video, list) and len(config.main_video) > 1: # Legacy: Multi-segment narration with main_video array in project.json resolved_combined, _ = resolve_with_cache(combined_path, project_path) @@ -1396,6 +2369,9 @@ def cmd_render( if plan.time_offset > 0: print(f" Time offset: {plan.time_offset:.1f}s (partial render)") + # Persist shorthand-derived cutout/layer back to videos.json (idempotent) + _writeback_video_metadata(plan, project_path, config) + # Print detailed render plan with alignment info _print_render_plan_details(plan, marker_timings, slides) if plan.audio_events: @@ -1425,6 +2401,24 @@ def cmd_render( f"for {pause.duration:.1f}s (narration freezes at {_format_time(pause.narration_time)})" ) + # Write tasks file with both missing assets and alignment issues + missing_videos = _collect_missing_video_markers(markers, videos) + slide_timings_for_collision = [ + t for t in marker_timings if t.marker_id in slides and t.timestamp >= 0 + ] + collision_ids_render = set() + for _a, _b in zip(slide_timings_for_collision, slide_timings_for_collision[1:]): + if abs(_a.timestamp - _b.timestamp) < 0.1: + 
collision_ids_render.add(_a.marker_id) + collision_ids_render.add(_b.marker_id) + alignment_issues = [ + (t.marker_id, t.context) + for t in marker_timings + if t.marker_id in slides + and (t.timestamp < 0 or t.marker_id in collision_ids_render) + ] + _write_tasks_file(project_path, missing_videos, alignment_issues) + # Check for unaligned markers unaligned = [t for t in marker_timings if t.timestamp < 0] if unaligned: @@ -1505,8 +2499,9 @@ def cmd_transcribe( from .transcriber import transcribe_video, save_transcript, words_to_srt from .parser import parse_project_config, parse_videos from .preprocessor import ensure_downscaled_files_exist + config = parse_project_config(project_path) - + # Handle --final mode: transcribe the rendered output for YouTube captions if final: path = project_path / "out" / f"{config.output_video}.mp4" @@ -1515,7 +2510,6 @@ def cmd_transcribe( mode_str = f" ({res.upper()})" if res != "full" else "" print(f"Transcribing: {project_path.name}{mode_str}") - videos, videos_dir = parse_videos(project_path, config) if not videos: print("Error: No videos defined in videos.json", file=sys.stderr) @@ -1523,7 +2517,9 @@ def cmd_transcribe( # Non-full res: use downscaled video directory if res != "full": - videos_dir = ensure_downscaled_files_exist(videos_dir, res, force=False, verbose=verbose) + videos_dir = ensure_downscaled_files_exist( + videos_dir, res, force=False, verbose=verbose + ) # Check for multi-segment narration (concatenated file) if isinstance(config.main_video, list) and len(config.main_video) > 1: @@ -1578,7 +2574,6 @@ def _transcribe_final(final_video: Path, verbose: bool) -> int: print(f"Transcribing final output: {final_video}") - if not final_video.exists(): print(f"Error: Final video not found: {final_video}", file=sys.stderr) print("Run 'gnommo render' first.", file=sys.stderr) @@ -1722,6 +2717,17 @@ def cmd_align(project_path: Path, verbose: bool) -> int: # 
============================================================================= +def _files_modified_since(root: Path, since: float, pattern: str) -> bool: + """Return True if any file matching pattern under root has mtime > since.""" + try: + for p in root.rglob(pattern): + if p.is_file() and p.stat().st_mtime > since: + return True + except (OSError, PermissionError): + pass + return False + + def cmd_all( project_path: Path, verbose: bool, @@ -1729,32 +2735,86 @@ def cmd_all( res: str = "full", force: bool = False, ) -> int: - """Run full pipeline: preprocess → stitch → render → handoff.""" + """Run full pipeline: import → transcode → preprocess → transcode --processed → trim → stitch → render → handoff. + + Cascade rule: if any stage produces output, all subsequent stages are forced + to re-run (cascade_force=True), regardless of whether --force was passed. + This ensures downstream caches are always consistent with upstream changes. + """ from .handoff import cmd_handoff print(f"=== Full Pipeline: {project_path.name} ===\n") - print(">>> Step 1/5: Import\n") - result = cmd_import(project_path, force, verbose) + # cascade_force starts at --force. Once any stage does real work it flips to + # True so all downstream stages re-run unconditionally. + cascade_force = force + + print(">>> Step 1/8: Import\n") + result = cmd_import(project_path, cascade_force, verbose) if result != 0: return result - print("\n>>> Step 2/5: Preprocess\n") - result = cmd_preprocess(project_path, verbose, dry_run, force, workers=1, res=res) + print("\n>>> Step 2/8: Transcode narration (H.265)\n") + t0 = time.time() + result = cmd_transcode( + project_path, verbose, dry_run, replace=False, crf=23, force=cascade_force + ) + if result != 0: + return result + # Step 2 does not cascade: preprocess already checks its own output existence. + # A broad *_compressed.mp4 pattern would falsely match pre-existing raw_mp4/ sources. 
+ + print("\n>>> Step 3/8: Preprocess\n") + t0 = time.time() + result = cmd_preprocess(project_path, verbose, dry_run, cascade_force, workers=1, res=res) + if result != 0: + return result + if ( + _files_modified_since(project_path, t0, "*_processed.mov") + or _files_modified_since(project_path, t0, "*_processed.webm") + ): + cascade_force = True + + print("\n>>> Step 4/8: Transcode processed (HEVC+alpha)\n") + t0 = time.time() + result = cmd_transcode( + project_path, + verbose, + dry_run, + replace=False, + crf=23, + force=cascade_force, + processed=True, + alpha_quality=1.0, + ) + if result != 0: + return result + if _files_modified_since(project_path, t0, "*_processed.mov"): + cascade_force = True + + print("\n>>> Step 5/8: Trim\n") + t0 = time.time() + result = cmd_trim(project_path, verbose, force=cascade_force, threshold_db=-40.0) + if result != 0: + return result + # Trim modifies narration.json skip/take values; any change invalidates stitch + if _files_modified_since(project_path, t0, "narration.json"): + cascade_force = True + + print("\n>>> Step 6/8: Stitch\n") + t0 = time.time() + result = cmd_stitch(project_path, verbose, cascade_force, res=res) + if result != 0: + return result + if _files_modified_since(project_path, t0, "narration_combined.mov"): + cascade_force = True + + print("\n>>> Step 7/8: Render\n") + result = cmd_render(project_path, verbose, dry_run, res=res, force=cascade_force) if result != 0: return result - print("\n>>> Step 3/5: Stitch\n") - result = cmd_stitch(project_path, verbose, force, res=res) - if result != 0: - return result - - print("\n>>> Step 4/5: Render\n") - result = cmd_render(project_path, verbose, dry_run, res=res, force=force) - if result != 0: - return result - - print("\n>>> Step 5/5: Handoff\n") + print("\n>>> Step 8/8: Handoff\n") return cmd_handoff(project_path, verbose, file_override=None, prod=False, res=res) @@ -1939,7 +2999,9 @@ def cmd_archive(project_path: Path, verbose: bool, dry_run: bool) -> int: 
json.dumps(data, indent=2, ensure_ascii=False) + "\n", encoding="utf-8", ) - print(f"\n Updated project.json with synced_time: {data['synced_time']}") + print( + f"\n Updated project.json with synced_time: {data['synced_time']}" + ) except (json.JSONDecodeError, IOError) as e: print(f"Warning: Could not update project.json: {e}") @@ -1987,7 +3049,8 @@ def _extract_audio_file( cmd = [ "ffmpeg", "-y", # Overwrite - "-i", str(source_path), + "-i", + str(source_path), "-vn", # No video ] @@ -1999,11 +3062,15 @@ def _extract_audio_file( # "both" keeps stereo, no filter needed # Output format: 48kHz 16-bit WAV (standard for audio editing) - cmd.extend([ - "-ar", "48000", # 48kHz sample rate - "-acodec", "pcm_s16le", # 16-bit PCM - str(output_path), - ]) + cmd.extend( + [ + "-ar", + "48000", # 48kHz sample rate + "-acodec", + "pcm_s16le", # 16-bit PCM + str(output_path), + ] + ) if verbose: print(f" Command: {' '.join(cmd)}") @@ -2017,9 +3084,13 @@ def _extract_audio_file( # Get duration info duration_cmd = [ - "ffprobe", "-v", "error", - "-show_entries", "format=duration", - "-of", "default=noprint_wrappers=1:nokey=1", + "ffprobe", + "-v", + "error", + "-show_entries", + "format=duration", + "-of", + "default=noprint_wrappers=1:nokey=1", str(output_path), ] duration_result = subprocess.run(duration_cmd, capture_output=True, text=True) @@ -2038,7 +3109,9 @@ def _extract_audio_file( print(f" - Effect > Compressor") print(f" - Effect > Filter Curve EQ") print(f" - Effect > Loudness Normalization") - print(f"\n Once you find good settings, update narration.json with matching filter config.") + print( + f"\n Once you find good settings, update narration.json with matching filter config." 
+ ) return 0 @@ -2075,7 +3148,10 @@ def cmd_extract_audio( combined_path = videos_dir / "narration_combined.mov" if not combined_path.exists(): - print(f"Error: narration_combined.mov not found at {combined_path}", file=sys.stderr) + print( + f"Error: narration_combined.mov not found at {combined_path}", + file=sys.stderr, + ) print("Run 'gnommo -p stitch' first.", file=sys.stderr) return 1 @@ -2102,8 +3178,14 @@ def cmd_extract_audio( # Determine which segments to process if segment: if segment not in narration: - print(f"Error: Segment '{segment}' not found in narration.json", file=sys.stderr) - print(f"Available segments: {', '.join(sorted(narration.keys()))}", file=sys.stderr) + print( + f"Error: Segment '{segment}' not found in narration.json", + file=sys.stderr, + ) + print( + f"Available segments: {', '.join(sorted(narration.keys()))}", + file=sys.stderr, + ) return 1 segments_to_process = [(segment, narration[segment])] else: @@ -2135,7 +3217,8 @@ def cmd_extract_audio( cmd = [ "ffmpeg", "-y", # Overwrite - "-i", str(source_path), + "-i", + str(source_path), "-vn", # No video ] @@ -2147,11 +3230,15 @@ def cmd_extract_audio( # "both" keeps stereo, no filter needed # Output format: 48kHz 16-bit WAV (standard for audio editing) - cmd.extend([ - "-ar", "48000", # 48kHz sample rate - "-acodec", "pcm_s16le", # 16-bit PCM - str(output_path), - ]) + cmd.extend( + [ + "-ar", + "48000", # 48kHz sample rate + "-acodec", + "pcm_s16le", # 16-bit PCM + str(output_path), + ] + ) if verbose: print(f" Command: {' '.join(cmd)}") @@ -2163,9 +3250,13 @@ def cmd_extract_audio( # Get duration info duration_cmd = [ - "ffprobe", "-v", "error", - "-show_entries", "format=duration", - "-of", "default=noprint_wrappers=1:nokey=1", + "ffprobe", + "-v", + "error", + "-show_entries", + "format=duration", + "-of", + "default=noprint_wrappers=1:nokey=1", str(output_path), ] duration_result = subprocess.run(duration_cmd, capture_output=True, text=True) @@ -2184,7 +3275,9 @@ def 
cmd_extract_audio( print(f" - Effect > Compressor") print(f" - Effect > Filter Curve EQ") print(f" - Effect > Loudness Normalization") - print(f"\n Once you find good settings, update narration.json with matching filter config.") + print( + f"\n Once you find good settings, update narration.json with matching filter config." + ) return 0 @@ -2219,7 +3312,10 @@ def cmd_master( # Find narration_combined.mov combined_path = videos_dir / "narration_combined.mov" if not combined_path.exists(): - print(f"Error: narration_combined.mov not found at {combined_path}", file=sys.stderr) + print( + f"Error: narration_combined.mov not found at {combined_path}", + file=sys.stderr, + ) print("Run 'gnommo -p stitch' first.", file=sys.stderr) return 1 @@ -2256,17 +3352,23 @@ def cmd_master( # Step 1: Extract raw audio print(f"\n Extracting raw audio...") raw_cmd = [ - "ffmpeg", "-y", - "-i", str(combined_path), + "ffmpeg", + "-y", + "-i", + str(combined_path), "-vn", ] if channel_filter: raw_cmd.extend(["-af", channel_filter.rstrip(",")]) - raw_cmd.extend([ - "-ar", "48000", - "-acodec", "pcm_s16le", - str(raw_output), - ]) + raw_cmd.extend( + [ + "-ar", + "48000", + "-acodec", + "pcm_s16le", + str(raw_output), + ] + ) if verbose: print(f" Command: {' '.join(raw_cmd)}") @@ -2370,12 +3472,17 @@ def cmd_master( print(f" - Loudnorm: target={cfg.target_lufs} LUFS") processed_cmd = [ - "ffmpeg", "-y", - "-i", str(combined_path), + "ffmpeg", + "-y", + "-i", + str(combined_path), "-vn", - "-af", filter_chain, - "-ar", "48000", - "-acodec", "pcm_s16le", + "-af", + filter_chain, + "-ar", + "48000", + "-acodec", + "pcm_s16le", str(processed_output), ] @@ -2390,8 +3497,16 @@ def cmd_master( # Get durations def get_duration(path): - cmd = ["ffprobe", "-v", "error", "-show_entries", "format=duration", - "-of", "default=noprint_wrappers=1:nokey=1", str(path)] + cmd = [ + "ffprobe", + "-v", + "error", + "-show_entries", + "format=duration", + "-of", + "default=noprint_wrappers=1:nokey=1", + 
str(path), + ] r = subprocess.run(cmd, capture_output=True, text=True) try: return float(r.stdout.strip()) diff --git a/gnommo/description.py b/gnommo/description.py index bd1c671..6831937 100644 --- a/gnommo/description.py +++ b/gnommo/description.py @@ -178,7 +178,7 @@ def generate_chapters( continue timestamp = timing_lookup[slide_id] title = _extract_chapter_title(manuscript_text, slide_id, slides) - if chapters and (timestamp - chapters[-1].timestamp) < min_chapter_duration: + if chapters and (timestamp - chapters[-1].timestamp) < min_chapter_duration: continue # Skip this chapter, previous one covers it chapters.append( diff --git a/gnommo/extract_presenter_notes.py b/gnommo/extract_presenter_notes.py index 996114f..25da824 100644 --- a/gnommo/extract_presenter_notes.py +++ b/gnommo/extract_presenter_notes.py @@ -27,7 +27,6 @@ from gnommo.parser import _read_json def write_manuscript(data: Path, out_path: Path): - data = _read_json(data.read_text(encoding="utf-8")) lines = [] i = 0 diff --git a/gnommo/handoff.py b/gnommo/handoff.py index a6ebc9c..5215c35 100644 --- a/gnommo/handoff.py +++ b/gnommo/handoff.py @@ -30,11 +30,15 @@ from pathlib import Path try: import requests except ImportError: - print("Error: 'requests' package is required. Run: pip install requests", file=sys.stderr) + print( + "Error: 'requests' package is required. 
Run: pip install requests", + file=sys.stderr, + ) sys.exit(1) SYNC_FILE_LOCAL = ".gnommo_sync.json" -SYNC_FILE_PROD = ".gnommo_sync.prod.json" +SYNC_FILE_PROD = ".gnommo_sync.prod.json" + def _sync_file(prod: bool) -> str: return SYNC_FILE_PROD if prod else SYNC_FILE_LOCAL @@ -69,19 +73,33 @@ def _write_sync(project_path: Path, data: dict, prod: bool = False): json.dump(data, f, indent=2) -def cmd_handoff(project_path: Path, verbose: bool = False, file_override: str | None = None, prod: bool = False, res: str = "full") -> int: +def cmd_handoff( + project_path: Path, + verbose: bool = False, + file_override: str | None = None, + prod: bool = False, + res: str = "full", +) -> int: _load_env_file() if prod: api_url = os.environ.get("GNOMMOWEB_PROD_URL", "").rstrip("/") api_key = os.environ.get("GNOMMOWEB_PROD_API_KEY", "") - if not api_url: print("Error: GNOMMOWEB_PROD_URL is not set.", file=sys.stderr); return 1 - if not api_key: print("Error: GNOMMOWEB_PROD_API_KEY is not set.", file=sys.stderr); return 1 + if not api_url: + print("Error: GNOMMOWEB_PROD_URL is not set.", file=sys.stderr) + return 1 + if not api_key: + print("Error: GNOMMOWEB_PROD_API_KEY is not set.", file=sys.stderr) + return 1 else: api_url = os.environ.get("GNOMMOWEB_URL", "").rstrip("/") api_key = os.environ.get("GNOMMOWEB_API_KEY", "") - if not api_url: print("Error: GNOMMOWEB_URL is not set.", file=sys.stderr); return 1 - if not api_key: print("Error: GNOMMOWEB_API_KEY is not set.", file=sys.stderr); return 1 + if not api_url: + print("Error: GNOMMOWEB_URL is not set.", file=sys.stderr) + return 1 + if not api_key: + print("Error: GNOMMOWEB_API_KEY is not set.", file=sys.stderr) + return 1 if verbose: target = "production" if prod else "local" @@ -104,7 +122,9 @@ def cmd_handoff(project_path: Path, verbose: bool = False, file_override: str | if file_override: video_path = Path(file_override) else: - output_filename = project.get("output") or Path(project.get("output_video", "")).name + 
output_filename = ( + project.get("output") or Path(project.get("output_video", "")).name + ) if not output_filename: print( "Error: no 'output' field in project.json and no --file provided.", @@ -148,17 +168,23 @@ def cmd_handoff(project_path: Path, verbose: bool = False, file_override: str | result = r.json() video_version = result.get("video_version", "?") - video_url = result.get("video_url", "") + video_url = result.get("video_url", "") # ── Write sync state ─────────────────────────────────────────────────────── now_iso = datetime.now(tz=timezone.utc).isoformat(timespec="seconds") existing_sync = _read_sync(project_path, prod) - _write_sync(project_path, { - **existing_sync, - "last_handoff_at": now_iso, - "video_version": video_version, - "server_updated_at": result.get("asset", {}).get("updated_at", existing_sync.get("server_updated_at")), - }, prod) + _write_sync( + project_path, + { + **existing_sync, + "last_handoff_at": now_iso, + "video_version": video_version, + "server_updated_at": result.get("asset", {}).get( + "updated_at", existing_sync.get("server_updated_at") + ), + }, + prod, + ) print(f"✓ {project_id} → v{video_version} [processed]") if video_url: @@ -170,8 +196,8 @@ def cmd_handoff(project_path: Path, verbose: bool = False, file_override: str | def _mime_type(path: Path) -> str: ext = path.suffix.lower() return { - ".mp4": "video/mp4", - ".mov": "video/quicktime", + ".mp4": "video/mp4", + ".mov": "video/quicktime", ".webm": "video/webm", - ".mkv": "video/x-matroska", + ".mkv": "video/x-matroska", }.get(ext, "application/octet-stream") diff --git a/gnommo/models.py b/gnommo/models.py index 1b88f3e..712ea77 100644 --- a/gnommo/models.py +++ b/gnommo/models.py @@ -65,7 +65,9 @@ class ProjectConfig: # YouTube description fields description: str = "" # Video description text for YouTube footer: str = "" # Footer text (social links, subscribe CTA, etc.) - output_video: str = "" # Output filename (e.g. 
"DISC_INT3.mp4"); placed in out/ or out// + output_video: str = ( + "" # Output filename (e.g. "DISC_INT3.mp4"); placed in out/ or out// + ) @dataclass @@ -295,6 +297,10 @@ class VideoSource: False # If True, skip loudnorm during preprocessing (apply after concatenation) ) volume: float = 1.0 # Volume multiplier (1.0=full, >1.0=boost, <1.0=reduce) + layer: str = "above" # "above" = renders on top of slides; "below" = behind slides + duration: Optional[float] = None # Pre-probed file duration in seconds (set by import) + has_audio: Optional[bool] = None # Pre-detected audio presence (set by import) + end_on: Optional[str] = None # When video event ends: "next_slide" | "end" | "take" (None = marker-type default) @dataclass @@ -334,6 +340,7 @@ class AudioDefinition: ignore_pauses: bool = ( False # If True, audio continues playing during narration pauses ) + duration: Optional[float] = None # Pre-probed duration in seconds (set by import) @dataclass @@ -364,6 +371,8 @@ class VideoEvent: end_time: float video_source: "VideoSource" cutout: "CutoutDefinition" + cutout_name: str = "" # resolved cutout name (e.g. 
"fullscreen"), for display + layer: str = "above" # "above" = on top of slides; "below" = behind slides @dataclass @@ -508,7 +517,9 @@ class RenderPlan: cached_files: set = field( default_factory=set ) # Video IDs loaded from external cache (show 📁 indicator) - output_path: Optional[Path] = None # Final output file path (set after plan is built) + output_path: Optional[ + Path + ] = None # Final output file path (set after plan is built) # Slide layout configurations (hardcoded for POC) diff --git a/gnommo/parser.py b/gnommo/parser.py index ae24e63..1664f73 100644 --- a/gnommo/parser.py +++ b/gnommo/parser.py @@ -161,8 +161,35 @@ def parse_project_config(project_path: Path) -> ProjectConfig: except json.JSONDecodeError as e: raise ParseError(f"Invalid JSON: {e}", config_path) - # Parse cutouts (named zones for video placement) - cutouts: dict[str, CutoutDefinition] = {} + # Built-in cutouts — used by vft/vfb/vst/vsb marker shorthand. + # Projects can override these by defining cutouts with the same names. + cutouts: dict[str, CutoutDefinition] = { + # 100 % × 100 % at origin — for fullscreen video (vf* markers) + "fullscreen": CutoutDefinition( + x=-1, + y=-1, + height=-1, + width=-1, + x_percent=0.0, + y_percent=0.0, + height_percent=1.0, + width_percent=1.0, + ), + # 50 % height, square aspect, centred — for square video (vs* markers) + "square": CutoutDefinition( + x=-1, + y=-1, + height=-1, + width=-1, + x_percent=0.25, + y_percent=0.25, + height_percent=0.5, + width_percent=0.0, + ), + } + + # Parse cutouts (named zones for video placement) — project definitions + # override the built-ins above. 
cutouts_data = data.get("cutouts", {}) for cutout_name, cutout_data in cutouts_data.items(): x, x_pct = _parse_dimension(cutout_data.get("x", 0)) @@ -243,7 +270,9 @@ def parse_slides( # Try cache fallback for reading JSON slides_path, _ = resolve_with_cache(local_slides_path, project_path) if not slides_path.exists(): - raise ParseError(f"slides file not found: {local_slides_path}", local_slides_path) + raise ParseError( + f"slides file not found: {local_slides_path}", local_slides_path + ) try: data = _read_json(slides_path) @@ -305,12 +334,14 @@ def parse_audio( if "overlap" in audio_data and audio_data["overlap"]: overlap = parse_timestamp(audio_data["overlap"]) + raw_duration = audio_data.get("duration") audio[audio_id] = AudioDefinition( file=audio_data["file"], volume=float(audio_data.get("volume", 1.0)), loop=bool(audio_data.get("loop", False)), overlap=overlap, ignore_pauses=bool(audio_data.get("ignore_pauses", False)), + duration=float(raw_duration) if raw_duration is not None else None, ) return audio, audio_dir @@ -386,7 +417,9 @@ def parse_videos( # Try cache fallback for reading JSON videos_path, _ = resolve_with_cache(local_videos_path, project_path) if not videos_path.exists(): - raise ParseError(f"videos.json not found: {local_videos_path}", local_videos_path) + raise ParseError( + f"videos.json not found: {local_videos_path}", local_videos_path + ) try: data = _read_json(videos_path) @@ -440,6 +473,8 @@ def parse_videos( # take = end - begin (duration from begin to end) take = end_time - skip + raw_duration = video_data.get("duration") + raw_has_audio = video_data.get("has_audio") videos[video_id] = VideoSource( source_file=video_data["source_file"], filter=filter_list, @@ -455,6 +490,10 @@ def parse_videos( use_audio_channels=video_data.get("use_audio_channels", "both"), defer_loudnorm=video_data.get("defer_loudnorm", False), volume=float(video_data.get("volume", 1.0)), + layer=video_data.get("layer", "above"), + duration=float(raw_duration) if 
raw_duration is not None else None, + has_audio=bool(raw_has_audio) if raw_has_audio is not None else None, + end_on=video_data.get("end_on"), ) return videos, videos_dir diff --git a/gnommo/preprocessor.py b/gnommo/preprocessor.py index 97fac0d..f42f7d4 100644 --- a/gnommo/preprocessor.py +++ b/gnommo/preprocessor.py @@ -27,9 +27,9 @@ CHUNK_DURATION = 60 # Resolution presets for preview/proxy workflow # Each entry: (width, height, subdir_name) RES_CONFIGS: dict[str, tuple[int, int, str] | None] = { - "full": None, # no downscale, no subdir - "low": (490, 270, "low"), - "tiny": (320, 180, "proxy"), # "proxy" subdir kept for backward compat + "full": None, # no downscale, no subdir + "low": (490, 270, "low"), + "tiny": (320, 180, "proxy"), # "proxy" subdir kept for backward compat } # Keep legacy constants pointing at "tiny" values @@ -61,10 +61,14 @@ def _video_has_alpha(video_path: Path) -> bool: """Check if a video file has an alpha channel.""" cmd = [ "ffprobe", - "-v", "error", - "-select_streams", "v:0", - "-show_entries", "stream=pix_fmt", - "-of", "default=noprint_wrappers=1:nokey=1", + "-v", + "error", + "-select_streams", + "v:0", + "-show_entries", + "stream=pix_fmt", + "-of", + "default=noprint_wrappers=1:nokey=1", str(video_path), ] result = subprocess.run(cmd, capture_output=True, text=True) @@ -104,13 +108,20 @@ def create_downscaled_video( return out_path cmd = [ - "ffmpeg", "-y", - "-i", str(source_path), - "-vf", f"scale={width}:{height}", - "-c:v", "libx264", - "-preset", "ultrafast", - "-crf", "28", - "-c:a", "copy", + "ffmpeg", + "-y", + "-i", + str(source_path), + "-vf", + f"scale={width}:{height}", + "-c:v", + "libx264", + "-preset", + "ultrafast", + "-crf", + "28", + "-c:a", + "copy", str(out_path), ] result = subprocess.run(cmd, capture_output=True, text=True) @@ -204,7 +215,8 @@ def ensure_downscaled_files_exist( out_dir.mkdir(parents=True, exist_ok=True) video_files = [ - f for f in source_dir.iterdir() + f + for f in source_dir.iterdir() 
if f.is_file() and f.suffix.lower() in video_extensions and "_processed" not in f.stem @@ -247,6 +259,7 @@ import selectors, time, sys, subprocess def run_ffmpeg_with_progress(cmd, duration, description="Processing"): cmd = cmd.copy() + insert_pos = cmd.index("-y") + 1 if "-y" in cmd else 1 cmd[insert_pos:insert_pos] = [ "-progress", @@ -269,9 +282,11 @@ def run_ffmpeg_with_progress(cmd, duration, description="Processing"): sel.register(p.stdout, selectors.EVENT_READ) bar_width = 30 + start_time = time.time() last_update = time.time() last_percent = 0 seen_any_progress = False + last_log_line = "" logs = [] def draw(percent, suffix=""): @@ -287,6 +302,7 @@ def run_ffmpeg_with_progress(cmd, duration, description="Processing"): while True: # If process ended and no more output, break if p.poll() is not None: + # drain any remaining output quickly while True: line = p.stdout.readline() @@ -297,8 +313,12 @@ def run_ffmpeg_with_progress(cmd, duration, description="Processing"): events = sel.select(timeout=0.2) if not events: - # No output right now; show finalizing if we're near end - if ( + if not seen_any_progress: + # Show elapsed time and last FFmpeg output line during init + elapsed = time.time() - start_time + hint = f" | {last_log_line[:50]}" if last_log_line else "" + draw(0, f"Initializing... 
({elapsed:.0f}s){hint}") + elif ( seen_any_progress and last_percent >= 99 and (time.time() - last_update) > 1.0 @@ -311,6 +331,10 @@ def run_ffmpeg_with_progress(cmd, duration, description="Processing"): if not line: continue logs.append(line) + # Track last non-empty, non-progress-key line for init diagnostics + stripped = line.strip() + if stripped and "=" not in stripped: + last_log_line = stripped if line.startswith("out_time_ms="): val = line.split("=", 1)[1].strip() @@ -332,7 +356,10 @@ def run_ffmpeg_with_progress(cmd, duration, description="Processing"): if p.returncode == 0: draw(100, "Done\n") else: - sys.stdout.write("\n") + code = p.returncode + # On macOS/Linux, -9 means SIGKILL (OOM kill by OS), -6 = SIGABRT + signal_hint = " (OOM kill)" if code == -9 else (" (abort)" if code == -6 else "") + sys.stdout.write(f"\n FFmpeg exited with code {code}{signal_hint}\n") sys.stdout.flush() return subprocess.CompletedProcess( @@ -340,7 +367,33 @@ def run_ffmpeg_with_progress(cmd, duration, description="Processing"): ) -def check_audio_channel_silent(input_path: Path, channel: str, threshold_db: float = -60.0) -> tuple[bool, float]: +def _has_audio_stream(video_path: Path) -> bool: + """Return True if the file has a real (non-ghost) audio stream.""" + result = subprocess.run( + [ + "ffprobe", "-v", "error", + "-analyzeduration", "0", + "-probesize", "1000000", + "-select_streams", "a:0", + "-show_entries", "stream=index,nb_frames", + "-of", "csv=p=0", + str(video_path), + ], + capture_output=True, + text=True, + ) + output = result.stdout.strip() + if not output: + return False + parts = output.split(",") + if len(parts) >= 2 and parts[1].strip() == "0": + return False # Ghost audio track — header present but no sample data + return True + + +def check_audio_channel_silent( + input_path: Path, channel: str, threshold_db: float = -60.0 +) -> tuple[bool, float]: """ Quick check whether the specified audio channel is silent. 
Uses ffmpeg volumedetect (audio-only pass, much faster than full processing). @@ -349,9 +402,14 @@ def check_audio_channel_silent(input_path: Path, channel: str, threshold_db: flo """ pan = "pan=mono|c0=c0" if channel == "left" else "pan=mono|c0=c1" cmd = [ - "ffmpeg", "-i", str(input_path), - "-af", f"{pan},volumedetect", - "-f", "null", "/dev/null", + "ffmpeg", + "-i", + str(input_path), + "-af", + f"{pan},volumedetect", + "-f", + "null", + "/dev/null", ] result = subprocess.run(cmd, capture_output=True, text=True) for line in result.stderr.splitlines(): @@ -416,10 +474,14 @@ def detect_silence_bounds( total_duration = get_video_duration(input_path) cmd = [ - "ffmpeg", "-i", str(input_path), + "ffmpeg", + "-i", + str(input_path), "-af", f"silencedetect=noise={noise_threshold_db}dB:duration={min_silence_duration}", - "-f", "null", "/dev/null", + "-f", + "null", + "/dev/null", ] result = subprocess.run(cmd, capture_output=True, text=True) @@ -591,6 +653,14 @@ def preprocess_video( # Audio normalization: denoise, compress, and normalize loudness # Note: skip/take are NOT applied here - they're only used during concatenation print(" Filter: audio_normalize") + if not _has_audio_stream(current_input): + raise PreprocessError( + f"audio_normalize requires an audio stream, but '{current_input.name}' has none.\n" + f" Check that the source file has audio, or remove audio_normalize from the filter list.", + filter_type="audio_normalize", + command="", + stderr="", + ) step_output = gnommo_scratch / f"{video_id}_batch{batch_num}_audio.mov" intermediate_files.append(step_output) apply_audio_normalize( @@ -1122,9 +1192,7 @@ def apply_combined_video_filters_chunked( num_chunks = int(duration / CHUNK_DURATION) + 1 chunk_files: list[Path] = [] - chunk_tasks: list[ - tuple - ] = [] # (index, chunk_path, start_time, chunk_duration) + chunk_tasks: list[tuple] = [] # (index, chunk_path, start_time, chunk_duration) # Build list of chunk tasks for i in range(num_chunks): @@ -1179,11 
+1247,16 @@ def apply_combined_video_filters_chunked( print(f" Concatenating {len(chunk_files)} chunks → {output_path.name}") concat_cmd = [ - "ffmpeg", "-y", - "-f", "concat", - "-safe", "0", - "-i", str(concat_list), - "-c", "copy", + "ffmpeg", + "-y", + "-f", + "concat", + "-safe", + "0", + "-i", + str(concat_list), + "-c", + "copy", str(output_path), ] concat_result = run_ffmpeg_with_progress(concat_cmd, duration, "Concatenating") @@ -1953,12 +2026,14 @@ def parse_audio_normalize_config(config: dict[str, Any]) -> AudioNormalizeConfig # Parse EQ bands eq_bands = [] for band in config.get("eq_bands", []): - eq_bands.append(EQBand( - freq=float(band.get("freq", 1000)), - gain=float(band.get("gain", 0)), - q=float(band.get("q", 1.0)), - type=str(band.get("type", "peak")), - )) + eq_bands.append( + EQBand( + freq=float(band.get("freq", 1000)), + gain=float(band.get("gain", 0)), + q=float(band.get("q", 1.0)), + type=str(band.get("type", "peak")), + ) + ) return AudioNormalizeConfig( # Parametric EQ @@ -2163,12 +2238,18 @@ def stitch_narration_segments( # Preserve alpha with ProRes 4444 cmd.extend( [ - "-vf", "fps=30,format=yuva444p10le", - "-c:v", "prores_ks", - "-profile:v", "4", - "-pix_fmt", "yuva444p10le", - "-c:a", "pcm_s16le", - "-avoid_negative_ts", "make_zero", + "-vf", + "fps=30,format=yuva444p10le", + "-c:v", + "prores_ks", + "-profile:v", + "4", + "-pix_fmt", + "yuva444p10le", + "-c:a", + "pcm_s16le", + "-avoid_negative_ts", + "make_zero", str(trimmed_path), ] ) @@ -2176,14 +2257,22 @@ def stitch_narration_segments( # No alpha - use fast h264 encoding cmd.extend( [ - "-vf", "fps=30", - "-c:v", "libx264", - "-preset", "fast", - "-crf", "18", - "-c:a", "aac", - "-b:a", "192k", - "-avoid_negative_ts", "make_zero", - "-movflags", "+faststart", + "-vf", + "fps=30", + "-c:v", + "libx264", + "-preset", + "fast", + "-crf", + "18", + "-c:a", + "aac", + "-b:a", + "192k", + "-avoid_negative_ts", + "make_zero", + "-movflags", + "+faststart", str(trimmed_path), ] ) @@ 
-2211,12 +2300,18 @@ def stitch_narration_segments( cmd = [ "ffmpeg", "-y", - "-f", "concat", - "-safe", "0", - "-i", str(concat_list), - "-c:v", "copy", - "-c:a", "copy", - "-movflags", "+faststart", + "-f", + "concat", + "-safe", + "0", + "-i", + str(concat_list), + "-c:v", + "copy", + "-c:a", + "copy", + "-movflags", + "+faststart", str(output_path), ] @@ -2235,16 +2330,26 @@ def stitch_narration_segments( ) if needs_loudnorm: print(" Applying loudness normalization to stitched output...") - normalized_path = output_path.parent / f"{output_path.stem}_normalized{output_path.suffix}" + normalized_path = ( + output_path.parent / f"{output_path.stem}_normalized{output_path.suffix}" + ) # Use EBU R128 loudnorm targeting YouTube's recommended levels loudnorm_cmd = [ - "ffmpeg", "-y", - "-i", str(output_path), - "-c:v", "copy", - "-af", "loudnorm=I=-14:LRA=11:TP=-1.5", - "-c:a", "aac", "-b:a", "192k", - "-movflags", "+faststart", + "ffmpeg", + "-y", + "-i", + str(output_path), + "-c:v", + "copy", + "-af", + "loudnorm=I=-14:LRA=11:TP=-1.5", + "-c:a", + "aac", + "-b:a", + "192k", + "-movflags", + "+faststart", str(normalized_path), ] diff --git a/gnommo/pull.py b/gnommo/pull.py index 9f713b7..f816954 100644 --- a/gnommo/pull.py +++ b/gnommo/pull.py @@ -29,11 +29,15 @@ from pathlib import Path try: import requests except ImportError: - print("Error: 'requests' package is required. Run: pip install requests", file=sys.stderr) + print( + "Error: 'requests' package is required. 
Run: pip install requests", + file=sys.stderr, + ) sys.exit(1) SYNC_FILE_LOCAL = ".gnommo_sync.json" -SYNC_FILE_PROD = ".gnommo_sync.prod.json" +SYNC_FILE_PROD = ".gnommo_sync.prod.json" + def _sync_file(prod: bool) -> str: return SYNC_FILE_PROD if prod else SYNC_FILE_LOCAL @@ -77,19 +81,29 @@ def _parse_ts(ts_str) -> datetime | None: return None -def cmd_pull(project_path: Path, verbose: bool = False, force: bool = False, prod: bool = False) -> int: +def cmd_pull( + project_path: Path, verbose: bool = False, force: bool = False, prod: bool = False +) -> int: _load_env_file() if prod: api_url = os.environ.get("GNOMMOWEB_PROD_URL", "").rstrip("/") api_key = os.environ.get("GNOMMOWEB_PROD_API_KEY", "") - if not api_url: print("Error: GNOMMOWEB_PROD_URL is not set.", file=sys.stderr); return 1 - if not api_key: print("Error: GNOMMOWEB_PROD_API_KEY is not set.", file=sys.stderr); return 1 + if not api_url: + print("Error: GNOMMOWEB_PROD_URL is not set.", file=sys.stderr) + return 1 + if not api_key: + print("Error: GNOMMOWEB_PROD_API_KEY is not set.", file=sys.stderr) + return 1 else: api_url = os.environ.get("GNOMMOWEB_URL", "").rstrip("/") api_key = os.environ.get("GNOMMOWEB_API_KEY", "") - if not api_url: print("Error: GNOMMOWEB_URL is not set.", file=sys.stderr); return 1 - if not api_key: print("Error: GNOMMOWEB_API_KEY is not set.", file=sys.stderr); return 1 + if not api_url: + print("Error: GNOMMOWEB_URL is not set.", file=sys.stderr) + return 1 + if not api_key: + print("Error: GNOMMOWEB_API_KEY is not set.", file=sys.stderr) + return 1 if verbose: target = "production" if prod else "local" @@ -176,19 +190,23 @@ def cmd_pull(project_path: Path, verbose: bool = False, force: bool = False, pro now_iso = datetime.now(tz=timezone.utc).isoformat(timespec="seconds") existing_sync = _read_sync(project_path, prod) - _write_sync(project_path, { - **existing_sync, - "last_pulled_at": now_iso, - "server_updated_at": server_updated_at, - "last_pushed_at": 
existing_sync.get("last_pushed_at"), - }, prod) + _write_sync( + project_path, + { + **existing_sync, + "last_pulled_at": now_iso, + "server_updated_at": server_updated_at, + "last_pushed_at": existing_sync.get("last_pushed_at"), + }, + prod, + ) return 0 def _merge_parent(local: dict, server: dict, verbose: bool): """Update parent project.json: name, description, shorts index (slugs).""" - local["name"] = server.get("title", local.get("name")) + local["name"] = server.get("title", local.get("name")) local["description"] = server.get("description") or local.get("description") # shorts is a list of slugs — update from server's shorts list server_shorts = server.get("shorts", []) diff --git a/gnommo/push.py b/gnommo/push.py index eed377a..8e332e5 100644 --- a/gnommo/push.py +++ b/gnommo/push.py @@ -42,11 +42,15 @@ from pathlib import Path try: import requests except ImportError: - print("Error: 'requests' package is required. Run: pip install requests", file=sys.stderr) + print( + "Error: 'requests' package is required. 
Run: pip install requests", + file=sys.stderr, + ) sys.exit(1) SYNC_FILE_LOCAL = ".gnommo_sync.json" -SYNC_FILE_PROD = ".gnommo_sync.prod.json" +SYNC_FILE_PROD = ".gnommo_sync.prod.json" + def _sync_file(prod: bool) -> str: return SYNC_FILE_PROD if prod else SYNC_FILE_LOCAL @@ -90,19 +94,29 @@ def _parse_ts(ts_str) -> datetime | None: return None -def cmd_push(project_path: Path, verbose: bool = False, force: bool = False, prod: bool = False) -> int: +def cmd_push( + project_path: Path, verbose: bool = False, force: bool = False, prod: bool = False +) -> int: _load_env_file() if prod: api_url = os.environ.get("GNOMMOWEB_PROD_URL", "").rstrip("/") api_key = os.environ.get("GNOMMOWEB_PROD_API_KEY", "") - if not api_url: print("Error: GNOMMOWEB_PROD_URL is not set.", file=sys.stderr); return 1 - if not api_key: print("Error: GNOMMOWEB_PROD_API_KEY is not set.", file=sys.stderr); return 1 + if not api_url: + print("Error: GNOMMOWEB_PROD_URL is not set.", file=sys.stderr) + return 1 + if not api_key: + print("Error: GNOMMOWEB_PROD_API_KEY is not set.", file=sys.stderr) + return 1 else: api_url = os.environ.get("GNOMMOWEB_URL", "").rstrip("/") api_key = os.environ.get("GNOMMOWEB_API_KEY", "") - if not api_url: print("Error: GNOMMOWEB_URL is not set.", file=sys.stderr); return 1 - if not api_key: print("Error: GNOMMOWEB_API_KEY is not set.", file=sys.stderr); return 1 + if not api_url: + print("Error: GNOMMOWEB_URL is not set.", file=sys.stderr) + return 1 + if not api_key: + print("Error: GNOMMOWEB_API_KEY is not set.", file=sys.stderr) + return 1 if verbose: target = "production" if prod else "local" @@ -160,11 +174,15 @@ def cmd_push(project_path: Path, verbose: bool = False, force: bool = False, pro # ── Write sync state ────────────────────────────────────────────────────── now_iso = datetime.now(tz=timezone.utc).isoformat(timespec="seconds") existing_sync = _read_sync(project_path, prod) - _write_sync(project_path, { - **existing_sync, - "last_pushed_at": now_iso, - 
"server_updated_at": server_updated_at, - }, prod) + _write_sync( + project_path, + { + **existing_sync, + "last_pushed_at": now_iso, + "server_updated_at": server_updated_at, + }, + prod, + ) # ── Print summary ───────────────────────────────────────────────────────── asset = result.get("asset", {}) @@ -176,7 +194,9 @@ def cmd_push(project_path: Path, verbose: bool = False, force: bool = False, pro print(f"✓ {project_id} → gn_asset #{asset.get('id')} ({asset.get('name')})") if verbose: script_len = len(asset.get("script") or "") - print(f" server.script: {script_len} chars | fps={asset.get('fps')} res={asset.get('resolution')}") + print( + f" server.script: {script_len} chars | fps={asset.get('fps')} res={asset.get('resolution')}" + ) return 0 @@ -201,19 +221,19 @@ def _build_parent_payload(project: dict, project_path: Path, verbose: bool) -> d print(f" no manuscript field in project.json") return { - "project_id": project["id"], - "name": project["name"], - "description": project.get("description"), - "coursecode": project.get("coursecode"), - "script_content": script_content, - "resolution": project.get("resolution"), - "fps": project.get("fps"), + "project_id": project["id"], + "name": project["name"], + "description": project.get("description"), + "coursecode": project.get("coursecode"), + "script_content": script_content, + "resolution": project.get("resolution"), + "fps": project.get("fps"), "duration_seconds": project.get("duration_seconds"), - "hook": project.get("hook"), + "hook": project.get("hook"), "platform_targets": project.get("platform_targets"), - "status": project.get("status"), - "youtube_url": project.get("youtube_url"), - "shorts": project.get("shorts", []), + "status": project.get("status"), + "youtube_url": project.get("youtube_url"), + "shorts": project.get("shorts", []), } @@ -231,14 +251,14 @@ def _build_short_payload(project: dict, project_path: Path, verbose: bool) -> di print(f" Warning: script file not found: {script_path}", 
file=sys.stderr) return { - "project_id": project["id"], - "name": project["name"], - "description": project.get("description"), - "parent_project": project["parent_project"], - "hook": project.get("hook"), - "script_content": script_content, + "project_id": project["id"], + "name": project["name"], + "description": project.get("description"), + "parent_project": project["parent_project"], + "hook": project.get("hook"), + "script_content": script_content, "platform_targets": project.get("platform_targets", ["youtube"]), - "resolution": project.get("resolution"), - "fps": project.get("fps"), + "resolution": project.get("resolution"), + "fps": project.get("fps"), "duration_seconds": project.get("duration_seconds"), } diff --git a/gnommo/renderer.py b/gnommo/renderer.py index 8971195..6a9cf15 100644 --- a/gnommo/renderer.py +++ b/gnommo/renderer.py @@ -22,12 +22,46 @@ from .preprocessor import run_ffmpeg_with_progress def _get_audio_duration(audio_path: Path) -> float: - """Get duration of an audio file using ffprobe.""" + """Get duration of an audio file using ffprobe. + + For MP3 files, counts packets directly to get an accurate duration regardless + of whether the file has a Xing/VBRI header. Falls back to format duration for + other formats. + """ + if audio_path.suffix.lower() == ".mp3": + # Count actual packets rather than trusting the header estimate. + # This is slower but accurate for headerless VBR/CBR MP3s. 
+ cmd = [ + "ffprobe", + "-v", + "error", + "-count_packets", + "-show_entries", + "stream=nb_read_packets,duration", + "-select_streams", + "a:0", + "-of", + "default=noprint_wrappers=1:nokey=1", + str(audio_path), + ] + result = subprocess.run(cmd, capture_output=True, text=True) + if result.returncode == 0: + # Output: duration\nnb_read_packets — take the first non-N/A line + for line in result.stdout.strip().splitlines(): + try: + val = float(line) + if val > 0: + return val + except ValueError: + continue cmd = [ "ffprobe", - "-v", "error", - "-show_entries", "format=duration", - "-of", "default=noprint_wrappers=1:nokey=1", + "-v", + "error", + "-show_entries", + "format=duration", + "-of", + "default=noprint_wrappers=1:nokey=1", str(audio_path), ] result = subprocess.run(cmd, capture_output=True, text=True) @@ -208,16 +242,28 @@ def _resolve_video_path( def _has_audio_stream(video_path: Path) -> bool: - """Check if a video file contains an audio stream using ffprobe.""" + """Check if a video file contains a non-empty audio stream. + + Uses -analyzeduration 0 to avoid the slow avformat_find_stream_info() scan + that happens when an MP4 has a declared audio track with no actual frames — + ffprobe would otherwise scan the entire file looking for audio packets. + + Also checks nb_frames to reject ghost audio tracks (stream header exists in + the moov atom but no sample data in stsc/stsz). 
+ """ result = subprocess.run( [ "ffprobe", "-v", "error", + "-analyzeduration", + "0", + "-probesize", + "1000000", "-select_streams", - "a", + "a:0", "-show_entries", - "stream=index", + "stream=index,nb_frames", "-of", "csv=p=0", str(video_path), @@ -225,7 +271,16 @@ def _has_audio_stream(video_path: Path) -> bool: capture_output=True, text=True, ) - return bool(result.stdout.strip()) + output = result.stdout.strip() + if not output: + return False + # output is "index" or "index,nb_frames" + parts = output.split(",") + if len(parts) >= 2: + nb_frames = parts[1].strip() + if nb_frames == "0": + return False # Ghost audio track — declared but no sample data + return True def _build_audio_channel_filter(use_audio_channels: str) -> str: @@ -263,11 +318,18 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]: # Add -ss seek BEFORE -i for skip parameter and/or partial rendering always_visible_inputs: list[int] = [] for video_id, video_source, cutout in plan.narration_videos: - video_path = _resolve_video_path(videos_dir, video_source, shared_assets_dir, project_path) + video_path = _resolve_video_path( + videos_dir, video_source, shared_assets_dir, project_path + ) # Combine video skip setting with partial render offset total_seek = video_source.skip + plan.input_seek_time if total_seek > 0: cmd.extend(["-ss", f"{total_seek:.3f}"]) + # Skip stream analysis — codec params are in the container header, and + # duration is already known by gnommo via ffprobe (plan.total_duration). + # Without this, FFmpeg reads 100MB+ of compressed data per input at 4K + # bitrates before encoding starts ("Estimating duration from bitrate"). 
+ cmd.extend(["-analyzeduration", "0", "-probesize", "1000"]) cmd.extend(["-i", str(video_path)]) always_visible_inputs.append(input_idx) input_idx += 1 @@ -283,18 +345,26 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]: shared_assets_dir = project_path.parent / "shared_assets" videos_json_bg = shared_assets_dir / "videos.json" if not videos_json_bg.exists(): - raise RenderError(f"shared_assets/videos.json not found (needed for background handle '{bg_handle}')") + raise RenderError( + f"shared_assets/videos.json not found (needed for background handle '{bg_handle}')" + ) bg_videos = _read_json(videos_json_bg) if bg_handle not in bg_videos: - raise RenderError(f"Background handle '{bg_handle}' not found in shared_assets/videos.json") + raise RenderError( + f"Background handle '{bg_handle}' not found in shared_assets/videos.json" + ) bg_path = shared_assets_dir / bg_videos[bg_handle]["source_file"] if not bg_path.exists(): - raise RenderError(f"Background file not found: {bg_path} (from handle '{bg_handle}')") + raise RenderError( + f"Background file not found: {bg_path} (from handle '{bg_handle}')" + ) image_extensions = {".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".webp"} bg_is_image = bg_path.suffix.lower() in image_extensions # Loop background videos infinitely if not bg_is_image: cmd.extend(["-stream_loop", "-1"]) + # Duration of background video is irrelevant (looped or image) — skip analysis + cmd.extend(["-analyzeduration", "0", "-probesize", "1000"]) cmd.extend(["-i", str(bg_path)]) bg_idx = input_idx input_idx += 1 @@ -325,14 +395,24 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]: video_path = _resolve_video_path( videos_dir, event.video_source, shared_assets_dir, project_path ) - # Seek to skip point before loading input skip = event.video_source.skip if skip > 0: cmd.extend(["-ss", f"{skip:.3f}"]) + cmd.extend(["-analyzeduration", "0", "-probesize", "1000"]) + # Use pre-probed duration to 
tell FFmpeg exactly how much to read, + # preventing scans of ghost audio tracks on empty MP4 audio streams. + if event.video_source.duration is not None: + remaining = event.video_source.duration - skip + if remaining > 0: + cmd.extend(["-t", f"{remaining:.3f}"]) cmd.extend(["-i", str(video_path)]) video_inputs[i] = input_idx input_idx += 1 - if _has_audio_stream(video_path): + has_audio = event.video_source.has_audio + if has_audio is None: + print(f" Warning: no cached metadata for '{event.video_source.source_file}' — run 'gnommo import' to avoid slow probing") + has_audio = _has_audio_stream(video_path) + if has_audio: video_events_with_audio.add(i) # Input: outro videos (play after narration ends) @@ -343,14 +423,22 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]: video_path = _resolve_video_path( videos_dir, event.video_source, shared_assets_dir, project_path ) - # Seek to skip point before loading input skip = event.video_source.skip if skip > 0: cmd.extend(["-ss", f"{skip:.3f}"]) + cmd.extend(["-analyzeduration", "0", "-probesize", "1000"]) + if event.video_source.duration is not None: + remaining = event.video_source.duration - skip + if remaining > 0: + cmd.extend(["-t", f"{remaining:.3f}"]) cmd.extend(["-i", str(video_path)]) outro_inputs[i] = input_idx input_idx += 1 - if _has_audio_stream(video_path): + has_audio = event.video_source.has_audio + if has_audio is None: + print(f" Warning: no cached metadata for '{event.video_source.source_file}' — run 'gnommo import' to avoid slow probing") + has_audio = _has_audio_stream(video_path) + if has_audio: outro_events_with_audio.add(i) # Track where audio inputs start @@ -365,12 +453,24 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]: if event.audio_id not in audio_inputs: audio_path = audio_dir / event.audio_def.file audio_path, _ = resolve_with_cache(audio_path, project_path) + # Use pre-probed duration from audio.json if available (set by import). 
+ # For MP3 without Xing/VBRI headers this is critical — FFmpeg otherwise + # scans the whole file to estimate duration (100s+ for large files). + # Fall back to live probe only for MP3 when duration wasn't pre-cached. + file_duration = event.audio_def.duration + if file_duration is None and audio_path.suffix.lower() == ".mp3": + file_duration = _get_audio_duration(audio_path) + if file_duration is not None: + cmd.extend(["-t", str(file_duration)]) cmd.extend(["-i", str(audio_path)]) audio_inputs[event.audio_id] = input_idx input_idx += 1 - # Cache duration if this audio uses crossfade looping + # Cache duration for crossfade loop filter if event.audio_def.loop and event.audio_def.overlap: - audio_durations[event.audio_id] = _get_audio_duration(audio_path) + audio_durations[event.audio_id] = ( + file_duration if file_duration is not None + else _get_audio_duration(audio_path) + ) # Build filter_complex filter_complex = build_filter_complex( @@ -418,7 +518,7 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]: "-preset", "fast", "-crf", - "23", + "20", "-c:a", "aac", "-b:a", @@ -793,6 +893,43 @@ def build_filter_complex( ) current_label = next_label + # Add "below-slides" triggered video overlays (vfb/vsb or layer="below") + for i, event in enumerate(plan.video_events): + if event.layer != "below": + continue + video_idx = video_inputs[i] + cut_x, cut_y, cut_width, cut_height = _calculate_cutout_position( + event.cutout, width, height + ) + + duration = event.end_time - event.start_time + if event.video_source.take is not None: + duration = min(duration, event.video_source.take) + effective_end = event.start_time + duration + + zoom = event.video_source.zoom + zoomed_width = int(cut_width * zoom) + zoomed_height = int(cut_height * zoom) + + video_label = f"tvb{i}" + start_pts = event.start_time + filters.append( + f"[{video_idx}:v]format=yuva444p10le," + f"setpts=PTS-STARTPTS+{start_pts:.3f}/TB," + 
f"scale={zoomed_width}:{zoomed_height}:force_original_aspect_ratio=increase," + f"crop={cut_width}:{cut_height}:(iw-{cut_width})/2:(ih-{cut_height})/2," + f"format=rgba[{video_label}]" + ) + + next_label = f"tvbbase{i}" + enable_expr = f"between(t\\,{event.start_time:.3f}\\,{effective_end:.3f})" + filters.append( + f"[{current_label}][{video_label}]overlay=" + f"x={cut_x}:y={cut_y}:enable={enable_expr}" + f"[{next_label}]" + ) + current_label = next_label + # Add slide overlays with time-based enable for i, event in enumerate(plan.slide_events): slide_idx = slide_inputs[event.slide_id] @@ -815,8 +952,10 @@ def build_filter_complex( current_label = next_label - # Add triggered video overlays with time-based enable + # Add "above-slides" triggered video overlays (vft/vst or layer="above") for i, event in enumerate(plan.video_events): + if event.layer != "above": + continue video_idx = video_inputs[i] cut_x, cut_y, cut_width, cut_height = _calculate_cutout_position( event.cutout, width, height @@ -836,22 +975,25 @@ def build_filter_complex( # Scale to cover the zoomed area (like CSS object-fit: cover) # Then crop to cutout dimensions (centered) # Use setpts to sync video start with overlay enable time + # IMPORTANT: convert to rgba FIRST (before scale/crop) so the alpha channel + # is preserved throughout. scale in yuva444p10le can silently strip alpha. 
video_label = f"tv{i}" start_pts = event.start_time filters.append( - f"[{video_idx}:v]format=yuva444p10le," + f"[{video_idx}:v]format=rgba," f"setpts=PTS-STARTPTS+{start_pts:.3f}/TB," f"scale={zoomed_width}:{zoomed_height}:force_original_aspect_ratio=increase," - f"crop={cut_width}:{cut_height}:(iw-{cut_width})/2:(ih-{cut_height})/2," - f"format=rgba[{video_label}]" + f"crop={cut_width}:{cut_height}:(iw-{cut_width})/2:(ih-{cut_height})/2" + f"[{video_label}]" ) - # Overlay with time-based enable + # Overlay with time-based enable; format=auto lets FFmpeg pick the right + # compositing format so the RGBA alpha channel is respected. next_label = f"tvbase{i}" enable_expr = f"between(t\\,{event.start_time:.3f}\\,{effective_end:.3f})" filters.append( f"[{current_label}][{video_label}]overlay=" - f"x={cut_x}:y={cut_y}:enable={enable_expr}" + f"x={cut_x}:y={cut_y}:enable={enable_expr}:format=auto" f"[{next_label}]" ) @@ -950,13 +1092,17 @@ def build_filter_complex( _, first_video_source, _ = plan.narration_videos[0] use_channels = first_video_source.use_audio_channels if use_channels == "auto": - narration_path = _resolve_video_path(videos_dir, first_video_source, shared_assets_dir, project_path) + narration_path = _resolve_video_path( + videos_dir, first_video_source, shared_assets_dir, project_path + ) use_channels = _resolve_auto_channel(narration_path) channel_filter = _build_audio_channel_filter(use_channels) narration_volume = first_video_source.volume # Build volume filter if not 1.0 - volume_filter = f"volume={narration_volume:.2f}" if narration_volume != 1.0 else "" + volume_filter = ( + f"volume={narration_volume:.2f}" if narration_volume != 1.0 else "" + ) # Use narration_end_time to stop audio before outro (if outro exists) audio_end_time = ( @@ -980,7 +1126,9 @@ def build_filter_complex( ) audio_labels_to_mix.append("[main_aud]") elif filter_parts: - filters.append(f"[{main_audio_idx}:a]{','.join(filter_parts)}[main_aud]") + filters.append( + 
f"[{main_audio_idx}:a]{','.join(filter_parts)}[main_aud]" + ) audio_labels_to_mix.append("[main_aud]") else: audio_labels_to_mix.append(f"[{main_audio_idx}:a]") @@ -1066,7 +1214,10 @@ def build_filter_complex( label = f"aud{i}" delay_ms = int(event.start_time * 1000) - if event.audio_def.overlap and event.audio_id in audio_durations: + if ( + event.audio_def.overlap + and event.audio_id in audio_durations + ): # Crossfade loop: overlap copies with fade in/out audio_dur = audio_durations[event.audio_id] crossfade_filters = _build_crossfade_loop_filter( diff --git a/gnommo/transcriber.py b/gnommo/transcriber.py index 391a983..a80fc86 100644 --- a/gnommo/transcriber.py +++ b/gnommo/transcriber.py @@ -180,7 +180,9 @@ def words_to_srt( srt_lines = [] for idx, (start, end, text) in enumerate(segments, 1): srt_lines.append(str(idx)) - srt_lines.append(f"{_format_srt_timestamp(start)} --> {_format_srt_timestamp(end)}") + srt_lines.append( + f"{_format_srt_timestamp(start)} --> {_format_srt_timestamp(end)}" + ) srt_lines.append(text) srt_lines.append("") # Blank line between entries diff --git a/gnommo/transformer.py b/gnommo/transformer.py index 6c6f825..d831d74 100644 --- a/gnommo/transformer.py +++ b/gnommo/transformer.py @@ -1,6 +1,7 @@ """Transform stage: resolve timings and build render plan.""" import re +import string from dataclasses import dataclass from pathlib import Path from typing import Optional @@ -99,6 +100,16 @@ def _normalize_text(text: str) -> str: return text.strip() +def _normalize_token(word: str) -> str: + """Normalize a single word token for comparison. + + Strips leading/trailing punctuation and lowercases. Interior characters + (e.g. apostrophes in contractions) are preserved so "don't" stays "don't". + Applied to both transcript tokens and phrase words at comparison time. 
+ """ + return word.lower().strip(string.punctuation) + + def _is_known_marker( marker_id: str, slides: dict = None, videos: dict = None, audio: dict = None ) -> bool: @@ -122,8 +133,9 @@ def _is_known_marker( if marker_id in slides: return True - # Video/narration triggers - if marker_id.startswith("video:") or marker_id.startswith("narration:"): + # Video/narration triggers (all supported prefixes) + _VIDEO_PREFIXES = ("video:", "narration:", "vft:", "vfb:", "vst:", "vsb:", "vftp:", "vfbp:", "vstp:", "vsbp:") + if any(marker_id.startswith(p) for p in _VIDEO_PREFIXES): return True # Camera presets @@ -143,20 +155,11 @@ def _strip_unknown_markers( text: str, slides: dict = None, videos: dict = None, audio: dict = None ) -> str: """ - Remove unknown markers from text. + Remove all [...] markers from context text — none are pronounced aloud. - Unknown markers aren't pronounced, so they should be stripped - before fuzzy matching. Note: [cite:...] markers are already - stripped at parse time by parse_manuscript(). + Note: [cite:...] markers are already stripped at parse time by parse_manuscript(). """ - - def replace_marker(match): - marker_id = match.group(1) - if _is_known_marker(marker_id, slides, videos, audio): - return match.group(0) # Keep known markers - return "" # Strip unknown markers - - return re.sub(r"\[([A-Za-z0-9_:]+)\]", replace_marker, text) + return re.sub(r"\[([^\]]+)\]", "", text) def _extract_marker_contexts( @@ -177,8 +180,9 @@ def _extract_marker_contexts( videos = videos or {} audio = audio or {} - # Split by markers, keeping the markers - parts = re.split(r"\[([A-Za-z0-9_:]+)\]", manuscript_text) + # Split by markers, keeping the markers — broad pattern handles any content + # including paths with / and - (e.g. [vfb:pexels/7670835-uhd_3840_2160_30fps]) + parts = re.split(r"\[([^\]]+)\]", manuscript_text) # parts: [text_before, marker1, text_after1, marker2, text_after2, ...] 
raw_contexts = [] @@ -189,16 +193,27 @@ def _extract_marker_contexts( if not _is_known_marker(marker_id, slides, videos, audio): continue - if i + 1 < len(parts): - following_text = parts[i + 1].strip() - # Clean up: remove newlines, collapse whitespace - following_text = " ".join(following_text.split()) - # Strip unknown markers from following text (they're not pronounced) - following_text = _strip_unknown_markers( - following_text, slides, videos, audio - ) - following_text = " ".join(following_text.split()) # Clean up extra spaces - raw_contexts.append((marker_id, following_text)) + # Collect all following text, looking past unknown markers until the + # next known marker. This handles [S1][segment:1] text... where the + # text lives two parts ahead rather than immediately after S1. + text_pieces = [] + j = i + 1 + while j < len(parts): + chunk = parts[j].strip() + if chunk: + text_pieces.append(chunk) + j += 1 # advance to the marker after this text chunk + if j >= len(parts): + break + if _is_known_marker(parts[j], slides, videos, audio): + break # stop at the next known marker + j += 1 # skip the unknown marker; its following text is next + + following_text = " ".join(text_pieces) + following_text = " ".join(following_text.split()) # collapse whitespace + following_text = _strip_unknown_markers(following_text, slides, videos, audio) + following_text = " ".join(following_text.split()) + raw_contexts.append((marker_id, following_text)) # For markers with no following text (consecutive markers), look ahead # Return (marker_id, following_text, is_borrowed) - is_borrowed=True means text came from look-ahead @@ -209,13 +224,20 @@ def _extract_marker_contexts( words = following_text.split()[:10] contexts.append((marker_id, " ".join(words), False)) else: - # Look ahead for next marker with text + # Look ahead for next marker with text, but never borrow from another + # slide marker — slides must align independently to avoid two consecutive + # slides matching the 
same transcription position simultaneously. + borrowed = False for j in range(i + 1, len(raw_contexts)): - if raw_contexts[j][1]: - words = raw_contexts[j][1].split()[:10] + next_marker_id, next_text = raw_contexts[j] + if next_text: + if next_marker_id in (slides or {}): + break # Slide owns this text; give up borrowing + words = next_text.split()[:10] contexts.append((marker_id, " ".join(words), True)) # Borrowed + borrowed = True break - else: + if not borrowed: contexts.append((marker_id, "", False)) return contexts @@ -250,7 +272,8 @@ def _fuzzy_match_ratio( return 0.0, 0, 0 transcript_words = [ - _normalize_text(transcription[j].word) for j in range(start_idx, transcript_end) + _normalize_token(transcription[j].word) + for j in range(start_idx, transcript_end) ] # Match phrase words sequentially against transcript window @@ -261,7 +284,7 @@ def _fuzzy_match_ratio( last_match_end_offset = 0 for phrase_word in phrase_words[:words_to_check]: - normalized = _normalize_text(phrase_word) + normalized = _normalize_token(phrase_word) if len(normalized) < 2: continue # skip very short words (a, I, etc.) - don't count them words_checked += 1 @@ -303,8 +326,12 @@ def _find_phrase_timestamp( (-1, -1.0, 0.0, -1) if not found. word_index points to the first matched word. match_end_idx points past the last matched word. """ - phrase_normalized = _normalize_text(phrase) - phrase_words = phrase_normalized.split() + # Normalize each word individually — same method as transcript tokens. + # This keeps contractions as single tokens ("haven't" stays "haven't") so + # phrase and transcript word counts stay in sync. Using _normalize_text on + # the whole phrase would expand "haven't" → "have not" (2 words), creating + # a phantom "not" that fails to match the transcript and corrupts the window. 
+ phrase_words = [tok for tok in (_normalize_token(w) for w in phrase.split()) if tok] if not phrase_words: return -1, -1.0, 0.0, -1 @@ -504,7 +531,9 @@ def build_render_plan( cached_files: set[str] = set() narration_videos: list[tuple[str, VideoSource, CutoutDefinition]] = [] - video_path, is_cached = _resolve_video_path(videos_dir, narration_video, shared_assets_dir, project_path) + video_path, is_cached = _resolve_video_path( + videos_dir, narration_video, shared_assets_dir, project_path + ) if is_cached: cached_files.add(narration_video_id) full_duration = get_video_duration(video_path) @@ -798,40 +827,127 @@ def _extract_video_events( ] ) - # Collect video markers - video_markers: list[tuple[float, str, str]] = [] # (time, video_id, type) + # Mapping from shorthand marker prefix → (implied_cutout_name, implied_layer) + # These are the defaults; videos.json values act as a base but the marker wins. + _SHORTHAND: dict[str, tuple[str, str]] = { + "vft:": ("fullscreen", "above"), + "vfb:": ("fullscreen", "below"), + "vst:": ("square", "above"), + "vsb:": ("square", "below"), + "vftp:": ("fullscreen", "above", "pause_narration"), + "vfbp:": ("fullscreen", "below", "pause_narration"), + "vstp:": ("square", "above", "pause_narration"), + "vsbp:": ("square", "below", "pause_narration"), + } + + # Collect video markers: (time, video_id, event_type, cutout_name_override, layer_override) + # event_type is "video" (ends at next slide) or "narration" (runs to end) + video_markers: list[tuple[float, str, str, str | None, str | None]] = [] + for timing in marker_timings: if timing.timestamp < 0: continue - if timing.marker_id.startswith("video:"): - video_id = timing.marker_id[6:] - if video_id in videos: - video_source = videos[video_id] - if video_source.cutout and video_source.cutout in cutouts: - video_markers.append((timing.timestamp, video_id, "video")) + mid = timing.marker_id - elif timing.marker_id.startswith("narration:"): - video_id = timing.marker_id[10:] - if 
video_id in videos: - video_source = videos[video_id] - if video_source.cutout and video_source.cutout in cutouts: - video_markers.append((timing.timestamp, video_id, "narration")) + # --- shorthand markers: vft/vfb/vst/vsb --- + shorthand_match = next((p for p in _SHORTHAND if mid.startswith(p)), None) + if shorthand_match: + video_id = mid[len(shorthand_match) :] + if video_id not in videos: + raise ValueError( + f"Marker [{mid}] references unknown video '{video_id}'. " + f"Add it to videos.json or remove the marker." + ) + implied_cutout, implied_layer = _SHORTHAND[shorthand_match] + if implied_cutout not in cutouts: + raise ValueError( + f"Marker [{mid}] uses shorthand '{shorthand_match}' which requires " + f"cutout '{implied_cutout}' but it is not defined in project config. " + f"Available cutouts: {list(cutouts.keys())}" + ) + video_markers.append( + (timing.timestamp, video_id, "video", implied_cutout, implied_layer) + ) + continue + + # --- legacy [video:xxx] --- + if mid.startswith("video:"): + video_id = mid[6:] + if video_id not in videos: + raise ValueError( + f"Marker [video:{video_id}] references unknown video '{video_id}'. " + f"Add it to videos.json or remove the marker." + ) + video_source = videos[video_id] + if not video_source.cutout: + raise ValueError( + f"Marker [video:{video_id}] — video '{video_id}' has no 'cutout' set in videos.json." + ) + if video_source.cutout not in cutouts: + raise ValueError( + f"Marker [video:{video_id}] — cutout '{video_source.cutout}' is not defined in project config. " + f"Available: {list(cutouts.keys())}" + ) + video_markers.append( + (timing.timestamp, video_id, "video", None, None) + ) + continue + + # --- [narration:xxx] --- + if mid.startswith("narration:"): + video_id = mid[10:] + if video_id not in videos: + raise ValueError( + f"Marker [narration:{video_id}] references unknown video '{video_id}'. " + f"Add it to videos.json or remove the marker." 
+ ) + video_source = videos[video_id] + if not video_source.cutout: + raise ValueError( + f"Marker [narration:{video_id}] — video '{video_id}' has no 'cutout' set in videos.json." + ) + if video_source.cutout not in cutouts: + raise ValueError( + f"Marker [narration:{video_id}] — cutout '{video_source.cutout}' is not defined in project config. " + f"Available: {list(cutouts.keys())}" + ) + video_markers.append( + (timing.timestamp, video_id, "narration", None, None) + ) events: list[VideoEvent] = [] - for start_time, video_id, marker_type in video_markers: + for ( + start_time, + video_id, + marker_type, + cutout_override, + layer_override, + ) in video_markers: video_source = videos[video_id] - cutout = cutouts[video_source.cutout] - if marker_type == "video": - # End at next slide + # Resolve cutout: marker override > videos.json cutout + # (validation already ensured cutout exists — this is a safety assertion) + cutout_name = cutout_override or video_source.cutout + cutout = cutouts[cutout_name] + + # Resolve layer: marker override > videos.json layer + layer = layer_override if layer_override is not None else video_source.layer + + end_on = video_source.end_on + if end_on == "take" and video_source.take is not None: + end_time = start_time + video_source.take + elif end_on == "end": + end_time = total_duration + elif end_on == "next_slide" or (end_on is None and marker_type == "video"): + # End at next slide marker end_time = total_duration for slide_time in slide_times: if slide_time > start_time: end_time = slide_time break else: - # narration: runs to end + # end_on is None and marker_type == "narration": runs to end end_time = total_duration # Filter by time range @@ -846,6 +962,8 @@ def _extract_video_events( end_time=end_time, video_source=video_source, cutout=cutout, + cutout_name=cutout_name, + layer=layer, ) ) @@ -992,7 +1110,9 @@ def _extract_outro_events( video_source = videos[video_id] # Get the video duration - video_path, is_cached = 
_resolve_video_path(videos_dir, video_source, shared_assets_dir, project_path) + video_path, is_cached = _resolve_video_path( + videos_dir, video_source, shared_assets_dir, project_path + ) if is_cached and cached_files is not None: cached_files.add(video_id) if video_path.exists(): diff --git a/gnommo/validator.py b/gnommo/validator.py index ba9e4dc..c4dfe1f 100644 --- a/gnommo/validator.py +++ b/gnommo/validator.py @@ -57,16 +57,26 @@ def validate_project( # Skip audio markers (start with 'A' followed by audio id, e.g., Awoosh) if marker.startswith("A") and len(marker) > 1 and marker[1:].isalnum(): continue - # Validate video trigger markers (video:xxx) - slide-like videos - if marker.startswith("video:"): - video_id = marker[6:] # Remove 'video:' prefix + # Validate video trigger markers — both legacy [video:xxx] and + # shorthand [vft:xxx] / [vfb:xxx] / [vst:xxx] / [vsb:xxx]. + _VIDEO_PREFIXES = { + "video:": 6, + "vft:": 4, + "vfb:": 4, + "vst:": 4, + "vsb:": 4, + } + matched_prefix = next( + (p for p in _VIDEO_PREFIXES if marker.startswith(p)), None + ) + if matched_prefix is not None: + video_id = marker[_VIDEO_PREFIXES[matched_prefix] :] if video_id not in videos: - # Check if it's a file extension mismatch hint = "" if "." in video_id: base_name = video_id.rsplit(".", 1)[0] if base_name in videos: - hint = f" (Did you mean [video:{base_name}]? Don't include file extensions in markers)" + hint = f" (Did you mean [{matched_prefix}{base_name}]? 
Don't include file extensions in markers)" warnings.append( ValidationIssue( f"Video marker [{marker}] referenced in manuscript but '{video_id}' not defined in videos.json{hint} — using PlaceholderVideo instead", @@ -214,11 +224,12 @@ def validate_project( ) ) - # Check we have at least one video source - if not videos: + # Check videos.json exists (empty is fine — project may not need triggered videos) + if not (project_path / config.videos_path).exists(): issues.append( ValidationIssue( - "No video sources defined in videos.json", project_path / "videos.json" + "videos.json not found — run 'gnommo import' to create it", + project_path / "videos.json", ) ) diff --git a/transcode.sh b/transcode.sh index 7519cf5..81da9d4 100755 --- a/transcode.sh +++ b/transcode.sh @@ -8,13 +8,13 @@ # Options: # --replace Delete original files after successful transcoding # --dry-run Show what would be transcoded without doing it -# --crf Quality level (default: 23, lower=better quality, 18-28 typical) +# --crf Quality level (default: 20, lower=better quality, 18-28 typical) # set -e # Configuration -DEFAULT_CRF=23 +DEFAULT_CRF=18 EXTENSIONS=("mov" "mp4" "m4v" "avi" "mkv" "mxf") usage() { @@ -44,7 +44,7 @@ Examples: $(basename "$0") ./media/videos # Transcode folder (smallest first) $(basename "$0") ./media/videos --dry-run # Preview only $(basename "$0") ./media/videos --replace # Transcode and delete originals - $(basename "$0") ./media/videos --crf 20 # Higher quality + $(basename "$0") ./media/videos --crf 18 # Higher quality EOF exit 0