From b9b5a8e77d9f4e21e17769fef0a1a8c3a698ab69 Mon Sep 17 00:00:00 2001 From: jenstandstad Date: Sun, 7 Jun 2026 11:19:19 +0200 Subject: [PATCH] Adding pexels downloader and fixes --- GlitchTrailer/project.json | 4 +- all.sh | 2 +- example/project.json | 1 + gnommo/cache.py | 14 +- gnommo/cli.py | 357 +++++++++++++++++++++++++++++++------ gnommo/models.py | 14 +- gnommo/parser.py | 7 +- gnommo/pexels.py | 312 ++++++++++++++++++++++++++++++++ gnommo/preprocessor.py | 25 +++ gnommo/renderer.py | 131 +++++++++++--- gnommo/transformer.py | 154 ++++++++++++---- gnommo/validator.py | 82 +++++++-- 12 files changed, 957 insertions(+), 146 deletions(-) create mode 100644 gnommo/pexels.py diff --git a/GlitchTrailer/project.json b/GlitchTrailer/project.json index 7f1a339..230cdd4 100644 --- a/GlitchTrailer/project.json +++ b/GlitchTrailer/project.json @@ -7,13 +7,14 @@ "platform_targets": ["youtube"], "status": "scripted", "youtube_url": null, - "resolution": [1960, 1080], + "resolution": [1920, 1080], "fps": 30, "duration_seconds": null, "default_filters": { "audioonly": [ { "type": "audio_normalize", +"enable":false, "compress": false, "normalize": true, "target_lufs": -14, @@ -24,6 +25,7 @@ "talkinghead": [ { "type": "audio_normalize", +"enable":false, "normalize": true, "target_lufs": -14, "target_lra": 11, diff --git a/all.sh b/all.sh index 1d49795..38e81d9 100755 --- a/all.sh +++ b/all.sh @@ -3,7 +3,7 @@ ./gnommo.sh -p video1 all --force --prod ./gnommo.sh -p video2 all --force --prod ./gnommo.sh -p video3 all --force --prod -#./gnommo.sh -p video4 all --force +./gnommo.sh -p video4 all --force --prod #./gnommo.sh -p video5 all --force #./gnommo.sh -p video6 all --force diff --git a/example/project.json b/example/project.json index 79b79fb..d6ffc84 100644 --- a/example/project.json +++ b/example/project.json @@ -18,6 +18,7 @@ "talkinghead": [ { "type": "audio_normalize", +"enable":false, "eq_bands": [ {"freq": 47, "gain": -15, "type": "lowshelf"}, {"freq": 107, "gain": 
-1.3, "q": 1.2}, diff --git a/gnommo/cache.py b/gnommo/cache.py index 9999b3a..84e4d9b 100644 --- a/gnommo/cache.py +++ b/gnommo/cache.py @@ -129,14 +129,24 @@ def resolve_with_cache( if cache_base is None: return local_path, False # No cache configured - # Build cache path: {cache_base}/{project_name}/{relative_path} + # Try 1: path inside the project → cache_base / project_name / relative try: relative = local_path.relative_to(project_path) cache_path = cache_base / project_path.name / relative if cache_path.exists(): return cache_path, True except ValueError: - pass # local_path is not relative to project_path + pass # local_path is not under project_path + + # Try 2: path relative to gnommo root (sibling dirs like shared_assets) + # e.g. shared_assets/pexels/file.mp4 → cache_base / shared_assets / pexels / file.mp4 + try: + relative = local_path.relative_to(project_path.parent) + cache_path = cache_base / relative + if cache_path.exists(): + return cache_path, True + except ValueError: + pass # local_path is not under project_path.parent either return local_path, False diff --git a/gnommo/cli.py b/gnommo/cli.py index 6fa9a95..5a5f31b 100644 --- a/gnommo/cli.py +++ b/gnommo/cli.py @@ -106,6 +106,7 @@ Examples: "pull", "handoff", "transcode", + "pexels", ], help="Action to perform (default: render)", ) @@ -310,6 +311,8 @@ Examples: return cmd_handoff( project_path, args.verbose, args.file, args.prod, args.res ) + elif action == "pexels": + return cmd_pexels(project_path, args.verbose) except GnommoError as e: print(f"Error: {e}", file=sys.stderr) @@ -362,7 +365,7 @@ def cmd_import(project_path: Path, force: bool, verbose: bool) -> int: keynote_file = keynote_files[0] # Use first .key file found if len(keynote_files) > 1: print(f" Warning: Multiple .key files found, using {keynote_file.name}") - _import_presenter_notes(project_path, keynote_file, verbose) + _import_presenter_notes(project_path, keynote_file, verbose, config) # Generate slides.json for each slide 
directory (after Keynote export) slides_base = project_path / "media" / "slides" @@ -391,6 +394,42 @@ def cmd_import(project_path: Path, force: bool, verbose: bool) -> int: # Probe and cache video metadata (duration, has_audio) into videos.json _probe_video_metadata(project_path, config, shared_assets_dir, force, verbose) + # ETL: if a manuscript exists, project shorthand marker semantics (cutout/layer) + # into videos.json so the render stage is always data-driven from the manuscript. + # Run AFTER sync so newly-added shared videos are already present when we write + # their cutout/layer. Also warn about any referenced video that is still missing. + manuscript_path = project_path / "manuscript.txt" + if manuscript_path.exists() and config: + from .parser import parse_manuscript + from .transformer import _SHORTHAND_PREFIXES + + _, markers, _, _ = parse_manuscript(project_path) + if markers: + _project_markers_to_videos( + markers, + project_path / config.videos_path, + config, + project_path, + ) + + # Warn about shorthand-referenced videos still absent from videos.json + videos_json_path = project_path / config.videos_path + local_vids: dict = ( + _read_json(videos_json_path) if videos_json_path.exists() else {} + ) + seen_missing: set[str] = set() + for marker in markers: + for prefix in _SHORTHAND_PREFIXES: + if marker.startswith(prefix): + vid_id = marker[len(prefix):] + if vid_id not in local_vids and vid_id not in seen_missing: + print( + f" ⚠ [{marker}] video '{vid_id}' not found in " + f"videos.json or shared_assets — add it manually" + ) + seen_missing.add(vid_id) + break + print("Import complete.") return 0 @@ -729,33 +768,47 @@ def _import_shared_assets(shared_assets_dir: Path, verbose: bool) -> None: """ video_extensions = {".mov", ".mp4", ".webm", ".avi", ".mkv", ".m4v"} - # Find all video files in shared_assets (root level and subdirectories) + # Find all video files in shared_assets (root level and subdirectories). 
+ # Also scan the GnommoDisk cache mirror so files placed there are registered. + from .cache import load_cache_config + + scan_roots: list[Path] = [shared_assets_dir] + cache_base = load_cache_config() + if cache_base: + cache_shared = cache_base / "shared_assets" + if cache_shared.exists() and cache_shared != shared_assets_dir: + scan_roots.append(cache_shared) + video_files: list[tuple[Path, Path]] = [] # (relative_path, absolute_path) + seen_rel: set[str] = set() # deduplicate by relative path - for item in shared_assets_dir.iterdir(): - if item.name.startswith("."): - continue + for scan_root in scan_roots: + for item in scan_root.iterdir(): + if item.name.startswith("."): + continue - if item.is_file(): - # Video file directly in shared_assets root - if ( - item.suffix.lower() in video_extensions - and not item.name.endswith("_processed.mov") - and not item.name.endswith("_processed.webm") - ): - rel_path = item.relative_to(shared_assets_dir) - video_files.append((rel_path, item)) - elif item.is_dir(): - # Scan subdirectories recursively - for video_file in item.rglob("*"): + if item.is_file(): if ( - video_file.is_file() - and video_file.suffix.lower() in video_extensions - and not video_file.name.endswith("_processed.mov") - and not video_file.name.endswith("_processed.webm") + item.suffix.lower() in video_extensions + and not item.name.endswith("_processed.mov") + and not item.name.endswith("_processed.webm") ): - rel_path = video_file.relative_to(shared_assets_dir) - video_files.append((rel_path, video_file)) + rel_path = item.relative_to(scan_root) + if str(rel_path) not in seen_rel: + seen_rel.add(str(rel_path)) + video_files.append((rel_path, item)) + elif item.is_dir(): + for video_file in item.rglob("*"): + if ( + video_file.is_file() + and video_file.suffix.lower() in video_extensions + and not video_file.name.endswith("_processed.mov") + and not video_file.name.endswith("_processed.webm") + ): + rel_path = video_file.relative_to(scan_root) + if 
str(rel_path) not in seen_rel: + seen_rel.add(str(rel_path)) + video_files.append((rel_path, video_file)) if not video_files: if verbose: @@ -1049,11 +1102,36 @@ def _import_narration_segments(narration_dir: Path, config, verbose: bool) -> No print(f" No new narration segments to add") +def _write_youtube_meta( + project_path: Path, config, citations: list[str] +) -> None: + """Write youtube_meta.txt with project description and collected citations.""" + meta_path = project_path / "youtube_meta.txt" + lines: list[str] = [] + + if config and config.description: + lines.append("== Description ==") + lines.append(config.description) + lines.append("") + + if citations: + lines.append("== References ==") + for i, cite in enumerate(citations, 1): + lines.append(f"{i}. {cite}") + lines.append("") + + meta_path.write_text("\n".join(lines), encoding="utf-8") + print(f" Wrote {meta_path.name} ({len(citations)} reference(s))") + + def _import_presenter_notes( - project_path: Path, keynote_file: Path, verbose: bool + project_path: Path, keynote_file: Path, verbose: bool, config=None ) -> None: """Extract presenter notes from Keynote and write to manuscript.txt. + [cite:...] markers are stripped from the manuscript and collected into + youtube_meta.txt alongside the project description. + Uses the JXA script (extract_keynote_notes.js) to extract notes via osascript. Also exports slides as PNG images to media/slides/{project_name}/. Backs up existing manuscript.txt before overwriting. @@ -1116,21 +1194,44 @@ def _import_presenter_notes( print(f" Error parsing notes JSON: {e}", file=sys.stderr) return - # Convert to manuscript.txt format + # Convert to manuscript.txt format, stripping [cite:...] 
markers + _CITE_RE = re.compile(r"\[cite:([^\]]+)\]") lines = [] + citations: list[str] = [] + seen_citations: set[str] = set() + for item in notes_data: idx = item.get("slide_index") notes = (item.get("notes") or "").rstrip() lines.append(f"[S{idx}]") if notes: - lines.append(notes) + clean_note_lines = [] + for note_line in notes.splitlines(): + for m in _CITE_RE.finditer(note_line): + cite_text = m.group(1).strip() + if cite_text not in seen_citations: + citations.append(cite_text) + seen_citations.add(cite_text) + cleaned = _CITE_RE.sub("", note_line).strip() + if cleaned: + clean_note_lines.append(cleaned) + if clean_note_lines: + lines.append("\n".join(clean_note_lines)) lines.append("") # blank line between slides - # Write manuscript.txt - manuscript_path.write_text("\n".join(lines).rstrip() + "\n", encoding="utf-8") + # Write manuscript.txt with Unix line endings (Keynote notes may contain \r\n or \r) + content = "\n".join(lines).rstrip() + "\n" + content = content.replace("\r\n", "\n").replace("\r", "\n") + manuscript_path.write_text(content, encoding="utf-8") print(f" Wrote {manuscript_path} ({len(notes_data)} slides)") + # Write youtube_meta.txt with description + collected citations + _write_youtube_meta(project_path, config, citations) + if citations and verbose: + for i, cite in enumerate(citations, 1): + print(f" {i}. 
{cite}") + if verbose: non_empty = sum(1 for item in notes_data if item.get("notes")) print(f" {non_empty} slides have presenter notes") @@ -1221,6 +1322,71 @@ def _write_tasks_file( ) +# ============================================================================= +# Pexels Download Command +# ============================================================================= + + +def cmd_pexels(project_path: Path, verbose: bool) -> int: + """Download missing Pexels videos and enrich metadata for existing ones.""" + from .parser import parse_manuscript, parse_project_config, parse_videos + from .pexels import ( + get_pexels_api_key, + find_missing_pexels_videos, + download_video, + update_videos_json, + enrich_missing_descriptions, + ) + + api_key = get_pexels_api_key() + if not api_key: + print( + "Error: Pexels API key not configured.\n" + "Add to ~/.gnommo.conf:\n" + " [pexels]\n" + " api_key = YOUR_KEY_HERE\n" + "Get a free key at https://www.pexels.com/api/", + file=sys.stderr, + ) + return 1 + + config = parse_project_config(project_path) + _, markers, _, _ = parse_manuscript(project_path) + videos, _ = parse_videos(project_path, config) + + shared_assets_dir = _find_shared_assets(project_path) + if not shared_assets_dir: + print("Error: shared_assets directory not found.", file=sys.stderr) + return 1 + + local_videos_json = project_path / config.videos_path + shared_videos_json = shared_assets_dir / "videos.json" + + # 1. 
Download missing files + missing = find_missing_pexels_videos(markers, videos, shared_assets_dir) + failed = 0 + if missing: + print(f"Downloading {len(missing)} missing Pexels video(s)...") + for video_id, source_file in missing: + meta = download_video(source_file, shared_assets_dir, api_key) + if meta is None: + failed += 1 + continue + for json_path in (local_videos_json, shared_videos_json): + update_videos_json(json_path, video_id, meta) + if failed: + print(f"\n {failed}/{len(missing)} download(s) failed.") + else: + print(f"\n {len(missing)} video(s) downloaded.") + else: + print("No missing Pexels videos.") + + # 2. Enrich descriptions for existing files that have none + enrich_missing_descriptions(shared_assets_dir, api_key) + + return 1 if failed else 0 + + # ============================================================================= # Validate Command # ============================================================================= @@ -1283,6 +1449,35 @@ def _resolve_process_cache(project_path: Path, config) -> Optional[Path]: return p / project_path.name +def _narration_combined_hint(project_path: Path, config) -> str: + """Return a helpful hint when narration_combined.mov cannot be found. + + If external storage is configured but the volume isn't mounted, the stitch + command wouldn't help — the disk is just not connected. + """ + from .cache import load_cache_config + + missing_paths = [] + + cache_base = load_cache_config() + if cache_base is not None and not cache_base.exists(): + missing_paths.append(cache_base) + + if config and config.process_cache: + pc = Path(config.process_cache) + if not pc.is_absolute(): + pc = (project_path / pc).resolve() + if not pc.exists(): + missing_paths.append(pc) + + if missing_paths: + return ( + f"External disk not connected (expected at {missing_paths[0]}).\n" + "Connect the disk and try again." + ) + return "Run 'gnommo -p stitch' first." 
+ + def _resolve_narration_combined( project_path: Path, videos_dir: Path, config ) -> Optional[Path]: @@ -2143,8 +2338,8 @@ def cmd_stitch( videos_dir_out.mkdir(parents=True, exist_ok=True) print(f" Using {res} dirs: {narration_dir}, {videos_dir_out}") - # Get segment IDs in sorted order - segment_ids = sorted(narration.keys()) + # Get segment IDs in natural order (Segment2 before Segment10) + segment_ids = sorted(narration.keys(), key=lambda s: [int(t) if t.isdigit() else t.lower() for t in re.split(r'(\d+)', s)]) # Show what we're stitching print(f"\n Segments ({len(segment_ids)}):") @@ -2442,7 +2637,7 @@ def _parse_slide_range(slides_arg: str) -> tuple[str, Optional[str]]: def _project_markers_to_videos( - markers: list[str], videos_json_path: Path, config + markers: list[str], videos_json_path: Path, config, project_path: Path = None ) -> None: """ETL: project shorthand marker semantics into videos.json. @@ -2451,6 +2646,9 @@ def _project_markers_to_videos( and layer values directly into videos.json. This runs before parse_videos so the render pass reads already-projected data and needs no shorthand logic. + Videos may live in the project's local videos.json or in shared_assets/videos.json. + Both files are updated so the render pass always finds the projected values. + The manuscript is the authoritative source: the LAST shorthand reference to a given video_id wins, matching what a human editor would expect when they change a marker near the end of the script. @@ -2460,36 +2658,75 @@ def _project_markers_to_videos( from .transformer import _SHORTHAND_PREFIXES # (cutout, layer) lookup table - # Build projection: video_id → {cutout, layer} + _PAUSE_PREFIXES = { + "vftp:", "vfbp:", "vfmp:", + "vf2tp:", "vf2bp:", "vf2mp:", + "vstp:", "vsbp:", "vsmp:", + } + + # Build projection: video_id → {cutout, layer, auto_pause_narration} + # auto_pause_narration=True means: write pause_narration=duration if not already set. 
projection: dict[str, dict] = {} for marker in markers: for prefix, implied in _SHORTHAND_PREFIXES.items(): if marker.startswith(prefix): - video_id = marker[len(prefix) :] + video_id = marker[len(prefix):] cutout, layer = implied[0], implied[1] - projection[video_id] = {"cutout": cutout, "layer": layer} + projection[video_id] = { + "cutout": cutout, + "layer": layer, + "_auto_pause": prefix in _PAUSE_PREFIXES, + } break if not projection: return - with open(videos_json_path, "r", encoding="utf-8") as f: - raw = json.load(f) + def _apply_projection(json_path: Path) -> list[str]: + """Apply projection to one videos.json file; return list of updated IDs.""" + if not json_path.exists(): + return [] + with open(json_path, "r", encoding="utf-8") as f: + raw = json.load(f) + changed = False + updated = [] + for video_id, fields in projection.items(): + if video_id not in raw: + continue + entry = raw[video_id] + video_changed = False + for field, value in fields.items(): + if field == "_auto_pause": + # Write pause_narration = duration only when: + # - marker is a pause-prefix (value is True) + # - pause_narration not already set (preserve manual overrides) + # - duration is known (probed by import) + if value and not entry.get("pause_narration") and entry.get("duration"): + entry["pause_narration"] = entry["duration"] + changed = True + video_changed = True + elif entry.get(field) != value: + entry[field] = value + changed = True + video_changed = True + if video_changed: + updated.append(video_id) + if changed: + with open(json_path, "w", encoding="utf-8") as f: + json.dump(raw, f, indent=2, ensure_ascii=False) + return updated - changed = False - for video_id, fields in projection.items(): - if video_id not in raw: - continue - for field, value in fields.items(): - if raw[video_id].get(field) != value: - raw[video_id][field] = value - changed = True + updated_local = _apply_projection(videos_json_path) + if updated_local: + print(f" Projected marker semantics → 
videos.json: {', '.join(updated_local)}") - if changed: - with open(videos_json_path, "w", encoding="utf-8") as f: - json.dump(raw, f, indent=2, ensure_ascii=False) - updated = [vid for vid in projection if vid in raw] - print(f" Projected marker semantics → videos.json: {', '.join(updated)}") + # Also project into shared_assets/videos.json for pexels/library videos + shared_assets_dir = _find_shared_assets(project_path) if project_path else None + if shared_assets_dir: + shared_videos_json = shared_assets_dir / "videos.json" + updated_shared = _apply_projection(shared_videos_json) + if updated_shared: + print(f" Projected marker semantics → shared_assets/videos.json: {', '.join(updated_shared)}") def _writeback_video_metadata(plan, project_path, config) -> None: @@ -2696,7 +2933,7 @@ def cmd_render( # ETL: project shorthand marker semantics (cutout/layer) into videos.json # before parse_videos reads it, so the render pass is purely data-driven. - _project_markers_to_videos(markers, project_path / config.videos_path, config) + _project_markers_to_videos(markers, project_path / config.videos_path, config, project_path) # Override resolution for preview modes if res != "full": @@ -2705,6 +2942,7 @@ def cmd_render( slides = parse_slides(project_path, config) videos, videos_dir = parse_videos(project_path, config) + source_videos_dir = videos_dir # keep original for validation (pre-downscale) # Non-full res: use downscaled video directory, create on-the-fly if needed if res != "full": @@ -2807,6 +3045,12 @@ def cmd_render( else: transcript_path = project_path / "transcript.json" + # If project.json specifies a transcript path, prefer it (always local) + if config.transcript_path: + local_transcript = project_path / config.transcript_path + if local_transcript.exists(): + transcript_path = local_transcript + # Try cache fallback for transcript transcript_path, _ = resolve_with_cache(transcript_path, project_path) if not transcript_path.exists(): @@ -2825,7 +3069,7 @@ 
def cmd_render( # Stage 2: Validate print("\n[2/4] Validating...") warnings = validate_project( - project_path, markers, config, slides, videos, videos_dir, malformed + project_path, markers, config, slides, videos, source_videos_dir, malformed ) for w in warnings: print(f" Warning: {w}") @@ -3061,7 +3305,12 @@ def cmd_transcribe( words = transcribe_video(video_path, model="base") - output_path = video_path.with_suffix(".transcript.json") + # Save to project-local path if configured in project.json (keeps transcript off external drives) + if config.transcript_path: + output_path = project_path / config.transcript_path + output_path.parent.mkdir(parents=True, exist_ok=True) + else: + output_path = video_path.with_suffix(".transcript.json") save_transcript(words, output_path) print(f" - Transcribed {len(words)} words") @@ -3819,7 +4068,7 @@ def cmd_extract_audio( f"Error: narration_combined.mov not found at {combined_path}", file=sys.stderr, ) - print("Run 'gnommo -p stitch' first.", file=sys.stderr) + print(_narration_combined_hint(project_path, config), file=sys.stderr) return 1 # Output to project out/ directory @@ -3985,7 +4234,7 @@ def cmd_master( f"Error: narration_combined.mov not found at {combined_path}", file=sys.stderr, ) - print("Run 'gnommo -p stitch' first.", file=sys.stderr) + print(_narration_combined_hint(project_path, config), file=sys.stderr) return 1 # Output directory diff --git a/gnommo/models.py b/gnommo/models.py index e90063d..a7c3495 100644 --- a/gnommo/models.py +++ b/gnommo/models.py @@ -49,6 +49,7 @@ class ProjectConfig: slides_path: str = "slides.json" # path to slides.json relative to project videos_path: str = "videos.json" # path to videos.json relative to project audio_path: str = "audio.json" # path to audio.json relative to project + transcript_path: Optional[str] = None # path to transcript.json relative to project (always saved locally) audio_source: Optional[str] = None # defaults to talking head main_video: Optional[ Union[str, 
list] @@ -135,6 +136,15 @@ class GnommoKeyConfig: # Can help with edge color contamination alpha_bias: tuple[int, int, int] = None + # Luminance protection: pixels with luma above this stay fully opaque (0-255, -1 = off) + # Use ~220 to protect white objects (headphones, teeth) from being partially keyed. + protect_luma: int = -1 + + # Shadow boost: extra key strength for dark pixels (0.0-5.0, 0 = off) + # Ramps up key signal proportionally to how dark a pixel is, helping key dark greens + # without affecting bright foreground areas. Values 1.0-2.0 are typical. + shadow_boost: float = 0.0 + # Edge refinement edge_erode: int = 0 # Pixels to erode from alpha edge (0-5) edge_soften: float = 0.0 # Blur the alpha edge (0-5 pixels) @@ -195,7 +205,7 @@ class AudioNormalizeConfig: Applies noise reduction, compression, and loudness normalization to improve audio quality and consistency. """ - + enabled: bool = True # Master switch to enable/disable all audio processing # Parametric EQ bands (applied before other processing) eq_bands: list[EQBand] = field(default_factory=list) @@ -300,7 +310,7 @@ class VideoSource: False # If True, skip loudnorm during preprocessing (apply after concatenation) ) volume: float = 1.0 # Volume multiplier (1.0=full, >1.0=boost, <1.0=reduce) - layer: str = "above" # "above" = renders on top of slides; "below" = behind slides + layer: str = "above" # "above" = on top of slides; "mid" = above narrator/below slides; "below" = behind narrator duration: Optional[ float ] = None # Pre-probed file duration in seconds (set by import) diff --git a/gnommo/parser.py b/gnommo/parser.py index d49944a..1951b5a 100644 --- a/gnommo/parser.py +++ b/gnommo/parser.py @@ -84,9 +84,9 @@ def parse_manuscript( text = re.sub(r"\[marker:[^\]]+\]", "", text) text = re.sub(r"\[cue:[^\]]+\]", "", text) - # Extract all valid markers like [S1], [video:demo], [Zoom2], etc. - # Include . 
in pattern to catch markers with file extensions (so validator can warn about them) - markers = re.findall(r"\[([A-Za-z0-9_:.]+)\]", text) + # Extract all valid markers like [S1], [video:demo], [vf2m:pexels/clip-name], etc. + # Include / and - to capture pexels/library video IDs; . to catch file extensions in markers. + markers = re.findall(r"\[([A-Za-z0-9_:./\-]+)\]", text) # Find malformed markers (missing brackets, extra spaces, etc.) malformed: list[tuple[int, str]] = [] @@ -258,6 +258,7 @@ def parse_project_config(project_path: Path) -> ProjectConfig: slides_path=data.get("slides", "slides.json"), videos_path=data.get("videos", "videos.json"), audio_path=data.get("audio", "audio.json"), + transcript_path=data.get("transcript"), audio_source=data.get("audio_source"), main_video=data.get("main_video"), process_cache=data.get("process_cache"), diff --git a/gnommo/pexels.py b/gnommo/pexels.py new file mode 100644 index 0000000..f4eb02f --- /dev/null +++ b/gnommo/pexels.py @@ -0,0 +1,312 @@ +"""Pexels video downloader for gnommo shared_assets. + +Configure API key in ~/.gnommo.conf: + + [pexels] + api_key = YOUR_KEY_HERE + +Get a free key at https://www.pexels.com/api/ +""" + +import configparser +import json +import re +import sys +import urllib.error +import urllib.request +from pathlib import Path +from typing import Optional + + +def get_pexels_api_key() -> Optional[str]: + config_path = Path.home() / ".gnommo.conf" + if not config_path.exists(): + return None + cfg = configparser.ConfigParser() + cfg.read(config_path) + return cfg.get("pexels", "api_key", fallback=None) + + +def extract_pexels_id(source_file: str) -> Optional[str]: + """Extract the numeric Pexels video ID from a source_file path. + + Handles names like 'pexels/11868263-hd_1920_1080_24fps.mp4' + and 'pexels/12136677_1080_1920_30fps.mp4'. 
+ """ + name = Path(source_file).stem.split("/")[-1] + m = re.match(r"^(\d+)", name) + return m.group(1) if m else None + + +def _fetch_video_info(pexels_id: str, api_key: str) -> Optional[dict]: + url = f"https://api.pexels.com/videos/videos/{pexels_id}" + req = urllib.request.Request( + url, + headers={"Authorization": api_key, "User-Agent": "Mozilla/5.0 gnommo/1.0"}, + ) + try: + with urllib.request.urlopen(req, timeout=15) as resp: + return json.loads(resp.read()) + except urllib.error.HTTPError as e: + print(f" [{pexels_id}] Pexels API error {e.code} — video may have been deleted", flush=True) + return None + except Exception as e: + print(f" [{pexels_id}] Pexels API error: {e}", flush=True) + return None + + +def description_from_url(video_url: str) -> str: + """Extract human-readable description from a Pexels video URL slug. + + 'https://www.pexels.com/video/abstract-television-noise-11868263/' + → 'Abstract Television Noise' + """ + m = re.search(r"/video/([a-z0-9][a-z0-9-]+?)-\d+/?$", video_url) + if m: + return m.group(1).replace("-", " ").title() + return "" + + +def _pick_best_video_file(video_files: list, source_file: str) -> Optional[dict]: + """Select the video_files entry that best matches the hints in source_file.""" + stem = Path(source_file).stem.split("/")[-1] + + width_hint = height_hint = fps_hint = quality_hint = None + m = re.search(r"[_-](\d{3,4})[_-](\d{3,4})[_-](\d+)fps", stem) + if m: + width_hint = int(m.group(1)) + height_hint = int(m.group(2)) + fps_hint = int(m.group(3)) + for q in ("uhd", "hd", "sd"): + if q in stem.lower(): + quality_hint = q + break + + mp4s = [f for f in video_files if f.get("file_type") == "video/mp4"] + if not mp4s: + mp4s = video_files # fall back to any format + + def score(vf: dict) -> int: + s = 0 + if quality_hint and vf.get("quality", "").lower() == quality_hint: + s += 10 + if width_hint and vf.get("width") == width_hint: + s += 5 + if height_hint and vf.get("height") == height_hint: + s += 5 + if 
fps_hint and round(float(vf.get("fps") or 0)) == fps_hint: + s += 3 + return s + + return max(mp4s, key=score) + + +def download_video( + source_file: str, + shared_assets_dir: Path, + api_key: str, +) -> Optional[dict]: + """Download one Pexels video to shared_assets_dir/. + + Returns a metadata dict {description, duration, has_audio=False} on + success, or None on failure. + """ + pexels_id = extract_pexels_id(source_file) + if not pexels_id: + print(f" Cannot extract Pexels ID from: {source_file}", file=sys.stderr) + return None + + target_path = shared_assets_dir / source_file + target_path.parent.mkdir(parents=True, exist_ok=True) + + print(f" [{pexels_id}] Fetching video info...", flush=True) + info = _fetch_video_info(pexels_id, api_key) + if not info: + return None + + description = description_from_url(info.get("url", "")) + duration = float(info.get("duration") or 0) or None + + video_files = info.get("video_files", []) + if not video_files: + print(f" [{pexels_id}] No video files in API response", flush=True) + return None + + best = _pick_best_video_file(video_files, source_file) + if not best: + return None + + download_url = best["link"] + w, h, fps = best.get("width", "?"), best.get("height", "?"), best.get("fps", "?") + q = best.get("quality", "?") + label = f'"{description}" — ' if description else "" + print(f" [{pexels_id}] {label}{q} {w}x{h} @ {fps}fps", flush=True) + print(f" → {target_path}", flush=True) + + try: + req = urllib.request.Request( + download_url, headers={"User-Agent": "Mozilla/5.0 gnommo/1.0"} + ) + with urllib.request.urlopen(req, timeout=300) as resp: + total = int(resp.headers.get("Content-Length") or 0) + downloaded = 0 + chunks: list[bytes] = [] + chunk_size = 1024 * 512 # 512 KB + while True: + chunk = resp.read(chunk_size) + if not chunk: + break + chunks.append(chunk) + downloaded += len(chunk) + if total: + pct = downloaded * 100 // total + mb_done = downloaded / 1024 / 1024 + mb_total = total / 1024 / 1024 + print(f" 
def update_videos_json(
    json_path: Path,
    video_id: str,
    metadata: dict,
) -> None:
    """Merge *metadata* into an existing entry of a ``videos.json`` file.

    Only truthy values that differ from the stored value are written, so
    falsy values (``None``, ``""``, ``0``, ``False``) never create or
    overwrite a key. The file is rewritten only when at least one key
    changed. Nothing happens when the file does not exist or has no entry
    for *video_id*.

    Args:
        json_path: Path of the videos.json file to update.
        video_id: Key of the entry to update.
        metadata: Key/value pairs to merge into the entry.

    Raises:
        TypeError: If a value in *metadata* cannot be serialised to JSON.
            The file on disk is left untouched in that case.
    """
    if not json_path.exists():
        return
    with open(json_path, "r", encoding="utf-8") as f:
        raw = json.load(f)
    if video_id not in raw:
        return

    entry = raw[video_id]
    changed = False
    for key, value in metadata.items():
        if value and entry.get(key) != value:
            entry[key] = value
            changed = True
    if not changed:
        return

    # Serialise BEFORE opening for writing: open(..., "w") truncates the file
    # immediately, so an error raised while dumping would otherwise leave
    # videos.json empty or half-written.
    payload = json.dumps(raw, indent=2, ensure_ascii=False)
    with open(json_path, "w", encoding="utf-8") as f:
        f.write(payload)


def fetch_metadata(pexels_id: str, api_key: str) -> Optional[dict]:
    """Fetch only description and duration for a Pexels video (no download).

    Returns:
        ``None`` when ``_fetch_video_info`` yields nothing. Otherwise a dict
        with ``"description"`` (derived from the info's ``"url"`` field via
        ``description_from_url``) and ``"duration"`` (a float, or ``None``
        when the field is missing or zero).
    """
    info = _fetch_video_info(pexels_id, api_key)
    if not info:
        return None
    duration = float(info.get("duration") or 0)
    return {
        "description": description_from_url(info.get("url", "")),
        "duration": duration or None,
    }


def enrich_missing_descriptions(
    shared_assets_dir: Path,
    api_key: str,
) -> int:
    """Fill in missing descriptions for pexels videos that exist on disk.

    Scans ``shared_assets_dir / "videos.json"`` for ``pexels/*`` entries where:
      - description is absent or empty
      - source_file exists on disk (locally or via cache)

    For each such entry the metadata is fetched with ``fetch_metadata`` and
    written back with ``update_videos_json`` (one file update per entry).

    Args:
        shared_assets_dir: Directory containing videos.json and the files
            its ``source_file`` values are relative to.
        api_key: Key passed through to ``fetch_metadata``.

    Returns:
        Number of entries updated.
    """
    from .cache import resolve_with_cache

    videos_json = shared_assets_dir / "videos.json"
    if not videos_json.exists():
        return 0

    with open(videos_json, "r", encoding="utf-8") as f:
        raw = json.load(f)

    # NOTE(review): the parent of shared_assets is passed as the project path
    # for the cache lookup — confirm this matches the cache layout that
    # resolve_with_cache expects for shared assets.
    project_root = shared_assets_dir.parent

    to_enrich: list[tuple[str, str]] = []
    for vid_id, entry in raw.items():
        if not vid_id.startswith("pexels/"):
            continue
        # Skip malformed (non-object) entries instead of crashing on .get().
        if not isinstance(entry, dict) or entry.get("description"):
            continue
        sf = entry.get("source_file", "")
        if not sf:
            continue
        resolved, _ = resolve_with_cache(shared_assets_dir / sf, project_root)
        if not resolved.exists():
            continue
        pexels_id = extract_pexels_id(sf)
        if pexels_id:
            to_enrich.append((vid_id, pexels_id))

    if not to_enrich:
        return 0

    print(f" Enriching descriptions for {len(to_enrich)} existing pexels video(s)...", flush=True)

    updated = 0
    for vid_id, pexels_id in to_enrich:
        meta = fetch_metadata(pexels_id, api_key)
        if meta and meta.get("description"):
            print(f" [{pexels_id}] \"{meta['description']}\"", flush=True)
            update_videos_json(videos_json, vid_id, meta)
            updated += 1
        else:
            print(f" [{pexels_id}] not found or no description — skipped", flush=True)

    return updated


def find_missing_pexels_videos(
    manuscript_markers: list[str],
    videos: dict,
    shared_assets_dir: Path,
) -> list[tuple[str, str]]:
    """Return [(video_id, source_file)] for pexels videos referenced but not on disk.

    A marker counts as a video reference when it starts with one of the
    video marker prefixes; the text after the prefix is the video id. Only
    ids starting with ``pexels/`` that have an entry in *videos* are
    checked, and each id is reported at most once.

    Args:
        manuscript_markers: Marker strings taken from the manuscript.
        videos: Mapping of video id to an entry. An entry may be an object
            with a ``source_file`` attribute, a dict with a ``"source_file"``
            key, or the source file path itself.
        shared_assets_dir: Directory the source files are relative to.

    Returns:
        ``(video_id, source_file)`` pairs whose file was found neither
        locally nor through ``resolve_with_cache``, in first-seen order.
    """
    from .cache import resolve_with_cache

    _VIDEO_PREFIXES = (
        "video:", "narration:",
        "vft:", "vfb:", "vfm:",
        "vf2t:", "vf2b:", "vf2m:",
        "vst:", "vsb:", "vsm:",
        "vftp:", "vfbp:", "vfmp:",
        "vf2tp:", "vf2bp:", "vf2mp:",
        "vstp:", "vsbp:", "vsmp:",
    )

    seen: set[str] = set()
    missing: list[tuple[str, str]] = []

    for marker in manuscript_markers:
        prefix = next((p for p in _VIDEO_PREFIXES if marker.startswith(p)), None)
        if prefix is None:
            continue
        video_id = marker[len(prefix):]
        if video_id in seen or not video_id.startswith("pexels/"):
            continue
        seen.add(video_id)

        entry = videos.get(video_id)
        if entry is None:
            continue
        if isinstance(entry, dict):
            sf = entry.get("source_file")
        else:
            sf = getattr(entry, "source_file", entry)
        if not sf:
            # No path to check; joining an empty/None value would raise.
            continue

        candidate = shared_assets_dir / sf
        # resolve_with_cache needs a project_path — use shared_assets parent.
        # NOTE(review): confirm this matches the cache layout that
        # resolve_with_cache expects for shared assets.
        resolved, _ = resolve_with_cache(candidate, shared_assets_dir.parent)
        if not resolved.exists():
            missing.append((video_id, sf))

    return missing
+ # protect_luma=-1 disables this. Use ~220 for typical white protection. + if cfg.protect_luma >= 0: + luma_expr = f"(0.299*r(X,Y)+0.587*g(X,Y)+0.114*b(X,Y))" + alpha_expr = f"if(gt({luma_expr},{cfg.protect_luma}),255,{alpha_expr})" + # Build the geq filter for alpha (in RGBA mode) parts.append(f"geq=r='r(X,Y)':g='g(X,Y)':b='b(X,Y)':a='{alpha_expr}'") @@ -1195,6 +1212,8 @@ def parse_gnommokey_config(config: dict) -> GnommoKeyConfig: despill_bias=despill_bias, despill_strength=float(config.get("despill_strength", 0.5)), alpha_bias=alpha_bias, + protect_luma=int(config.get("protect_luma", -1)), + shadow_boost=float(config.get("shadow_boost", 0.0)), edge_erode=int(config.get("edge_erode", 0)), edge_soften=float(config.get("edge_soften", 0.0)), ) @@ -1959,7 +1978,12 @@ def apply_audio_normalize( channel_map -> eq_bands -> highpass -> lowpass -> room_eq -> dereverb -> denoise -> gate -> compress -> normalize """ cfg = parse_audio_normalize_config(config) + if not cfg.enabled: + # No audio processing, just copy + import shutil + shutil.copy2(input_path, output_path) + return # Build audio filter chain (order matters!) 
audio_filters: list[str] = [] @@ -2109,6 +2133,7 @@ def parse_audio_normalize_config(config: dict[str, Any]) -> AudioNormalizeConfig ) return AudioNormalizeConfig( + enabled=bool(config.get("enabled", True)), # Parametric EQ eq_bands=eq_bands, # Room treatment diff --git a/gnommo/renderer.py b/gnommo/renderer.py index c92c0f1..bc3c047 100644 --- a/gnommo/renderer.py +++ b/gnommo/renderer.py @@ -237,8 +237,27 @@ def _resolve_video_path( source_path = base_dir / video_source.source_file if project_path: resolved, _ = resolve_with_cache(source_path, project_path) - return resolved - return source_path + else: + resolved = source_path + + if not resolved.exists(): + # File not found anywhere — substitute PlaceholderVideo so FFmpeg doesn't crash + placeholder = None + if shared_assets_dir: + p = shared_assets_dir / "PlaceholderVideo.mp4" + if project_path: + p, _ = resolve_with_cache(p, project_path) + if p.exists(): + placeholder = p + if placeholder: + import sys + print( + f" Warning: {video_source.source_file} not found — using PlaceholderVideo", + file=sys.stderr, + ) + return placeholder + + return resolved def _has_audio_stream(video_path: Path) -> bool: @@ -362,6 +381,7 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]: f"Background handle '{bg_handle}' not found in shared_assets/videos.json" ) bg_path = shared_assets_dir / bg_videos[bg_handle]["source_file"] + bg_path, _ = resolve_with_cache(bg_path, plan.project_path) if not bg_path.exists(): raise RenderError( f"Background file not found: {bg_path} (from handle '{bg_handle}')" @@ -404,12 +424,29 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]: videos_dir, event.video_source, shared_assets_dir, project_path ) skip = event.video_source.skip or 0.0 + + # How long this clip needs to play in the output + clip_duration = event.end_time - event.start_time + if event.video_source.take is not None: + clip_duration = min(clip_duration, event.video_source.take) + 
+ # Loop the clip if the file is shorter than the display window. + # Don't loop pause-narration videos — they intentionally play once and stop. + needs_loop = False + if event.video_source.duration is not None and not event.video_source.pause_narration: + remaining = event.video_source.duration - skip + needs_loop = remaining < clip_duration - 0.1 # 0.1 s tolerance + + if needs_loop: + cmd.extend(["-stream_loop", "-1"]) if skip > 0: cmd.extend(["-ss", f"{skip:.3f}"]) cmd.extend(["-analyzeduration", "0", "-probesize", "1000"]) - # Use pre-probed duration to tell FFmpeg exactly how much to read, - # preventing scans of ghost audio tracks on empty MP4 audio streams. - if event.video_source.duration is not None: + # Use pre-probed duration (or loop-limited duration) to tell FFmpeg exactly + # how much to read, preventing scans of ghost audio tracks on empty streams. + if needs_loop: + cmd.extend(["-t", f"{clip_duration:.3f}"]) + elif event.video_source.duration is not None: remaining = event.video_source.duration - skip if remaining > 0: cmd.extend(["-t", f"{remaining:.3f}"]) @@ -881,31 +918,12 @@ def build_filter_complex( enable_expr = f"between(t\\,{event.start_time:.3f}\\,{effective_end:.3f})" filters.append( f"[{current_label}][{video_label}]overlay=" - f"x={cut_x}:y={cut_y}:enable={enable_expr}" + f"x={cut_x}:y={cut_y}:enable={enable_expr}:eof_action=pass" f"[{next_label}]" ) current_label = next_label - # Layer 3: Slides (transparent in the talking-head cutout area) - for i, event in enumerate(plan.slide_events): - slide_idx = slide_inputs[event.slide_id] - - slide_label = f"s{i}" - filters.append( - f"[{slide_idx}:v]scale={width}:{height}:" - f"force_original_aspect_ratio=decrease,pad={width}:{height}:(ow-iw)/2:(oh-ih)/2:color=0x00000000[{slide_label}]" - ) - - next_label = f"sbase{i}" - enable_expr = f"between(t\\,{event.start_time:.3f}\\,{event.end_time:.3f})" - filters.append( - f"[{current_label}][{slide_label}]overlay=" - f"x=0:y=0:enable={enable_expr}" - 
f"[{next_label}]" - ) - current_label = next_label - - # Layer 4: Always-visible videos (talking head) — above slides, visible through cutout + # Layer 3: Talking head — above below-videos, but under slides so fullscreen slides cover it for i, (video_id, video_source, cutout) in enumerate(plan.narration_videos): input_idx = always_visible_inputs[i] cut_x, cut_y, cut_width, cut_height = _calculate_cutout_position( @@ -958,7 +976,64 @@ def build_filter_complex( ) current_label = next_label - # Layer 5: "above" triggered videos (vft/vf2t/vst) — topmost, covers slides and talking head + # Layer 4: "mid" triggered videos (vfm/vsm) — above talking head, below slides + # Use case: content that should show through a slide's transparent "screen hole" + for i, event in enumerate(plan.video_events): + if event.layer != "mid": + continue + video_idx = video_inputs[i] + cut_x, cut_y, cut_width, cut_height = _calculate_cutout_position( + event.cutout, width, height + ) + + duration = event.end_time - event.start_time + if event.video_source.take is not None: + duration = min(duration, event.video_source.take) + effective_end = event.start_time + duration + + zoom = event.video_source.zoom + zoomed_width = int(cut_width * zoom) + zoomed_height = int(cut_height * zoom) + + video_label = f"tvm{i}" + start_pts = event.start_time + filters.append( + f"[{video_idx}:v]format=yuva444p10le," + f"setpts=PTS-STARTPTS+{start_pts:.3f}/TB," + f"scale={zoomed_width}:{zoomed_height}:force_original_aspect_ratio=increase," + f"crop={cut_width}:{cut_height}:(iw-{cut_width})/2:(ih-{cut_height})/2," + f"format=rgba[{video_label}]" + ) + + next_label = f"tvmbase{i}" + enable_expr = f"between(t\\,{event.start_time:.3f}\\,{effective_end:.3f})" + filters.append( + f"[{current_label}][{video_label}]overlay=" + f"x={cut_x}:y={cut_y}:enable={enable_expr}:eof_action=pass" + f"[{next_label}]" + ) + current_label = next_label + + # Layer 5: Slides — on top of talking head so fullscreen slides cover the 
narrator + for i, event in enumerate(plan.slide_events): + slide_idx = slide_inputs[event.slide_id] + + slide_label = f"s{i}" + filters.append( + f"[{slide_idx}:v]scale={width}:{height}:" + f"force_original_aspect_ratio=decrease,pad={width}:{height}:(ow-iw)/2:(oh-ih)/2:color=0x00000000[{slide_label}]" + ) + + next_label = f"sbase{i}" + enable_expr = f"between(t\\,{event.start_time:.3f}\\,{event.end_time:.3f})" + filters.append( + f"[{current_label}][{slide_label}]overlay=" + f"x=0:y=0:enable={enable_expr}" + f"[{next_label}]" + ) + current_label = next_label + + # Layer 6: "above" triggered videos (vft/vf2t/vst) — topmost, covers slides and talking head # Use case: fullscreen video that intentionally masks the narrator for i, event in enumerate(plan.video_events): if event.layer != "above": @@ -991,7 +1066,7 @@ def build_filter_complex( enable_expr = f"between(t\\,{event.start_time:.3f}\\,{effective_end:.3f})" filters.append( f"[{current_label}][{video_label}]overlay=" - f"x={cut_x}:y={cut_y}:enable={enable_expr}:format=auto" + f"x={cut_x}:y={cut_y}:enable={enable_expr}:format=auto:eof_action=pass" f"[{next_label}]" ) current_label = next_label diff --git a/gnommo/transformer.py b/gnommo/transformer.py index da1e462..e519684 100644 --- a/gnommo/transformer.py +++ b/gnommo/transformer.py @@ -34,18 +34,24 @@ AUDIO_OFFSET_SECONDS = 1.0 # The pause-variant entries (vftp: etc.) carry a third element "pause_narration" # which is a per-event property, not stored in videos.json. 
_SHORTHAND_PREFIXES: dict[str, tuple] = { - "vft:": ("fullscreen", "above"), - "vfb:": ("fullscreen", "below"), + "vft:": ("fullscreen", "above"), + "vfb:": ("fullscreen", "below"), + "vfm:": ("fullscreen", "mid"), "vf2t:": ("fullscreen2", "above"), "vf2b:": ("fullscreen2", "below"), - "vst:": ("square", "above"), - "vsb:": ("square", "below"), - "vftp:": ("fullscreen", "above"), - "vfbp:": ("fullscreen", "below"), + "vf2m:": ("fullscreen2", "mid"), + "vst:": ("square", "above"), + "vsb:": ("square", "below"), + "vsm:": ("square", "mid"), + "vftp:": ("fullscreen", "above"), + "vfbp:": ("fullscreen", "below"), + "vfmp:": ("fullscreen", "mid"), "vf2tp:": ("fullscreen2", "above"), "vf2bp:": ("fullscreen2", "below"), - "vstp:": ("square", "above"), - "vsbp:": ("square", "below"), + "vf2mp:": ("fullscreen2", "mid"), + "vstp:": ("square", "above"), + "vsbp:": ("square", "below"), + "vsmp:": ("square", "mid"), } @@ -157,18 +163,12 @@ def _is_known_marker( _VIDEO_PREFIXES = ( "video:", "narration:", - "vft:", - "vfb:", - "vf2t:", - "vf2b:", - "vst:", - "vsb:", - "vftp:", - "vfbp:", - "vf2tp:", - "vf2bp:", - "vstp:", - "vsbp:", + "vft:", "vfb:", "vfm:", + "vf2t:", "vf2b:", "vf2m:", + "vst:", "vsb:", "vsm:", + "vftp:", "vfbp:", "vfmp:", + "vf2tp:", "vf2bp:", "vf2mp:", + "vstp:", "vsbp:", "vsmp:", ) if any(marker_id.startswith(p) for p in _VIDEO_PREFIXES): return True @@ -513,6 +513,73 @@ def align_markers_to_transcription( ) ) + # Repair pass: retry INTERPOLATED markers that the forward scan missed. + # Root cause of cascade failures: one bad match advances last_idx past + # the true positions of several subsequent markers. Fix: search in a + # bounded window [prev_marker_time - 1s, next_marker_time + 2s] so we + # avoid false early matches while still recovering from cascade failures. 
+ if any(t.timestamp < 0 for t in timings): + for i, timing in enumerate(timings): + if timing.timestamp >= 0: + continue + + marker_id, anchor_text, is_borrowed, anchor_type = contexts[i] + if not anchor_text.strip(): + continue + + # Lower bound: previous matched marker's timestamp → word index. + # Repairs processed in order, so already-repaired markers count too. + prev_time = 0.0 + for j in range(i - 1, -1, -1): + if timings[j].timestamp >= 0: + prev_time = max(0.0, timings[j].timestamp - 1.0) + break + win_start = next( + (j for j, w in enumerate(transcription) if w.start >= prev_time), + 0, + ) + + # Upper bound: next matched marker in the timings list (+2s padding) + next_time = float("inf") + for j in range(i + 1, len(timings)): + if timings[j].timestamp >= 0: + next_time = timings[j].timestamp + 2.0 + break + + win_end = ( + next( + (j for j, w in enumerate(transcription) if w.start > next_time), + len(transcription), + ) + if next_time < float("inf") + else len(transcription) + ) + + if win_end <= win_start: + continue + + # Search in the bounded window with a relaxed threshold + sub = transcription[win_start:win_end] + idx, timestamp, confidence, match_end_idx = _find_phrase_timestamp( + anchor_text, + sub, + start_from=0, + fuzzy_threshold=max(0.4, fuzzy_threshold - 0.1), + ) + + if idx >= 0: + if anchor_type == "after" and match_end_idx > 0: + end_word = sub[min(match_end_idx - 1, len(sub) - 1)] + marker_time = end_word.end + else: + marker_time = max(0.0, timestamp - 0.5) + timings[i] = MarkerTiming( + marker_id=marker_id, + timestamp=marker_time, + context=f"(repaired: {anchor_text[:40]})", + confidence=confidence, + ) + # Deduplicate slide markers. The manuscript pattern [SN]\n\n[SN] text... is # common: the first blank occurrence is a visual-transition cue and the second # carries the narration text used for alignment. 
We keep the first entry in @@ -531,10 +598,24 @@ def align_markers_to_transcription( else: prev_idx = seen[timing.marker_id] prev = deduped[prev_idx] - if ( + # Upgrade if: previous was a placeholder/interpolated and the new one is better. + # Also upgrade if previous used the backward-looking "after" anchor — + # that heuristic gives end-of-preceding-section timing, but a direct + # "before" match on the second occurrence (start-of-new-section − 0.5s) + # is more accurate for when the slide should appear. + should_upgrade = ( prev.context == "(after previous)" and timing.context != "(after previous)" - ): + ) or ( + prev.timestamp < 0 + and timing.timestamp >= 0 + ) or ( + prev.context.startswith("(end of:") + and timing.timestamp >= 0 + and timing.context != "(after previous)" + and not timing.context.startswith("(end of:") + ) + if should_upgrade: deduped[prev_idx] = MarkerTiming( marker_id=prev.marker_id, timestamp=timing.timestamp, @@ -658,18 +739,12 @@ def build_render_plan( _VIDEO_MARKER_PREFIXES = ( "video:", "narration:", - "vft:", - "vfb:", - "vf2t:", - "vf2b:", - "vst:", - "vsb:", - "vftp:", - "vfbp:", - "vf2tp:", - "vf2bp:", - "vstp:", - "vsbp:", + "vft:", "vfb:", "vfm:", + "vf2t:", "vf2b:", "vf2m:", + "vst:", "vsb:", "vsm:", + "vftp:", "vfbp:", "vfmp:", + "vf2tp:", "vf2bp:", "vf2mp:", + "vstp:", "vsbp:", "vsmp:", ) missing_video_ids = [ timing.marker_id[len(prefix) :] @@ -764,7 +839,10 @@ def build_render_plan( slide_event.end_time += pause_duration for vid_event in video_events: - if vid_event.start_time > narration_time: + if vid_event is event: + # Don't shift the pause event by its own pause + continue + if vid_event.start_time >= narration_time: vid_event.start_time += pause_duration if vid_event.end_time > narration_time: vid_event.end_time += pause_duration @@ -1004,7 +1082,7 @@ def _extract_video_events( # Pause-variant prefixes — the only thing the render pass still needs from # shorthand markers at event-build time (pause_narration is 
per-event, not stored in videos.json). - _PAUSE_PREFIXES = {"vftp:", "vfbp:", "vf2tp:", "vf2bp:", "vstp:", "vsbp:"} + _PAUSE_PREFIXES = {"vftp:", "vfbp:", "vfmp:", "vf2tp:", "vf2bp:", "vf2mp:", "vstp:", "vsbp:", "vsmp:"} # Collect video markers: (time, video_id, event_type, pause_narration) # video_markers: (timestamp, video_id, marker_type, pause_narration) @@ -1088,8 +1166,8 @@ def _extract_video_events( end_time = start_time + video_source.take elif end_on == "end": end_time = total_duration - elif end_on == "next_slide" or (end_on is None and marker_type == "video"): - # End at next slide marker + elif end_on in ("next_slide", "slide") or (end_on is None and marker_type == "video"): + # End at next slide marker ("slide" is a recognised alias for "next_slide") end_time = total_duration for slide_time in slide_times: if slide_time > start_time: diff --git a/gnommo/validator.py b/gnommo/validator.py index 242fd67..2d68fca 100644 --- a/gnommo/validator.py +++ b/gnommo/validator.py @@ -4,7 +4,7 @@ from pathlib import Path from .cache import resolve_with_cache from .errors import ValidationError, ValidationIssue -from .parser import _read_json +from .parser import _read_json, resolve_missing_videos from .models import ( ProjectConfig, SlideDefinition, @@ -38,6 +38,24 @@ def validate_project( issues: list[ValidationIssue] = [] warnings: list[ValidationIssue] = [] + # Collect video IDs actually referenced in the manuscript (for file-existence checks) + _VIDEO_PREFIXES = { + "video:": 6, + "vft:": 4, "vfb:": 4, "vfm:": 4, + "vf2t:": 5, "vf2b:": 5, "vf2m:": 5, + "vst:": 4, "vsb:": 4, "vsm:": 4, + "vftp:": 5, "vfbp:": 5, "vfmp:": 5, + "vf2tp:": 6, "vf2bp:": 6, "vf2mp:": 6, + "vstp:": 5, "vsbp:": 5, "vsmp:": 5, + } + referenced_video_ids: set[str] = set() + for marker in manuscript_markers: + prefix = next((p for p in _VIDEO_PREFIXES if marker.startswith(p)), None) + if prefix is not None: + referenced_video_ids.add(marker[_VIDEO_PREFIXES[prefix]:]) + elif 
marker.startswith("narration:"): + referenced_video_ids.add(marker[10:]) + # Check for malformed markers first (these are likely typos) if malformed_markers: for line_num, marker_text in malformed_markers: @@ -62,21 +80,6 @@ def validate_project( continue # Validate video trigger markers — both legacy [video:xxx] and # shorthand [vft:xxx] / [vfb:xxx] / [vst:xxx] / [vsb:xxx]. - _VIDEO_PREFIXES = { - "video:": 6, - "vft:": 4, - "vfb:": 4, - "vf2t:": 5, - "vf2b:": 5, - "vst:": 4, - "vsb:": 4, - "vftp:": 5, - "vfbp:": 5, - "vf2tp:": 6, - "vf2bp:": 6, - "vstp:": 5, - "vsbp:": 5, - } matched_prefix = next( (p for p in _VIDEO_PREFIXES if marker.startswith(p)), None ) @@ -94,6 +97,16 @@ def validate_project( project_path / "manuscript.txt", ) ) + else: + vs = videos[video_id] + if not vs.cutout or vs.cutout not in config.cutouts: + warnings.append( + ValidationIssue( + f"[{marker}] video '{video_id}' has no valid cutout in videos.json — " + f"run 'gnommo import' to project values, or set cutout manually.", + project_path / "manuscript.txt", + ) + ) continue # Validate narration trigger markers (narration:xxx) - continuous videos @@ -106,6 +119,16 @@ def validate_project( project_path / "manuscript.txt", ) ) + else: + vs = videos[video_id] + if not vs.cutout or vs.cutout not in config.cutouts: + warnings.append( + ValidationIssue( + f"[{marker}] video '{video_id}' has no valid cutout in videos.json — " + f"run 'gnommo import' to project values, or set cutout manually.", + project_path / "manuscript.txt", + ) + ) continue # Segment markers are structural annotations, not slide references @@ -168,6 +191,10 @@ def validate_project( shared_assets_dir = project_path.parent / "shared_assets" for video_id, video_source in videos.items(): + # Only check files for videos actually used in this manuscript + if video_id not in referenced_video_ids: + continue + # Determine base directory based on is_shared flag if video_source.is_shared: if shared_assets_dir: @@ -186,9 +213,15 @@ def 
validate_project( video_path = base_dir / video_source.source_file video_path, _ = resolve_with_cache(video_path, project_path) if not video_path.exists(): + sf = video_source.source_file + hint = ( + " — run 'gnommo pexels' to download" + if sf.startswith("pexels/") + else " — falling back to PlaceholderVideo" + ) warnings.append( ValidationIssue( - f"Video file not found: {video_source.source_file} — falling back to PlaceholderVideo", + f"Video file not found: {sf}{hint}", videos_json_path, ) ) @@ -229,6 +262,7 @@ def validate_project( ) else: bg_path = shared_assets_dir / bg_videos[bg_handle]["source_file"] + bg_path, _ = resolve_with_cache(bg_path, project_path) if not bg_path.exists(): issues.append( ValidationIssue( @@ -272,6 +306,20 @@ def validate_project( ) ) + # Check outro videos exist in videos.json or shared_assets + if config.outro: + missing_outro = [vid_id for vid_id in config.outro if vid_id not in videos] + if missing_outro: + found = resolve_missing_videos(missing_outro, project_path, config) + still_missing = [vid_id for vid_id in missing_outro if vid_id not in found] + for vid_id in still_missing: + warnings.append( + ValidationIssue( + f"Outro video '{vid_id}' not found in videos.json or shared_assets — will be skipped at render", + project_path / "project.json", + ) + ) + # If any issues, raise ValidationError if issues: raise ValidationError(issues)