From cf40a19b4eed86adf1168cc20c73297b5398089c Mon Sep 17 00:00:00 2001 From: jenstandstad Date: Wed, 13 May 2026 08:13:20 +0200 Subject: [PATCH] Fixes to gnommo --- all.sh | 9 ++++ example/project.json | 3 +- gnommo/cli.py | 60 +++++++++++++++++++++- gnommo/parser.py | 1 - gnommo/renderer.py | 10 +++- gnommo/transformer.py | 113 ++++++++++++++++++------------------------ 6 files changed, 125 insertions(+), 71 deletions(-) create mode 100755 all.sh diff --git a/all.sh b/all.sh new file mode 100755 index 0000000..1d49795 --- /dev/null +++ b/all.sh @@ -0,0 +1,9 @@ +#!/bin/sh + +./gnommo.sh -p video1 all --force --prod +./gnommo.sh -p video2 all --force --prod +./gnommo.sh -p video3 all --force --prod +#./gnommo.sh -p video4 all --force +#./gnommo.sh -p video5 all --force +#./gnommo.sh -p video6 all --force + diff --git a/example/project.json b/example/project.json index 4cc3f45..79b79fb 100644 --- a/example/project.json +++ b/example/project.json @@ -4,8 +4,7 @@ "description": "In this video, I demonstrate the Gnommo video editing pipeline - a code-first approach to creating presenter-mode videos from Keynote presentations.", "footer": "Subscribe for more tutorials!\nTwitter: @example", "resolution": [1920, 1080], - "fps": 30, - "gnommo_scratch": null, + "fps": 30, "defaultSlideType": "fullscreen", "keynote_file": "media/example.key", "transcript": "media/videos/talking_head.transcript.json", diff --git a/gnommo/cli.py b/gnommo/cli.py index f5e1310..32cb1ec 100644 --- a/gnommo/cli.py +++ b/gnommo/cli.py @@ -2394,6 +2394,57 @@ def _parse_slide_range(slides_arg: str) -> tuple[str, Optional[str]]: return start_slide, end_slide +def _project_markers_to_videos( + markers: list[str], videos_json_path: Path, config +) -> None: + """ETL: project shorthand marker semantics into videos.json. 
+ + Scans the manuscript marker list for shorthand prefixes (vft:, vfb:, vst:, + vsb:, vf2t:, vf2b: and their pause variants) and writes the implied cutout + and layer values directly into videos.json. This runs before parse_videos + so the render pass reads already-projected data and needs no shorthand logic. + + The manuscript is the authoritative source: the LAST shorthand reference to + a given video_id wins, matching what a human editor would expect when they + change a marker near the end of the script. + """ + if not videos_json_path.exists(): + return + + from .transformer import _SHORTHAND_PREFIXES # (cutout, layer) lookup table + + # Build projection: video_id → {cutout, layer} + projection: dict[str, dict] = {} + for marker in markers: + for prefix, implied in _SHORTHAND_PREFIXES.items(): + if marker.startswith(prefix): + video_id = marker[len(prefix):] + cutout, layer = implied[0], implied[1] + projection[video_id] = {"cutout": cutout, "layer": layer} + break + + if not projection: + return + + with open(videos_json_path, "r", encoding="utf-8") as f: + raw = json.load(f) + + changed = False + for video_id, fields in projection.items(): + if video_id not in raw: + continue + for field, value in fields.items(): + if raw[video_id].get(field) != value: + raw[video_id][field] = value + changed = True + + if changed: + with open(videos_json_path, "w", encoding="utf-8") as f: + json.dump(raw, f, indent=2, ensure_ascii=False) + updated = [vid for vid in projection if vid in raw] + print(f" Projected marker semantics → videos.json: {', '.join(updated)}") + + def _writeback_video_metadata(plan, project_path, config) -> None: """Write back cutout/layer derived from shorthand markers to videos.json. 
@@ -2586,6 +2637,12 @@ def cmd_render( save_citations(citations, citations_path) config = parse_project_config(project_path) + # ETL: project shorthand marker semantics (cutout/layer) into videos.json + # before parse_videos reads it, so the render pass is purely data-driven. + _project_markers_to_videos( + markers, project_path / config.videos_path, config + ) + # Override resolution for preview modes if res != "full": cfg = RES_CONFIGS[res] @@ -2732,8 +2789,7 @@ def cmd_render( if plan.time_offset > 0: print(f" Time offset: {plan.time_offset:.1f}s (partial render)") - # Persist shorthand-derived cutout/layer back to videos.json (idempotent) - _writeback_video_metadata(plan, project_path, config) + # Print detailed render plan with alignment info _print_render_plan_details(plan, marker_timings, slides) diff --git a/gnommo/parser.py b/gnommo/parser.py index fea68b5..5648926 100644 --- a/gnommo/parser.py +++ b/gnommo/parser.py @@ -260,7 +260,6 @@ def parse_project_config(project_path: Path) -> ProjectConfig: audio_path=data.get("audio", "audio.json"), audio_source=data.get("audio_source"), main_video=data.get("main_video"), - gnommo_scratch=data.get("gnommo_scratch"), process_cache=data.get("process_cache"), default_begin=float(data.get("default_begin", 0.0)), default_end_trim=float(data.get("default_end_trim", 0.0)), diff --git a/gnommo/renderer.py b/gnommo/renderer.py index 8c5bc9f..e2bc2f0 100644 --- a/gnommo/renderer.py +++ b/gnommo/renderer.py @@ -1254,10 +1254,13 @@ def build_filter_complex( delay_ms = int(event.start_time * 1000) label = f"tvaud{i}" + vol = event.video_source.volume + vol_filter = f",volume={vol:.2f}" if vol != 1.0 else "" filters.append( f"[{video_idx}:a]atrim=0:{duration:.3f}," f"asetpts=PTS-STARTPTS," - f"adelay={delay_ms}|{delay_ms}[{label}]" + f"adelay={delay_ms}|{delay_ms}" + f"{vol_filter}[{label}]" ) audio_labels_to_mix.append(f"[{label}]") @@ -1273,10 +1276,13 @@ def build_filter_complex( delay_ms = int(event.start_time * 1000) label 
= f"outroaud{i}" + vol = event.video_source.volume + vol_filter = f",volume={vol:.2f}" if vol != 1.0 else "" filters.append( f"[{video_idx}:a]atrim=0:{duration:.3f}," f"asetpts=PTS-STARTPTS," - f"adelay={delay_ms}|{delay_ms}[{label}]" + f"adelay={delay_ms}|{delay_ms}" + f"{vol_filter}[{label}]" ) audio_labels_to_mix.append(f"[{label}]") diff --git a/gnommo/transformer.py b/gnommo/transformer.py index d276062..bd66176 100644 --- a/gnommo/transformer.py +++ b/gnommo/transformer.py @@ -28,6 +28,26 @@ from .transcriber import TranscribedWord # Audio trigger offset: play sound this many seconds before the marker AUDIO_OFFSET_SECONDS = 1.0 +# Shorthand marker prefix → (cutout_name, layer). +# These are the ETL source-of-truth: when a manuscript contains [vft:X], +# that projects cutout="fullscreen" and layer="above" into videos.json for X. +# The pause-variant entries (vftp: etc.) map to the same (cutout, layer) pair as +# their base prefix; pause_narration is a per-event property, not stored in videos.json. +_SHORTHAND_PREFIXES: dict[str, tuple] = { + "vft:": ("fullscreen", "above"), + "vfb:": ("fullscreen", "below"), + "vf2t:": ("fullscreen2", "above"), + "vf2b:": ("fullscreen2", "below"), + "vst:": ("square", "above"), + "vsb:": ("square", "below"), + "vftp:": ("fullscreen", "above"), + "vfbp:": ("fullscreen", "below"), + "vf2tp:": ("fullscreen2", "above"), + "vf2bp:": ("fullscreen2", "below"), + "vstp:": ("square", "above"), + "vsbp:": ("square", "below"), +} + @dataclass class MarkerTiming: @@ -961,26 +981,14 @@ def _extract_video_events( ] ) - # Mapping from shorthand marker prefix → (implied_cutout_name, implied_layer) - # These are the defaults; videos.json values act as a base but the marker wins. 
- _SHORTHAND: dict[str, tuple[str, str]] = { - "vft:": ("fullscreen", "above"), - "vfb:": ("fullscreen", "below"), - "vf2t:": ("fullscreen2", "above"), - "vf2b:": ("fullscreen2", "below"), - "vst:": ("square", "above"), - "vsb:": ("square", "below"), - "vftp:": ("fullscreen", "above", "pause_narration"), - "vfbp:": ("fullscreen", "below", "pause_narration"), - "vf2tp:": ("fullscreen2", "above", "pause_narration"), - "vf2bp:": ("fullscreen2", "below", "pause_narration"), - "vstp:": ("square", "above", "pause_narration"), - "vsbp:": ("square", "below", "pause_narration"), - } + # Pause-variant prefixes — the only thing the render pass still needs from + # shorthand markers at event-build time (pause_narration is per-event, not stored in videos.json). + _PAUSE_PREFIXES = {"vftp:", "vfbp:", "vf2tp:", "vf2bp:", "vstp:", "vsbp:"} - # Collect video markers: (time, video_id, event_type, cutout_name_override, layer_override) - # event_type is "video" (ends at next slide) or "narration" (runs to end) - video_markers: list[tuple[float, str, str, str | None, str | None]] = [] + # Collect video markers: (time, video_id, event_type, pause_narration) + # event_type is "video" (ends at next slide) or "narration" (runs to end) + # cutout and layer are read from videos.json (projected there by _project_markers_to_videos) + video_markers: list[tuple[float, str, str, bool]] = [] for timing in marker_timings: if timing.timestamp < 0: @@ -988,26 +996,26 @@ def _extract_video_events( mid = timing.marker_id - # --- shorthand markers: vft/vfb/vst/vsb --- - shorthand_match = next((p for p in _SHORTHAND if mid.startswith(p)), None) + # --- shorthand markers (vft:/vfb:/vst:/vsb: and pause variants) --- + shorthand_match = next((p for p in _SHORTHAND_PREFIXES if mid.startswith(p)), None) if shorthand_match: - video_id = mid[len(shorthand_match) :] + video_id = mid[len(shorthand_match):] if video_id not in videos: warnings.append( f"[{mid}] references unknown video '{video_id}' — skipped. 
" f"Add it to videos.json or remove the marker." ) continue - implied_cutout, implied_layer = _SHORTHAND[shorthand_match] - if implied_cutout not in cutouts: + # Validate that videos.json has the correct cutout (written by ETL) + video_source = videos[video_id] + if not video_source.cutout or video_source.cutout not in cutouts: warnings.append( - f"[{mid}] requires cutout '{implied_cutout}' which is not defined in project config — skipped. " - f"Available cutouts: {list(cutouts.keys())}" + f"[{mid}] video '{video_id}' has no valid cutout in videos.json — " + f"run render once to project values, or set cutout manually." ) continue - video_markers.append( - (timing.timestamp, video_id, "video", implied_cutout, implied_layer) - ) + pause_narration = shorthand_match in _PAUSE_PREFIXES + video_markers.append((timing.timestamp, video_id, "video", pause_narration)) continue # --- legacy [video:xxx] --- @@ -1015,23 +1023,16 @@ def _extract_video_events( video_id = mid[6:] if video_id not in videos: warnings.append( - f"[video:{video_id}] references unknown video '{video_id}' — skipped. " - f"Add it to videos.json or remove the marker." + f"[video:{video_id}] references unknown video '{video_id}' — skipped." ) continue video_source = videos[video_id] - if not video_source.cutout: + if not video_source.cutout or video_source.cutout not in cutouts: warnings.append( - f"[video:{video_id}] has no 'cutout' set in videos.json — skipped." + f"[video:{video_id}] has no valid cutout in videos.json — skipped." ) continue - if video_source.cutout not in cutouts: - warnings.append( - f"[video:{video_id}] cutout '{video_source.cutout}' is not defined in project config — skipped. 
" - f"Available: {list(cutouts.keys())}" - ) - continue - video_markers.append((timing.timestamp, video_id, "video", None, None)) + video_markers.append((timing.timestamp, video_id, "video", False)) continue # --- [narration:xxx] --- @@ -1039,41 +1040,25 @@ def _extract_video_events( video_id = mid[10:] if video_id not in videos: warnings.append( - f"[narration:{video_id}] references unknown video '{video_id}' — skipped. " - f"Add it to videos.json or remove the marker." + f"[narration:{video_id}] references unknown video '{video_id}' — skipped." ) continue video_source = videos[video_id] - if not video_source.cutout: + if not video_source.cutout or video_source.cutout not in cutouts: warnings.append( - f"[narration:{video_id}] has no 'cutout' set in videos.json — skipped." + f"[narration:{video_id}] has no valid cutout in videos.json — skipped." ) continue - if video_source.cutout not in cutouts: - warnings.append( - f"[narration:{video_id}] cutout '{video_source.cutout}' is not defined in project config — skipped. 
" - f"Available: {list(cutouts.keys())}" - ) - continue - video_markers.append((timing.timestamp, video_id, "narration", None, None)) + video_markers.append((timing.timestamp, video_id, "narration", False)) events: list[VideoEvent] = [] - for ( - start_time, - video_id, - marker_type, - cutout_override, - layer_override, - ) in video_markers: + for (start_time, video_id, marker_type, pause_narration) in video_markers: video_source = videos[video_id] - # Resolve cutout: marker override > videos.json cutout - # (validation already ensured cutout exists — this is a safety assertion) - cutout_name = cutout_override or video_source.cutout + # Read cutout and layer directly from videos.json (projected by ETL) + cutout_name = video_source.cutout cutout = cutouts[cutout_name] - - # Resolve layer: marker override > videos.json layer - layer = layer_override if layer_override is not None else video_source.layer + layer = video_source.layer end_on = video_source.end_on if end_on == "take" and video_source.take is not None: