Fixes to gnommo

This commit is contained in:
2026-05-13 08:13:20 +02:00
parent 5d7c77db91
commit cf40a19b4e
6 changed files with 125 additions and 71 deletions
Executable
+9
View File
@@ -0,0 +1,9 @@
#!/bin/sh
# Run gnommo.sh with "all --force --prod" for each active project, in order.
for project in video1 video2 video3; do
    ./gnommo.sh -p "$project" all --force --prod
done
# Disabled projects (kept for reference; note they do not pass --prod):
#./gnommo.sh -p video4 all --force
#./gnommo.sh -p video5 all --force
#./gnommo.sh -p video6 all --force
-1
View File
@@ -5,7 +5,6 @@
"footer": "Subscribe for more tutorials!\nTwitter: @example",
"resolution": [1920, 1080],
"fps": 30,
"gnommo_scratch": null,
"defaultSlideType": "fullscreen",
"keynote_file": "media/example.key",
"transcript": "media/videos/talking_head.transcript.json",
+58 -2
View File
@@ -2394,6 +2394,57 @@ def _parse_slide_range(slides_arg: str) -> tuple[str, Optional[str]]:
return start_slide, end_slide
def _project_markers_to_videos(
markers: list[str], videos_json_path: Path, config
) -> None:
"""ETL: project shorthand marker semantics into videos.json.
Scans the manuscript marker list for shorthand prefixes (vft:, vfb:, vst:,
vsb:, vf2t:, vf2b: and their pause variants) and writes the implied cutout
and layer values directly into videos.json. This runs before parse_videos
so the render pass reads already-projected data and needs no shorthand logic.
The manuscript is the authoritative source: the LAST shorthand reference to
a given video_id wins, matching what a human editor would expect when they
change a marker near the end of the script.
"""
if not videos_json_path.exists():
return
from .transformer import _SHORTHAND_PREFIXES # (cutout, layer) lookup table
# Build projection: video_id → {cutout, layer}
projection: dict[str, dict] = {}
for marker in markers:
for prefix, implied in _SHORTHAND_PREFIXES.items():
if marker.startswith(prefix):
video_id = marker[len(prefix):]
cutout, layer = implied[0], implied[1]
projection[video_id] = {"cutout": cutout, "layer": layer}
break
if not projection:
return
with open(videos_json_path, "r", encoding="utf-8") as f:
raw = json.load(f)
changed = False
for video_id, fields in projection.items():
if video_id not in raw:
continue
for field, value in fields.items():
if raw[video_id].get(field) != value:
raw[video_id][field] = value
changed = True
if changed:
with open(videos_json_path, "w", encoding="utf-8") as f:
json.dump(raw, f, indent=2, ensure_ascii=False)
updated = [vid for vid in projection if vid in raw]
print(f" Projected marker semantics → videos.json: {', '.join(updated)}")
def _writeback_video_metadata(plan, project_path, config) -> None:
"""Write back cutout/layer derived from shorthand markers to videos.json.
@@ -2586,6 +2637,12 @@ def cmd_render(
save_citations(citations, citations_path)
config = parse_project_config(project_path)
# ETL: project shorthand marker semantics (cutout/layer) into videos.json
# before parse_videos reads it, so the render pass is purely data-driven.
_project_markers_to_videos(
markers, project_path / config.videos_path, config
)
# Override resolution for preview modes
if res != "full":
cfg = RES_CONFIGS[res]
@@ -2732,8 +2789,7 @@ def cmd_render(
if plan.time_offset > 0:
print(f" Time offset: {plan.time_offset:.1f}s (partial render)")
# Persist shorthand-derived cutout/layer back to videos.json (idempotent)
_writeback_video_metadata(plan, project_path, config)
# Print detailed render plan with alignment info
_print_render_plan_details(plan, marker_timings, slides)
-1
View File
@@ -260,7 +260,6 @@ def parse_project_config(project_path: Path) -> ProjectConfig:
audio_path=data.get("audio", "audio.json"),
audio_source=data.get("audio_source"),
main_video=data.get("main_video"),
gnommo_scratch=data.get("gnommo_scratch"),
process_cache=data.get("process_cache"),
default_begin=float(data.get("default_begin", 0.0)),
default_end_trim=float(data.get("default_end_trim", 0.0)),
+8 -2
View File
@@ -1254,10 +1254,13 @@ def build_filter_complex(
delay_ms = int(event.start_time * 1000)
label = f"tvaud{i}"
vol = event.video_source.volume
vol_filter = f",volume={vol:.2f}" if vol != 1.0 else ""
filters.append(
f"[{video_idx}:a]atrim=0:{duration:.3f},"
f"asetpts=PTS-STARTPTS,"
f"adelay={delay_ms}|{delay_ms}[{label}]"
f"adelay={delay_ms}|{delay_ms}"
f"{vol_filter}[{label}]"
)
audio_labels_to_mix.append(f"[{label}]")
@@ -1273,10 +1276,13 @@ def build_filter_complex(
delay_ms = int(event.start_time * 1000)
label = f"outroaud{i}"
vol = event.video_source.volume
vol_filter = f",volume={vol:.2f}" if vol != 1.0 else ""
filters.append(
f"[{video_idx}:a]atrim=0:{duration:.3f},"
f"asetpts=PTS-STARTPTS,"
f"adelay={delay_ms}|{delay_ms}[{label}]"
f"adelay={delay_ms}|{delay_ms}"
f"{vol_filter}[{label}]"
)
audio_labels_to_mix.append(f"[{label}]")
+46 -61
View File
@@ -28,6 +28,26 @@ from .transcriber import TranscribedWord
# Audio trigger offset: play sound this many seconds before the marker
AUDIO_OFFSET_SECONDS = 1.0
# Shorthand marker prefix → (cutout_name, layer).
# These are the ETL source-of-truth: when a manuscript contains [vft:X],
# that projects cutout="fullscreen" and layer="above" into videos.json for X.
# The pause-variant entries (vftp: etc.) carry a third element "pause_narration"
# which is a per-event property, not stored in videos.json.
_SHORTHAND_PREFIXES: dict[str, tuple] = {
"vft:": ("fullscreen", "above"),
"vfb:": ("fullscreen", "below"),
"vf2t:": ("fullscreen2", "above"),
"vf2b:": ("fullscreen2", "below"),
"vst:": ("square", "above"),
"vsb:": ("square", "below"),
"vftp:": ("fullscreen", "above"),
"vfbp:": ("fullscreen", "below"),
"vf2tp:": ("fullscreen2", "above"),
"vf2bp:": ("fullscreen2", "below"),
"vstp:": ("square", "above"),
"vsbp:": ("square", "below"),
}
@dataclass
class MarkerTiming:
@@ -961,26 +981,14 @@ def _extract_video_events(
]
)
# Mapping from shorthand marker prefix → (implied_cutout_name, implied_layer)
# These are the defaults; videos.json values act as a base but the marker wins.
_SHORTHAND: dict[str, tuple[str, str]] = {
"vft:": ("fullscreen", "above"),
"vfb:": ("fullscreen", "below"),
"vf2t:": ("fullscreen2", "above"),
"vf2b:": ("fullscreen2", "below"),
"vst:": ("square", "above"),
"vsb:": ("square", "below"),
"vftp:": ("fullscreen", "above", "pause_narration"),
"vfbp:": ("fullscreen", "below", "pause_narration"),
"vf2tp:": ("fullscreen2", "above", "pause_narration"),
"vf2bp:": ("fullscreen2", "below", "pause_narration"),
"vstp:": ("square", "above", "pause_narration"),
"vsbp:": ("square", "below", "pause_narration"),
}
# Pause-variant prefixes — the only thing the render pass still needs from
# shorthand markers at event-build time (pause_narration is per-event, not stored in videos.json).
_PAUSE_PREFIXES = {"vftp:", "vfbp:", "vf2tp:", "vf2bp:", "vstp:", "vsbp:"}
# Collect video markers: (time, video_id, event_type, cutout_name_override, layer_override)
# event_type is "video" (ends at next slide) or "narration" (runs to end)
video_markers: list[tuple[float, str, str, str | None, str | None]] = []
# Collect video markers: (time, video_id, event_type, pause_narration)
# video_markers: (timestamp, video_id, marker_type, pause_narration)
# cutout and layer are read from videos.json (projected there by _project_markers_to_videos)
video_markers: list[tuple[float, str, str, bool]] = []
for timing in marker_timings:
if timing.timestamp < 0:
@@ -988,8 +996,8 @@ def _extract_video_events(
mid = timing.marker_id
# --- shorthand markers: vft/vfb/vst/vsb ---
shorthand_match = next((p for p in _SHORTHAND if mid.startswith(p)), None)
# --- shorthand markers (vft:/vfb:/vst:/vsb: and pause variants) ---
shorthand_match = next((p for p in _SHORTHAND_PREFIXES if mid.startswith(p)), None)
if shorthand_match:
video_id = mid[len(shorthand_match):]
if video_id not in videos:
@@ -998,16 +1006,16 @@ def _extract_video_events(
f"Add it to videos.json or remove the marker."
)
continue
implied_cutout, implied_layer = _SHORTHAND[shorthand_match]
if implied_cutout not in cutouts:
# Validate that videos.json has the correct cutout (written by ETL)
video_source = videos[video_id]
if not video_source.cutout or video_source.cutout not in cutouts:
warnings.append(
f"[{mid}] requires cutout '{implied_cutout}' which is not defined in project config — skipped. "
f"Available cutouts: {list(cutouts.keys())}"
f"[{mid}] video '{video_id}' has no valid cutout in videos.json — "
f"run render once to project values, or set cutout manually."
)
continue
video_markers.append(
(timing.timestamp, video_id, "video", implied_cutout, implied_layer)
)
pause_narration = shorthand_match in _PAUSE_PREFIXES
video_markers.append((timing.timestamp, video_id, "video", pause_narration))
continue
# --- legacy [video:xxx] ---
@@ -1016,22 +1024,15 @@ def _extract_video_events(
if video_id not in videos:
warnings.append(
f"[video:{video_id}] references unknown video '{video_id}' — skipped."
f"Add it to videos.json or remove the marker."
)
continue
video_source = videos[video_id]
if not video_source.cutout:
if not video_source.cutout or video_source.cutout not in cutouts:
warnings.append(
f"[video:{video_id}] has no 'cutout' set in videos.json — skipped."
f"[video:{video_id}] has no valid cutout in videos.json — skipped."
)
continue
if video_source.cutout not in cutouts:
warnings.append(
f"[video:{video_id}] cutout '{video_source.cutout}' is not defined in project config — skipped. "
f"Available: {list(cutouts.keys())}"
)
continue
video_markers.append((timing.timestamp, video_id, "video", None, None))
video_markers.append((timing.timestamp, video_id, "video", False))
continue
# --- [narration:xxx] ---
@@ -1040,40 +1041,24 @@ def _extract_video_events(
if video_id not in videos:
warnings.append(
f"[narration:{video_id}] references unknown video '{video_id}' — skipped."
f"Add it to videos.json or remove the marker."
)
continue
video_source = videos[video_id]
if not video_source.cutout:
if not video_source.cutout or video_source.cutout not in cutouts:
warnings.append(
f"[narration:{video_id}] has no 'cutout' set in videos.json — skipped."
f"[narration:{video_id}] has no valid cutout in videos.json — skipped."
)
continue
if video_source.cutout not in cutouts:
warnings.append(
f"[narration:{video_id}] cutout '{video_source.cutout}' is not defined in project config — skipped. "
f"Available: {list(cutouts.keys())}"
)
continue
video_markers.append((timing.timestamp, video_id, "narration", None, None))
video_markers.append((timing.timestamp, video_id, "narration", False))
events: list[VideoEvent] = []
for (
start_time,
video_id,
marker_type,
cutout_override,
layer_override,
) in video_markers:
for (start_time, video_id, marker_type, pause_narration) in video_markers:
video_source = videos[video_id]
# Resolve cutout: marker override > videos.json cutout
# (validation already ensured cutout exists — this is a safety assertion)
cutout_name = cutout_override or video_source.cutout
# Read cutout and layer directly from videos.json (projected by ETL)
cutout_name = video_source.cutout
cutout = cutouts[cutout_name]
# Resolve layer: marker override > videos.json layer
layer = layer_override if layer_override is not None else video_source.layer
layer = video_source.layer
end_on = video_source.end_on
if end_on == "take" and video_source.take is not None: