From 7f7425da46a982a6fb1339323a648151d9aafd64 Mon Sep 17 00:00:00 2001 From: jenstandstad Date: Mon, 12 Jan 2026 11:49:46 +0100 Subject: [PATCH] Add configurable slides path and malformed marker detection - project.json now supports "slides" field pointing to slides.json location - Slide images are loaded from same directory as slides.json - Validation detects malformed markers (missing ], extra spaces) - Reports line numbers for each malformed marker Co-Authored-By: Claude Opus 4.5 --- gnommo/cli.py | 12 ++++++------ gnommo/models.py | 2 ++ gnommo/parser.py | 40 +++++++++++++++++++++++++++++++++------- gnommo/renderer.py | 6 +++--- gnommo/transformer.py | 5 +++++ gnommo/validator.py | 20 ++++++++++++++++---- 6 files changed, 65 insertions(+), 20 deletions(-) diff --git a/gnommo/cli.py b/gnommo/cli.py index b68caa2..130e0db 100644 --- a/gnommo/cli.py +++ b/gnommo/cli.py @@ -112,13 +112,13 @@ def cmd_validate(project_path: Path) -> int: print(f"Validating project: {project_path}") # Parse all files - _, markers = parse_manuscript(project_path) + _, markers, malformed = parse_manuscript(project_path) config = parse_project_config(project_path) - slides = parse_slides(project_path) + slides = parse_slides(project_path, config) videos = parse_videos(project_path) # Validate - validate_project(project_path, markers, config, slides, videos) + validate_project(project_path, markers, config, slides, videos, malformed) print("Validation passed.") return 0 @@ -132,9 +132,9 @@ def cmd_render(project_path: Path, output_path: Path, verbose: bool, dry_run: bo # Stage 1: Extract print("Stage 1/4: Parsing input files...") - _, markers = parse_manuscript(project_path) + _, markers, malformed = parse_manuscript(project_path) config = parse_project_config(project_path) - slides = parse_slides(project_path) + slides = parse_slides(project_path, config) videos = parse_videos(project_path) transcript = parse_transcript(project_path) @@ -145,7 +145,7 @@ def cmd_render(project_path: Path, output_path: Path, verbose: bool, dry_run: bo # Stage 2: Validate print("Stage 2/4: Validating...") - validate_project(project_path, markers, config, slides, videos) + validate_project(project_path, markers, config, slides, videos, malformed) print(" - Validation passed") print() diff --git a/gnommo/models.py b/gnommo/models.py index eb238ee..fc7061a 100644 --- a/gnommo/models.py +++ b/gnommo/models.py @@ -21,6 +21,7 @@ class ProjectConfig: talking_head: TalkingHeadConfig default_slide_type: str background_video: str + slides_path: str = "slides.json" # path to slides.json relative to project audio_source: Optional[str] = None # defaults to talking head @@ -75,6 +76,7 @@ class RenderPlan: slide_events: list[SlideEvent] total_duration: float slides: dict[str, SlideDefinition] + slides_dir: Path = None # directory containing slide images # Slide layout configurations (hardcoded for POC) diff --git a/gnommo/parser.py b/gnommo/parser.py index 253640b..9b9fe71 100644 --- a/gnommo/parser.py +++ b/gnommo/parser.py @@ -16,12 +16,12 @@ from .models import ( ) -def parse_manuscript(project_path: Path) -> tuple[str, list[str]]: +def parse_manuscript(project_path: Path) -> tuple[str, list[str], list[tuple[int, str]]]: """ Parse manuscript.txt and extract text content and slide markers. Returns: - Tuple of (full text, list of marker IDs found) + Tuple of (full text, list of marker IDs found, list of malformed markers as (line_num, text)) """ manuscript_path = project_path / "manuscript.txt" @@ -30,10 +30,32 @@ def parse_manuscript(project_path: Path) -> tuple[str, list[str]]: text = manuscript_path.read_text(encoding="utf-8") - # Extract all slide markers like [S1], [S2], etc. + # Extract all valid slide markers like [S1], [S2], etc. markers = re.findall(r"\[([A-Za-z0-9_]+)\]", text) - return text, markers + # Find malformed markers (missing brackets, extra spaces, etc.) + malformed: list[tuple[int, str]] = [] + lines = text.split("\n") + + for line_num, line in enumerate(lines, start=1): + # Pattern for potential markers that are malformed: + # - Missing closing bracket: [S1 or [S12 (not followed by ]) + # - Extra spaces: [S 1] or [S1 ] or [ S1] + + # Find unclosed brackets: [S followed by digits, then space/newline/EOF (not ]) + # Match [S1, [S12, [S123 etc that are NOT followed by ] + for match in re.finditer(r"\[S\d+", line): + start, end = match.span() + # Check if there's a ] immediately after + if end >= len(line) or line[end] != "]": + malformed.append((line_num, match.group())) + + # Find markers with internal/trailing spaces like [S 1] or [S1 ] or [ S1] + spaced = re.findall(r"\[\s+S\d+\s*\]|\[S\d+\s+\]|\[S\s+\d+\]", line) + for match in spaced: + malformed.append((line_num, match)) + + return text, markers, malformed def parse_transcript(project_path: Path) -> list[TimedWord]: @@ -108,6 +130,7 @@ def parse_project_config(project_path: Path) -> ProjectConfig: talking_head=talking_head, default_slide_type=data.get("defaultSlideType", "square"), background_video=data.get("background_video", ""), + slides_path=data.get("slides", "slides.json"), audio_source=data.get("audio_source"), ) @@ -123,12 +146,15 @@ def _parse_dimension(value: Any) -> int: return 200 # default -def parse_slides(project_path: Path) -> dict[str, SlideDefinition]: +def parse_slides(project_path: Path, config: ProjectConfig = None) -> dict[str, SlideDefinition]: """Parse slides.json into slide definitions.""" - slides_path = project_path / "slides.json" + if config and config.slides_path: + slides_path = project_path / config.slides_path + else: + slides_path = project_path / "slides.json" if not slides_path.exists(): - raise ParseError("slides.json not found", slides_path) + raise ParseError(f"slides file not found: {slides_path}", slides_path) try: data = json.loads(slides_path.read_text(encoding="utf-8")) diff --git a/gnommo/renderer.py b/gnommo/renderer.py index 165336f..a690a0b 100644 --- a/gnommo/renderer.py +++ b/gnommo/renderer.py @@ -59,13 +59,13 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]: bg_path = project_path / plan.config.background_video cmd.extend(["-i", str(bg_path)]) - # Input: slide images - slides_path = project_path / "media" / "slides" + # Input: slide images (from slides_dir, same directory as slides.json) + slides_dir = plan.slides_dir.resolve() if plan.slides_dir else project_path / "media" / "slides" slide_inputs: list[str] = [] # Track which slides we've added for event in plan.slide_events: if event.slide_id not in slide_inputs: - image_path = slides_path / event.slide_def.image + image_path = slides_dir / event.slide_def.image cmd.extend(["-i", str(image_path)]) slide_inputs.append(event.slide_id) diff --git a/gnommo/transformer.py b/gnommo/transformer.py index 3ecc609..149b167 100644 --- a/gnommo/transformer.py +++ b/gnommo/transformer.py @@ -37,6 +37,10 @@ def build_render_plan( # Build slide events from transcript markers slide_events = _extract_slide_events(transcript, slides, total_duration) + # Derive slides directory from slides_path + slides_json_path = project_path / config.slides_path + slides_dir = slides_json_path.parent + return RenderPlan( project_path=project_path, config=config, @@ -44,6 +48,7 @@ def build_render_plan( slide_events=slide_events, total_duration=total_duration, slides=slides, + slides_dir=slides_dir, ) diff --git a/gnommo/validator.py b/gnommo/validator.py index 8d5063c..b8e8a87 100644 --- a/gnommo/validator.py +++ b/gnommo/validator.py @@ -12,6 +12,7 @@ def validate_project( config: ProjectConfig, slides: dict[str, SlideDefinition], videos: dict[str, VideoSource], + malformed_markers: list[tuple[int, str]] = None, ) -> None: """ Validate all parsed project data. Raises ValidationError if any issues found. @@ -22,9 +23,19 @@ def validate_project( - All video files exist on disk - Background video exists (if specified) - Slide types are valid + - No malformed markers in manuscript """ issues: list[ValidationIssue] = [] + # Check for malformed markers first (these are likely typos) + if malformed_markers: + for line_num, marker_text in malformed_markers: + issues.append(ValidationIssue( + f"Malformed marker: {marker_text}", + project_path / "manuscript.txt", + line_num + )) + # Check all manuscript markers have corresponding slides for marker in manuscript_markers: if marker not in slides: @@ -34,15 +45,16 @@ def validate_project( )) # Check all slide images exist - media_path = project_path / "media" - slides_path = media_path / "slides" + # Slides are in the same directory as the slides.json file + slides_json_path = project_path / config.slides_path + slides_dir = slides_json_path.parent for slide_id, slide_def in slides.items(): - image_path = slides_path / slide_def.image + image_path = slides_dir / slide_def.image if not image_path.exists(): issues.append(ValidationIssue( f"Slide image not found: {slide_def.image}", - project_path / "slides.json" + slides_json_path )) # Check slide type is valid