"""Validation stage: fail-fast checks on parsed data."""

from pathlib import Path
from typing import Optional

from .cache import resolve_with_cache
from .errors import ValidationError, ValidationIssue
from .parser import _read_json
from .models import (
    ProjectConfig,
    SlideDefinition,
    VideoSource,
    SLIDE_LAYOUTS,
    CAMERA_PRESETS,
)


def _check_manuscript_markers(
    manuscript_path: Path,
    manuscript_markers: list[str],
    slides: dict[str, SlideDefinition],
    videos: dict[str, VideoSource],
    issues: list[ValidationIssue],
    warnings: list[ValidationIssue],
) -> None:
    """Check that every manuscript marker refers to something known."""
    for marker in manuscript_markers:
        # Skip camera effect markers (Zoom0, TiltLeft, Reset, etc.)
        if marker in CAMERA_PRESETS:
            continue

        # Skip audio markers ('A' followed by an alphanumeric id, e.g. Awoosh)
        if marker.startswith("A") and len(marker) > 1 and marker[1:].isalnum():
            continue

        # Video trigger markers (video:xxx) - slide-like videos
        if marker.startswith("video:"):
            video_id = marker.removeprefix("video:")
            if video_id not in videos:
                # Offer a hint when the marker merely carries a file extension
                hint = ""
                if "." in video_id:
                    base_name = video_id.rsplit(".", 1)[0]
                    if base_name in videos:
                        hint = f" (Did you mean [video:{base_name}]? Don't include file extensions in markers)"
                warnings.append(
                    ValidationIssue(
                        f"Video marker [{marker}] referenced in manuscript but '{video_id}' not defined in videos.json{hint} — using PlaceholderVideo instead",
                        manuscript_path,
                    )
                )
            continue

        # Narration trigger markers (narration:xxx) - continuous videos
        if marker.startswith("narration:"):
            video_id = marker.removeprefix("narration:")
            if video_id not in videos:
                warnings.append(
                    ValidationIssue(
                        f"Narration marker [{marker}] referenced in manuscript but '{video_id}' not defined in videos.json — using PlaceholderVideo instead",
                        manuscript_path,
                    )
                )
            continue

        # Segment markers are structural annotations, not slide references
        if marker.startswith("segment:"):
            continue

        # Any other namespaced marker (e.g. [background:xxx]) is unsupported:
        # ignore it, but tell the user.
        if ":" in marker:
            warnings.append(
                ValidationIssue(
                    f"Unknown marker type [{marker}] — ignoring (no support for '{marker.split(':', 1)[0]}:' markers)",
                    manuscript_path,
                )
            )
            continue

        # Everything left is treated as a slide reference
        if marker not in slides:
            issues.append(
                ValidationIssue(
                    f"Slide marker [{marker}] referenced in manuscript but not defined in slides.json",
                    manuscript_path,
                )
            )


def _check_slides(
    project_path: Path,
    slides_json_path: Path,
    slides: dict[str, SlideDefinition],
    issues: list[ValidationIssue],
) -> None:
    """Check that each slide's image exists and its type is a known layout."""
    # Slide images are resolved relative to the directory holding slides.json
    slides_dir = slides_json_path.parent

    for slide_id, slide_def in slides.items():
        image_path, _ = resolve_with_cache(slides_dir / slide_def.image, project_path)
        if not image_path.exists():
            issues.append(
                ValidationIssue(
                    f"Slide image not found: {slide_def.image}", slides_json_path
                )
            )

        if slide_def.type not in SLIDE_LAYOUTS:
            issues.append(
                ValidationIssue(
                    f"Unknown slide type '{slide_def.type}' for slide {slide_id}. "
                    f"Valid types: {list(SLIDE_LAYOUTS.keys())}",
                    # Report against the configured slides file, matching the
                    # image check above, rather than a hard-coded file name.
                    slides_json_path,
                )
            )


def _check_videos(
    project_path: Path,
    videos_json_path: Path,
    videos: dict[str, VideoSource],
    videos_dir: Path,
    issues: list[ValidationIssue],
    warnings: list[ValidationIssue],
) -> None:
    """Check video source files (warning) and preprocessed outputs (error)."""
    # shared_assets may live inside the project or next to it
    shared_assets_dir = None
    if (project_path / "shared_assets").exists():
        shared_assets_dir = project_path / "shared_assets"
    elif (project_path.parent / "shared_assets").exists():
        shared_assets_dir = project_path.parent / "shared_assets"

    for video_id, video_source in videos.items():
        # Paths are relative to shared_assets for shared videos, else videos_dir
        if video_source.is_shared:
            if not shared_assets_dir:
                issues.append(
                    ValidationIssue(
                        f"Video '{video_id}' has is_shared=true but shared_assets directory not found",
                        videos_json_path,
                    )
                )
                continue
            base_dir = shared_assets_dir
        else:
            base_dir = videos_dir

        video_path, _ = resolve_with_cache(
            base_dir / video_source.source_file, project_path
        )
        if not video_path.exists():
            # Non-fatal: reported as a warning, not an issue
            warnings.append(
                ValidationIssue(
                    f"Video file not found: {video_source.source_file} — falling back to PlaceholderVideo",
                    videos_json_path,
                )
            )

        # A filtered video must already have its preprocessed output on disk
        if video_source.filter and video_source.output_file:
            output_path, _ = resolve_with_cache(
                base_dir / video_source.output_file, project_path
            )
            if not output_path.exists():
                issues.append(
                    ValidationIssue(
                        f"Preprocessed output not found: {video_source.output_file}. "
                        f"Run with -a preprocess first.",
                        videos_json_path,
                    )
                )


def _check_background(
    project_path: Path,
    bg_handle: str,
    issues: list[ValidationIssue],
) -> None:
    """Check that the background handle resolves to an existing shared file."""
    project_json_path = project_path / "project.json"
    # The background is always looked up in the sibling shared_assets directory
    bg_assets_dir = project_path.parent / "shared_assets"
    bg_videos_json_path = bg_assets_dir / "videos.json"

    if not bg_videos_json_path.exists():
        issues.append(
            ValidationIssue(
                f"shared_assets/videos.json not found (needed for background handle '{bg_handle}')",
                project_json_path,
            )
        )
        return

    bg_videos = _read_json(bg_videos_json_path)
    if bg_handle not in bg_videos:
        issues.append(
            ValidationIssue(
                f"Background handle '{bg_handle}' not found in shared_assets/videos.json",
                project_json_path,
            )
        )
        return

    # Report a malformed entry as a validation issue instead of letting a
    # KeyError/TypeError escape from the validator.
    entry = bg_videos[bg_handle]
    source_file = entry.get("source_file") if isinstance(entry, dict) else None
    if not isinstance(source_file, str) or not source_file:
        issues.append(
            ValidationIssue(
                f"Background handle '{bg_handle}' has no 'source_file' in shared_assets/videos.json",
                project_json_path,
            )
        )
        return

    bg_path = bg_assets_dir / source_file
    if not bg_path.exists():
        issues.append(
            ValidationIssue(
                f"Background file not found: {bg_path} (from handle '{bg_handle}')",
                project_json_path,
            )
        )


def _check_render_settings(
    project_path: Path,
    config: ProjectConfig,
    issues: list[ValidationIssue],
) -> None:
    """Check that resolution and FPS fall inside the supported ranges."""
    project_json_path = project_path / "project.json"

    width, height = config.resolution
    if width < 50 or height < 50:
        issues.append(
            ValidationIssue(
                f"Resolution too small: {width}x{height}", project_json_path
            )
        )
    if width > 7680 or height > 4320:
        issues.append(
            ValidationIssue(
                f"Resolution too large: {width}x{height} (max 8K)",
                project_json_path,
            )
        )

    if config.fps < 1 or config.fps > 120:
        issues.append(
            ValidationIssue(
                f"Invalid FPS: {config.fps} (must be 1-120)",
                project_json_path,
            )
        )


def validate_project(
    project_path: Path,
    manuscript_markers: list[str],
    config: ProjectConfig,
    slides: dict[str, SlideDefinition],
    videos: dict[str, VideoSource],
    videos_dir: Path,
    malformed_markers: Optional[list[tuple[int, str]]] = None,
) -> list[ValidationIssue]:
    """
    Validate all parsed project data.

    Fatal problems are collected and raised together; non-fatal ones are
    returned as warnings.

    Fatal checks:
    - No malformed markers in the manuscript
    - Every plain (slide) marker in the manuscript is defined in ``slides``
    - Every slide image exists on disk and every slide type is in
      SLIDE_LAYOUTS
    - A shared video has a shared_assets directory to resolve against
    - A video with a filter and an output file has that output on disk
    - The background handle (if set) resolves to an existing file
    - At least one video source is defined
    - Resolution is between 50x50 and 7680x4320; FPS is between 1 and 120

    Warnings (non-fatal):
    - ``video:`` / ``narration:`` markers whose id is not in ``videos``
    - Markers with an unknown ``prefix:``
    - Video source files missing on disk

    Args:
        project_path: Root directory of the project.
        manuscript_markers: Marker names extracted from the manuscript.
        config: Parsed project configuration.
        slides: Slide definitions keyed by slide id.
        videos: Video sources keyed by video id.
        videos_dir: Base directory for non-shared video files.
        malformed_markers: Optional ``(line_number, marker_text)`` pairs for
            markers that could not be parsed.

    Returns:
        The list of warnings.

    Raises:
        ValidationError: If any fatal issue was found; carries all of them.
    """
    issues: list[ValidationIssue] = []
    warnings: list[ValidationIssue] = []

    manuscript_path = project_path / "manuscript.txt"

    # Malformed markers first (these are likely typos)
    for line_num, marker_text in malformed_markers or ():
        issues.append(
            ValidationIssue(
                f"Malformed marker: {marker_text}",
                manuscript_path,
                line_num,
            )
        )

    _check_manuscript_markers(
        manuscript_path, manuscript_markers, slides, videos, issues, warnings
    )

    slides_json_path = project_path / config.slides_path
    _check_slides(project_path, slides_json_path, slides, issues)

    videos_json_path = project_path / config.videos_path
    _check_videos(
        project_path, videos_json_path, videos, videos_dir, issues, warnings
    )

    bg_handle = config.background
    if bg_handle:
        _check_background(project_path, bg_handle, issues)

    # At least one video source is required
    if not videos:
        issues.append(
            ValidationIssue(
                "No video sources defined in videos.json", videos_json_path
            )
        )

    _check_render_settings(project_path, config, issues)

    if issues:
        raise ValidationError(issues)

    return warnings