Add configurable slides path and malformed marker detection
- project.json now supports a "slides" field pointing to the slides.json location
- Slide images are loaded from the same directory as slides.json
- Validation detects malformed markers (missing ], extra spaces)
- Validation reports the line number of each malformed marker

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
+6
-6
@@ -112,13 +112,13 @@ def cmd_validate(project_path: Path) -> int:
|
|||||||
print(f"Validating project: {project_path}")
|
print(f"Validating project: {project_path}")
|
||||||
|
|
||||||
# Parse all files
|
# Parse all files
|
||||||
_, markers = parse_manuscript(project_path)
|
_, markers, malformed = parse_manuscript(project_path)
|
||||||
config = parse_project_config(project_path)
|
config = parse_project_config(project_path)
|
||||||
slides = parse_slides(project_path)
|
slides = parse_slides(project_path, config)
|
||||||
videos = parse_videos(project_path)
|
videos = parse_videos(project_path)
|
||||||
|
|
||||||
# Validate
|
# Validate
|
||||||
validate_project(project_path, markers, config, slides, videos)
|
validate_project(project_path, markers, config, slides, videos, malformed)
|
||||||
|
|
||||||
print("Validation passed.")
|
print("Validation passed.")
|
||||||
return 0
|
return 0
|
||||||
@@ -132,9 +132,9 @@ def cmd_render(project_path: Path, output_path: Path, verbose: bool, dry_run: bo
|
|||||||
|
|
||||||
# Stage 1: Extract
|
# Stage 1: Extract
|
||||||
print("Stage 1/4: Parsing input files...")
|
print("Stage 1/4: Parsing input files...")
|
||||||
_, markers = parse_manuscript(project_path)
|
_, markers, malformed = parse_manuscript(project_path)
|
||||||
config = parse_project_config(project_path)
|
config = parse_project_config(project_path)
|
||||||
slides = parse_slides(project_path)
|
slides = parse_slides(project_path, config)
|
||||||
videos = parse_videos(project_path)
|
videos = parse_videos(project_path)
|
||||||
transcript = parse_transcript(project_path)
|
transcript = parse_transcript(project_path)
|
||||||
|
|
||||||
@@ -145,7 +145,7 @@ def cmd_render(project_path: Path, output_path: Path, verbose: bool, dry_run: bo
|
|||||||
|
|
||||||
# Stage 2: Validate
|
# Stage 2: Validate
|
||||||
print("Stage 2/4: Validating...")
|
print("Stage 2/4: Validating...")
|
||||||
validate_project(project_path, markers, config, slides, videos)
|
validate_project(project_path, markers, config, slides, videos, malformed)
|
||||||
print(" - Validation passed")
|
print(" - Validation passed")
|
||||||
print()
|
print()
|
||||||
|
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ class ProjectConfig:
|
|||||||
talking_head: TalkingHeadConfig
|
talking_head: TalkingHeadConfig
|
||||||
default_slide_type: str
|
default_slide_type: str
|
||||||
background_video: str
|
background_video: str
|
||||||
|
slides_path: str = "slides.json" # path to slides.json relative to project
|
||||||
audio_source: Optional[str] = None # defaults to talking head
|
audio_source: Optional[str] = None # defaults to talking head
|
||||||
|
|
||||||
|
|
||||||
@@ -75,6 +76,7 @@ class RenderPlan:
|
|||||||
slide_events: list[SlideEvent]
|
slide_events: list[SlideEvent]
|
||||||
total_duration: float
|
total_duration: float
|
||||||
slides: dict[str, SlideDefinition]
|
slides: dict[str, SlideDefinition]
|
||||||
|
slides_dir: Path = None # directory containing slide images
|
||||||
|
|
||||||
|
|
||||||
# Slide layout configurations (hardcoded for POC)
|
# Slide layout configurations (hardcoded for POC)
|
||||||
|
|||||||
+32
-6
@@ -16,12 +16,12 @@ from .models import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def parse_manuscript(project_path: Path) -> tuple[str, list[str]]:
|
def parse_manuscript(project_path: Path) -> tuple[str, list[str], list[tuple[int, str]]]:
|
||||||
"""
|
"""
|
||||||
Parse manuscript.txt and extract text content and slide markers.
|
Parse manuscript.txt and extract text content and slide markers.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple of (full text, list of marker IDs found)
|
Tuple of (full text, list of marker IDs found, list of malformed markers as (line_num, text))
|
||||||
"""
|
"""
|
||||||
manuscript_path = project_path / "manuscript.txt"
|
manuscript_path = project_path / "manuscript.txt"
|
||||||
|
|
||||||
@@ -30,10 +30,32 @@ def parse_manuscript(project_path: Path) -> tuple[str, list[str]]:
|
|||||||
|
|
||||||
text = manuscript_path.read_text(encoding="utf-8")
|
text = manuscript_path.read_text(encoding="utf-8")
|
||||||
|
|
||||||
# Extract all slide markers like [S1], [S2], etc.
|
# Extract all valid slide markers like [S1], [S2], etc.
|
||||||
markers = re.findall(r"\[([A-Za-z0-9_]+)\]", text)
|
markers = re.findall(r"\[([A-Za-z0-9_]+)\]", text)
|
||||||
|
|
||||||
return text, markers
|
# Find malformed markers (missing brackets, extra spaces, etc.)
|
||||||
|
malformed: list[tuple[int, str]] = []
|
||||||
|
lines = text.split("\n")
|
||||||
|
|
||||||
|
for line_num, line in enumerate(lines, start=1):
|
||||||
|
# Pattern for potential markers that are malformed:
|
||||||
|
# - Missing closing bracket: [S1 or [S12 (not followed by ])
|
||||||
|
# - Extra spaces: [S 1] or [S1 ] or [ S1]
|
||||||
|
|
||||||
|
# Find unclosed brackets: [S followed by digits, then space/newline/EOF (not ])
|
||||||
|
# Match [S1, [S12, [S123 etc that are NOT followed by ]
|
||||||
|
for match in re.finditer(r"\[S\d+", line):
|
||||||
|
start, end = match.span()
|
||||||
|
# Check if there's a ] immediately after
|
||||||
|
if end >= len(line) or line[end] != "]":
|
||||||
|
malformed.append((line_num, match.group()))
|
||||||
|
|
||||||
|
# Find markers with internal/trailing spaces like [S 1] or [S1 ] or [ S1]
|
||||||
|
spaced = re.findall(r"\[\s+S\d+\s*\]|\[S\d+\s+\]|\[S\s+\d+\]", line)
|
||||||
|
for match in spaced:
|
||||||
|
malformed.append((line_num, match))
|
||||||
|
|
||||||
|
return text, markers, malformed
|
||||||
|
|
||||||
|
|
||||||
def parse_transcript(project_path: Path) -> list[TimedWord]:
|
def parse_transcript(project_path: Path) -> list[TimedWord]:
|
||||||
@@ -108,6 +130,7 @@ def parse_project_config(project_path: Path) -> ProjectConfig:
|
|||||||
talking_head=talking_head,
|
talking_head=talking_head,
|
||||||
default_slide_type=data.get("defaultSlideType", "square"),
|
default_slide_type=data.get("defaultSlideType", "square"),
|
||||||
background_video=data.get("background_video", ""),
|
background_video=data.get("background_video", ""),
|
||||||
|
slides_path=data.get("slides", "slides.json"),
|
||||||
audio_source=data.get("audio_source"),
|
audio_source=data.get("audio_source"),
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -123,12 +146,15 @@ def _parse_dimension(value: Any) -> int:
|
|||||||
return 200 # default
|
return 200 # default
|
||||||
|
|
||||||
|
|
||||||
def parse_slides(project_path: Path) -> dict[str, SlideDefinition]:
|
def parse_slides(project_path: Path, config: ProjectConfig = None) -> dict[str, SlideDefinition]:
|
||||||
"""Parse slides.json into slide definitions."""
|
"""Parse slides.json into slide definitions."""
|
||||||
|
if config and config.slides_path:
|
||||||
|
slides_path = project_path / config.slides_path
|
||||||
|
else:
|
||||||
slides_path = project_path / "slides.json"
|
slides_path = project_path / "slides.json"
|
||||||
|
|
||||||
if not slides_path.exists():
|
if not slides_path.exists():
|
||||||
raise ParseError("slides.json not found", slides_path)
|
raise ParseError(f"slides file not found: {slides_path}", slides_path)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
data = json.loads(slides_path.read_text(encoding="utf-8"))
|
data = json.loads(slides_path.read_text(encoding="utf-8"))
|
||||||
|
|||||||
+3
-3
@@ -59,13 +59,13 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
|
|||||||
bg_path = project_path / plan.config.background_video
|
bg_path = project_path / plan.config.background_video
|
||||||
cmd.extend(["-i", str(bg_path)])
|
cmd.extend(["-i", str(bg_path)])
|
||||||
|
|
||||||
# Input: slide images
|
# Input: slide images (from slides_dir, same directory as slides.json)
|
||||||
slides_path = project_path / "media" / "slides"
|
slides_dir = plan.slides_dir.resolve() if plan.slides_dir else project_path / "media" / "slides"
|
||||||
slide_inputs: list[str] = [] # Track which slides we've added
|
slide_inputs: list[str] = [] # Track which slides we've added
|
||||||
|
|
||||||
for event in plan.slide_events:
|
for event in plan.slide_events:
|
||||||
if event.slide_id not in slide_inputs:
|
if event.slide_id not in slide_inputs:
|
||||||
image_path = slides_path / event.slide_def.image
|
image_path = slides_dir / event.slide_def.image
|
||||||
cmd.extend(["-i", str(image_path)])
|
cmd.extend(["-i", str(image_path)])
|
||||||
slide_inputs.append(event.slide_id)
|
slide_inputs.append(event.slide_id)
|
||||||
|
|
||||||
|
|||||||
@@ -37,6 +37,10 @@ def build_render_plan(
|
|||||||
# Build slide events from transcript markers
|
# Build slide events from transcript markers
|
||||||
slide_events = _extract_slide_events(transcript, slides, total_duration)
|
slide_events = _extract_slide_events(transcript, slides, total_duration)
|
||||||
|
|
||||||
|
# Derive slides directory from slides_path
|
||||||
|
slides_json_path = project_path / config.slides_path
|
||||||
|
slides_dir = slides_json_path.parent
|
||||||
|
|
||||||
return RenderPlan(
|
return RenderPlan(
|
||||||
project_path=project_path,
|
project_path=project_path,
|
||||||
config=config,
|
config=config,
|
||||||
@@ -44,6 +48,7 @@ def build_render_plan(
|
|||||||
slide_events=slide_events,
|
slide_events=slide_events,
|
||||||
total_duration=total_duration,
|
total_duration=total_duration,
|
||||||
slides=slides,
|
slides=slides,
|
||||||
|
slides_dir=slides_dir,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
+16
-4
@@ -12,6 +12,7 @@ def validate_project(
|
|||||||
config: ProjectConfig,
|
config: ProjectConfig,
|
||||||
slides: dict[str, SlideDefinition],
|
slides: dict[str, SlideDefinition],
|
||||||
videos: dict[str, VideoSource],
|
videos: dict[str, VideoSource],
|
||||||
|
malformed_markers: list[tuple[int, str]] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""
|
"""
|
||||||
Validate all parsed project data. Raises ValidationError if any issues found.
|
Validate all parsed project data. Raises ValidationError if any issues found.
|
||||||
@@ -22,9 +23,19 @@ def validate_project(
|
|||||||
- All video files exist on disk
|
- All video files exist on disk
|
||||||
- Background video exists (if specified)
|
- Background video exists (if specified)
|
||||||
- Slide types are valid
|
- Slide types are valid
|
||||||
|
- No malformed markers in manuscript
|
||||||
"""
|
"""
|
||||||
issues: list[ValidationIssue] = []
|
issues: list[ValidationIssue] = []
|
||||||
|
|
||||||
|
# Check for malformed markers first (these are likely typos)
|
||||||
|
if malformed_markers:
|
||||||
|
for line_num, marker_text in malformed_markers:
|
||||||
|
issues.append(ValidationIssue(
|
||||||
|
f"Malformed marker: {marker_text}",
|
||||||
|
project_path / "manuscript.txt",
|
||||||
|
line_num
|
||||||
|
))
|
||||||
|
|
||||||
# Check all manuscript markers have corresponding slides
|
# Check all manuscript markers have corresponding slides
|
||||||
for marker in manuscript_markers:
|
for marker in manuscript_markers:
|
||||||
if marker not in slides:
|
if marker not in slides:
|
||||||
@@ -34,15 +45,16 @@ def validate_project(
|
|||||||
))
|
))
|
||||||
|
|
||||||
# Check all slide images exist
|
# Check all slide images exist
|
||||||
media_path = project_path / "media"
|
# Slides are in the same directory as the slides.json file
|
||||||
slides_path = media_path / "slides"
|
slides_json_path = project_path / config.slides_path
|
||||||
|
slides_dir = slides_json_path.parent
|
||||||
|
|
||||||
for slide_id, slide_def in slides.items():
|
for slide_id, slide_def in slides.items():
|
||||||
image_path = slides_path / slide_def.image
|
image_path = slides_dir / slide_def.image
|
||||||
if not image_path.exists():
|
if not image_path.exists():
|
||||||
issues.append(ValidationIssue(
|
issues.append(ValidationIssue(
|
||||||
f"Slide image not found: {slide_def.image}",
|
f"Slide image not found: {slide_def.image}",
|
||||||
project_path / "slides.json"
|
slides_json_path
|
||||||
))
|
))
|
||||||
|
|
||||||
# Check slide type is valid
|
# Check slide type is valid
|
||||||
|
|||||||
Reference in New Issue
Block a user