Adding some files
This commit is contained in:
+37
-9
@@ -234,7 +234,7 @@ Examples:
|
|||||||
args.res,
|
args.res,
|
||||||
)
|
)
|
||||||
elif action == "trim":
|
elif action == "trim":
|
||||||
return cmd_trim(project_path, args.verbose, args.force, args.threshold)
|
return cmd_trim(project_path, args.verbose, args.force, args.threshold, args.res)
|
||||||
elif action == "transcode":
|
elif action == "transcode":
|
||||||
return cmd_transcode(
|
return cmd_transcode(
|
||||||
project_path,
|
project_path,
|
||||||
@@ -1197,7 +1197,7 @@ def cmd_preprocess(
|
|||||||
"""
|
"""
|
||||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
from .parser import parse_project_config, parse_videos
|
from .parser import parse_project_config, parse_videos
|
||||||
from .preprocessor import preprocess_video
|
from .preprocessor import preprocess_video, RES_CONFIGS
|
||||||
from .models import VideoSource as _VideoSource
|
from .models import VideoSource as _VideoSource
|
||||||
|
|
||||||
mode_str = f" ({res.upper()})" if res != "full" else ""
|
mode_str = f" ({res.upper()})" if res != "full" else ""
|
||||||
@@ -1278,6 +1278,14 @@ def cmd_preprocess(
|
|||||||
if using_compressed and segment_id.endswith("_compressed"):
|
if using_compressed and segment_id.endswith("_compressed"):
|
||||||
segment_id = segment_id[: -len("_compressed")]
|
segment_id = segment_id[: -len("_compressed")]
|
||||||
|
|
||||||
|
# For non-full res, write into the res subdir so stitch --res low finds the
|
||||||
|
# files at narration/low/processed/ (narration.json still records the plain
|
||||||
|
# "processed/..." path; stitch shifts the base dir itself).
|
||||||
|
_res_cfg = RES_CONFIGS.get(res) if res != "full" else None
|
||||||
|
if _res_cfg:
|
||||||
|
_, _, _subdir = _res_cfg
|
||||||
|
output_file = f"{_subdir}/processed/{segment_id}_processed.mov"
|
||||||
|
else:
|
||||||
output_file = f"processed/{segment_id}_processed.mov"
|
output_file = f"processed/{segment_id}_processed.mov"
|
||||||
output_path = narration_dir / output_file
|
output_path = narration_dir / output_file
|
||||||
|
|
||||||
@@ -1343,6 +1351,7 @@ def cmd_preprocess(
|
|||||||
verbose=False,
|
verbose=False,
|
||||||
force=force,
|
force=force,
|
||||||
custom_gnommo_scratch=gnommo_scratch,
|
custom_gnommo_scratch=gnommo_scratch,
|
||||||
|
res=res,
|
||||||
)
|
)
|
||||||
return task
|
return task
|
||||||
|
|
||||||
@@ -1371,6 +1380,7 @@ def cmd_preprocess(
|
|||||||
verbose,
|
verbose,
|
||||||
force,
|
force,
|
||||||
gnommo_scratch,
|
gnommo_scratch,
|
||||||
|
res=res,
|
||||||
)
|
)
|
||||||
output_path = narration_dir / segment_source.output_file
|
output_path = narration_dir / segment_source.output_file
|
||||||
if output_path.exists():
|
if output_path.exists():
|
||||||
@@ -1396,8 +1406,8 @@ def cmd_preprocess(
|
|||||||
for key in _PRESERVE_KEYS:
|
for key in _PRESERVE_KEYS:
|
||||||
if key in existing_entry:
|
if key in existing_entry:
|
||||||
entry[key] = existing_entry[key]
|
entry[key] = existing_entry[key]
|
||||||
# Point source_file to the processed output
|
# Always record the plain path; stitch shifts the base dir for low/tiny.
|
||||||
entry["source_file"] = segment_source.output_file
|
entry["source_file"] = f"processed/{segment_id}_processed.mov"
|
||||||
entry.setdefault("use_audio_channels", "auto")
|
entry.setdefault("use_audio_channels", "auto")
|
||||||
entry.setdefault("defer_loudnorm", True)
|
entry.setdefault("defer_loudnorm", True)
|
||||||
existing_narration[segment_id] = entry
|
existing_narration[segment_id] = entry
|
||||||
@@ -1437,7 +1447,7 @@ def cmd_preprocess(
|
|||||||
continue
|
continue
|
||||||
print(f" Processing: {video_id}")
|
print(f" Processing: {video_id}")
|
||||||
preprocess_video(
|
preprocess_video(
|
||||||
videos_dir, video_id, video_source, verbose, force, gnommo_scratch
|
videos_dir, video_id, video_source, verbose, force, gnommo_scratch, res=res
|
||||||
)
|
)
|
||||||
|
|
||||||
print("\nPreprocessing complete.")
|
print("\nPreprocessing complete.")
|
||||||
@@ -1454,6 +1464,7 @@ def cmd_trim(
|
|||||||
verbose: bool,
|
verbose: bool,
|
||||||
force: bool = False,
|
force: bool = False,
|
||||||
threshold_db: float = -40.0,
|
threshold_db: float = -40.0,
|
||||||
|
res: str = "full",
|
||||||
) -> int:
|
) -> int:
|
||||||
"""
|
"""
|
||||||
Auto-detect silence bounds for all narration segments and write skip/take
|
Auto-detect silence bounds for all narration segments and write skip/take
|
||||||
@@ -1482,6 +1493,22 @@ def cmd_trim(
|
|||||||
print(" Run 'gnommo -p <project> import' first.")
|
print(" Run 'gnommo -p <project> import' first.")
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
|
# Build a lookup of raw source files by segment ID. Raw files give cleaner
|
||||||
|
# silence detection — loudnorm can introduce early peaks in processed audio.
|
||||||
|
_video_exts = {".mov", ".mp4", ".avi", ".mkv", ".m4v"}
|
||||||
|
raw_dir = narration_dir / "raw_mov"
|
||||||
|
compressed_dir = narration_dir / "raw_mp4"
|
||||||
|
|
||||||
|
raw_lookup: dict[str, Path] = {}
|
||||||
|
for search_dir in (raw_dir, compressed_dir):
|
||||||
|
if search_dir.exists():
|
||||||
|
for f in search_dir.iterdir():
|
||||||
|
if f.is_file() and f.suffix.lower() in _video_exts and not f.name.startswith("."):
|
||||||
|
stem = f.stem
|
||||||
|
if stem.endswith("_compressed"):
|
||||||
|
stem = stem[: -len("_compressed")]
|
||||||
|
raw_lookup[stem] = f
|
||||||
|
|
||||||
narration_json_path = narration_dir / "narration.json"
|
narration_json_path = narration_dir / "narration.json"
|
||||||
raw_data: dict = _read_json(narration_json_path)
|
raw_data: dict = _read_json(narration_json_path)
|
||||||
|
|
||||||
@@ -1495,14 +1522,15 @@ def cmd_trim(
|
|||||||
print(f" {seg_id}: already trimmed, skipping (use --force to redo)")
|
print(f" {seg_id}: already trimmed, skipping (use --force to redo)")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Always analyse the raw source file — it's always present and has the
|
# Prefer raw file; fall back to processed if raw not available.
|
||||||
# same audio as any processed version (processing is video-only).
|
source_path = raw_lookup.get(seg_id)
|
||||||
|
if source_path is None:
|
||||||
source_path = narration_dir / seg.source_file
|
source_path = narration_dir / seg.source_file
|
||||||
if not source_path.exists():
|
if not source_path.exists():
|
||||||
print(f" {seg_id}: source file not found ({seg.source_file}), skipping")
|
print(f" {seg_id}: source file not found, skipping")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
print(f" {seg_id}: analysing...", end="", flush=True)
|
print(f" {seg_id}: analysing {source_path.parent.name}/{source_path.name}...", end="", flush=True)
|
||||||
first_sound, last_sound = detect_silence_bounds(
|
first_sound, last_sound = detect_silence_bounds(
|
||||||
source_path, noise_threshold_db=threshold_db, verbose=verbose
|
source_path, noise_threshold_db=threshold_db, verbose=verbose
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -550,6 +550,7 @@ def preprocess_video(
|
|||||||
verbose: bool = False,
|
verbose: bool = False,
|
||||||
force: bool = False,
|
force: bool = False,
|
||||||
custom_gnommo_scratch: Optional[Path] = None,
|
custom_gnommo_scratch: Optional[Path] = None,
|
||||||
|
res: str = "full",
|
||||||
) -> Path:
|
) -> Path:
|
||||||
"""
|
"""
|
||||||
Apply preprocessing filters to a video source.
|
Apply preprocessing filters to a video source.
|
||||||
@@ -562,6 +563,7 @@ def preprocess_video(
|
|||||||
video_id: ID of the video being processed
|
video_id: ID of the video being processed
|
||||||
video_source: VideoSource with source_file, filter, and output_file
|
video_source: VideoSource with source_file, filter, and output_file
|
||||||
custom_gnommo_scratch: Optional external directory for intermediate files (e.g., SSD)
|
custom_gnommo_scratch: Optional external directory for intermediate files (e.g., SSD)
|
||||||
|
res: Resolution preset — when not "full", source is downscaled before filtering
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Path to the final preprocessed output file.
|
Path to the final preprocessed output file.
|
||||||
@@ -586,6 +588,18 @@ def preprocess_video(
|
|||||||
filter_type=None,
|
filter_type=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# For non-full res, downscale the raw source first so all subsequent
|
||||||
|
# filters (chroma key, color grade, etc.) operate on the small file.
|
||||||
|
if res != "full":
|
||||||
|
cfg = RES_CONFIGS.get(res)
|
||||||
|
if cfg:
|
||||||
|
width, height, _ = cfg
|
||||||
|
print(f" Downscaling source to {width}x{height} ({res})...")
|
||||||
|
raw_low_dir = gnommo_scratch / f"raw_{res}"
|
||||||
|
current_input = create_downscaled_video(
|
||||||
|
current_input, raw_low_dir, width, height, force
|
||||||
|
)
|
||||||
|
|
||||||
# Resolve channel setting (auto-detect if needed) and sanity check
|
# Resolve channel setting (auto-detect if needed) and sanity check
|
||||||
channel = video_source.use_audio_channels
|
channel = video_source.use_audio_channels
|
||||||
if channel == "auto":
|
if channel == "auto":
|
||||||
|
|||||||
+77
-20
@@ -182,14 +182,17 @@ def _extract_marker_contexts(
|
|||||||
slides: dict = None,
|
slides: dict = None,
|
||||||
videos: dict = None,
|
videos: dict = None,
|
||||||
audio: dict = None,
|
audio: dict = None,
|
||||||
) -> list[tuple[str, str, bool]]:
|
) -> list[tuple[str, str, bool, str]]:
|
||||||
"""
|
"""
|
||||||
Extract known markers and the text immediately following them from manuscript.
|
Extract known markers and the text immediately following them from manuscript.
|
||||||
|
|
||||||
Unknown markers are filtered out and stripped from following text.
|
Unknown markers are filtered out and stripped from following text.
|
||||||
Note: [cite:...] markers are already stripped at parse time.
|
Note: [cite:...] markers are already stripped at parse time.
|
||||||
|
|
||||||
Returns list of (marker_id, following_text, is_borrowed) tuples for known markers only.
|
Returns list of (marker_id, anchor_text, is_borrowed, anchor_type) tuples.
|
||||||
|
anchor_type is "before" (default — place before the matched phrase) or
|
||||||
|
"after" (place at the end of the matched phrase — used for markers that
|
||||||
|
trail a narration block and have no following text of their own).
|
||||||
"""
|
"""
|
||||||
slides = slides or {}
|
slides = slides or {}
|
||||||
videos = videos or {}
|
videos = videos or {}
|
||||||
@@ -227,7 +230,7 @@ def _extract_marker_contexts(
|
|||||||
for i, (marker_id, following_text) in enumerate(raw_contexts):
|
for i, (marker_id, following_text) in enumerate(raw_contexts):
|
||||||
if following_text:
|
if following_text:
|
||||||
words = following_text.split()[:10]
|
words = following_text.split()[:10]
|
||||||
contexts.append((marker_id, " ".join(words), False))
|
contexts.append((marker_id, " ".join(words), False, "before"))
|
||||||
else:
|
else:
|
||||||
borrowed = False
|
borrowed = False
|
||||||
for j in range(i + 1, len(raw_contexts)):
|
for j in range(i + 1, len(raw_contexts)):
|
||||||
@@ -236,11 +239,24 @@ def _extract_marker_contexts(
|
|||||||
if next_marker_id in (slides or {}):
|
if next_marker_id in (slides or {}):
|
||||||
break
|
break
|
||||||
words = next_text.split()[:10]
|
words = next_text.split()[:10]
|
||||||
contexts.append((marker_id, " ".join(words), True))
|
contexts.append((marker_id, " ".join(words), True, "before"))
|
||||||
borrowed = True
|
borrowed = True
|
||||||
break
|
break
|
||||||
if not borrowed:
|
if not borrowed:
|
||||||
contexts.append((marker_id, "", False))
|
# No following text and blocked by a slide boundary — look
|
||||||
|
# backward for the tail of the preceding narration block and
|
||||||
|
# anchor to the END of those words instead of extrapolating.
|
||||||
|
preceding_text = ""
|
||||||
|
for k in range(i - 1, -1, -1):
|
||||||
|
if raw_contexts[k][1]:
|
||||||
|
preceding_text = raw_contexts[k][1]
|
||||||
|
break
|
||||||
|
if preceding_text:
|
||||||
|
words = preceding_text.split()
|
||||||
|
tail = " ".join(words[-6:])
|
||||||
|
contexts.append((marker_id, tail, False, "after"))
|
||||||
|
else:
|
||||||
|
contexts.append((marker_id, "", False, "before"))
|
||||||
|
|
||||||
return contexts
|
return contexts
|
||||||
|
|
||||||
@@ -250,13 +266,18 @@ def _fuzzy_match_ratio(
|
|||||||
transcription: list[TranscribedWord],
|
transcription: list[TranscribedWord],
|
||||||
start_idx: int,
|
start_idx: int,
|
||||||
window_size: int = 10,
|
window_size: int = 10,
|
||||||
|
pre_filler: int = 30,
|
||||||
|
inter_filler: int = 3,
|
||||||
) -> tuple[float, int, int]:
|
) -> tuple[float, int, int]:
|
||||||
"""
|
"""
|
||||||
Calculate how many words from phrase match the transcription at start_idx.
|
Calculate how many words from phrase match the transcription at start_idx.
|
||||||
|
|
||||||
Words are matched sequentially: each phrase word must appear at or after
|
Words are matched sequentially. Two separate filler tolerances:
|
||||||
the position of the previous match. This prevents false matches where
|
- pre_filler: max words before the FIRST phrase word (absorbs ad-libs)
|
||||||
phrase words appear out of order or far into the window.
|
- inter_filler: max words between consecutive phrase words (keeps the
|
||||||
|
match tight so common words don't stretch the window far
|
||||||
|
into later text, which would push last_idx past subsequent
|
||||||
|
markers' positions)
|
||||||
|
|
||||||
Returns (ratio, first_match_offset, last_match_end_offset) where offsets
|
Returns (ratio, first_match_offset, last_match_end_offset) where offsets
|
||||||
are relative to start_idx. last_match_end_offset points past the last
|
are relative to start_idx. last_match_end_offset points past the last
|
||||||
@@ -265,14 +286,13 @@ def _fuzzy_match_ratio(
|
|||||||
if not phrase_words:
|
if not phrase_words:
|
||||||
return 0.0, 0, 0
|
return 0.0, 0, 0
|
||||||
|
|
||||||
words_to_check = min(len(phrase_words), window_size)
|
|
||||||
# +30 filler allowance: absorbs ad-libbed words spoken before or between
|
|
||||||
# the manuscript cue words without breaking the match ratio.
|
|
||||||
transcript_end = min(start_idx + words_to_check + 30, len(transcription))
|
|
||||||
|
|
||||||
if start_idx >= len(transcription):
|
if start_idx >= len(transcription):
|
||||||
return 0.0, 0, 0
|
return 0.0, 0, 0
|
||||||
|
|
||||||
|
words_to_check = min(len(phrase_words), window_size)
|
||||||
|
# Window only needs to cover pre_filler + phrase words + inter_filler slack
|
||||||
|
transcript_end = min(start_idx + pre_filler + words_to_check + inter_filler, len(transcription))
|
||||||
|
|
||||||
transcript_words = [
|
transcript_words = [
|
||||||
_normalize_token(transcription[j].word)
|
_normalize_token(transcription[j].word)
|
||||||
for j in range(start_idx, transcript_end)
|
for j in range(start_idx, transcript_end)
|
||||||
@@ -290,7 +310,14 @@ def _fuzzy_match_ratio(
|
|||||||
continue
|
continue
|
||||||
words_checked += 1
|
words_checked += 1
|
||||||
|
|
||||||
for j in range(t_pos, len(transcript_words)):
|
# First phrase word may be preceded by a long ad-lib; subsequent words
|
||||||
|
# should appear within a few positions of each other.
|
||||||
|
if matches == 0:
|
||||||
|
search_end = min(t_pos + pre_filler + 1, len(transcript_words))
|
||||||
|
else:
|
||||||
|
search_end = min(t_pos + inter_filler + 1, len(transcript_words))
|
||||||
|
|
||||||
|
for j in range(t_pos, search_end):
|
||||||
t_word = transcript_words[j]
|
t_word = transcript_words[j]
|
||||||
matched = False
|
matched = False
|
||||||
if normalized == t_word:
|
if normalized == t_word:
|
||||||
@@ -344,7 +371,11 @@ def _find_phrase_timestamp(
|
|||||||
best_first_offset = first_offset
|
best_first_offset = first_offset
|
||||||
best_end_offset = end_offset
|
best_end_offset = end_offset
|
||||||
|
|
||||||
if ratio >= 0.95:
|
# Sequential alignment: stop at the first position that clears the
|
||||||
|
# threshold. Continuing to scan the full transcript risks jumping
|
||||||
|
# to a higher-ratio match much later and skipping over subsequent
|
||||||
|
# markers' positions entirely.
|
||||||
|
if best_ratio >= fuzzy_threshold:
|
||||||
break
|
break
|
||||||
|
|
||||||
if best_ratio >= fuzzy_threshold and best_idx >= 0:
|
if best_ratio >= fuzzy_threshold and best_idx >= 0:
|
||||||
@@ -393,8 +424,8 @@ def align_markers_to_transcription(
|
|||||||
last_idx = 0
|
last_idx = 0
|
||||||
last_end_time = 0.0
|
last_end_time = 0.0
|
||||||
|
|
||||||
for marker_id, following_text, is_borrowed in contexts:
|
for marker_id, anchor_text, is_borrowed, anchor_type in contexts:
|
||||||
if not following_text.strip():
|
if not anchor_text.strip():
|
||||||
marker_time = last_end_time + 1.0
|
marker_time = last_end_time + 1.0
|
||||||
timings.append(
|
timings.append(
|
||||||
MarkerTiming(
|
MarkerTiming(
|
||||||
@@ -408,19 +439,35 @@ def align_markers_to_transcription(
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
idx, timestamp, confidence, match_end_idx = _find_phrase_timestamp(
|
idx, timestamp, confidence, match_end_idx = _find_phrase_timestamp(
|
||||||
following_text,
|
anchor_text,
|
||||||
transcription,
|
transcription,
|
||||||
start_from=last_idx,
|
start_from=last_idx,
|
||||||
fuzzy_threshold=fuzzy_threshold,
|
fuzzy_threshold=fuzzy_threshold,
|
||||||
)
|
)
|
||||||
|
|
||||||
if idx >= 0:
|
if idx >= 0:
|
||||||
|
if anchor_type == "after":
|
||||||
|
# Marker trails a narration block — place it at the END of the
|
||||||
|
# matched phrase (when those words finish being spoken).
|
||||||
|
end_idx = min(match_end_idx - 1, len(transcription) - 1)
|
||||||
|
marker_time = transcription[end_idx].end if transcription else 0.0
|
||||||
|
timings.append(
|
||||||
|
MarkerTiming(
|
||||||
|
marker_id=marker_id,
|
||||||
|
timestamp=marker_time,
|
||||||
|
context=f"(end of: {anchor_text[:40]})",
|
||||||
|
confidence=confidence,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
last_idx = match_end_idx
|
||||||
|
last_end_time = marker_time
|
||||||
|
else:
|
||||||
adjusted_time = max(0.0, timestamp - 0.5)
|
adjusted_time = max(0.0, timestamp - 0.5)
|
||||||
timings.append(
|
timings.append(
|
||||||
MarkerTiming(
|
MarkerTiming(
|
||||||
marker_id=marker_id,
|
marker_id=marker_id,
|
||||||
timestamp=adjusted_time,
|
timestamp=adjusted_time,
|
||||||
context=following_text[:50],
|
context=anchor_text[:50],
|
||||||
confidence=confidence,
|
confidence=confidence,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@@ -435,7 +482,7 @@ def align_markers_to_transcription(
|
|||||||
MarkerTiming(
|
MarkerTiming(
|
||||||
marker_id=marker_id,
|
marker_id=marker_id,
|
||||||
timestamp=-1.0,
|
timestamp=-1.0,
|
||||||
context=following_text[:50],
|
context=anchor_text[:50],
|
||||||
confidence=0.0,
|
confidence=0.0,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@@ -696,6 +743,16 @@ def build_render_plan(
|
|||||||
# Save narration end time (before outro)
|
# Save narration end time (before outro)
|
||||||
narration_end_time = total_duration
|
narration_end_time = total_duration
|
||||||
|
|
||||||
|
# Resolve any outro videos missing from videos.json via shared_assets.
|
||||||
|
if config.outro:
|
||||||
|
missing_outro_ids = [vid_id for vid_id in config.outro if vid_id not in videos]
|
||||||
|
if missing_outro_ids:
|
||||||
|
found = resolve_missing_videos(missing_outro_ids, project_path, config)
|
||||||
|
videos.update(found)
|
||||||
|
still_missing = [vid_id for vid_id in config.outro if vid_id not in videos]
|
||||||
|
for vid_id in still_missing:
|
||||||
|
print(f" WARNING: outro video '{vid_id}' not found in videos.json or shared_assets — skipped", flush=True)
|
||||||
|
|
||||||
# Build outro events (plays after narration ends)
|
# Build outro events (plays after narration ends)
|
||||||
outro_events = _extract_outro_events(
|
outro_events = _extract_outro_events(
|
||||||
config.outro,
|
config.outro,
|
||||||
|
|||||||
Reference in New Issue
Block a user