Adding some files
This commit is contained in:
+37
-9
@@ -234,7 +234,7 @@ Examples:
|
||||
args.res,
|
||||
)
|
||||
elif action == "trim":
|
||||
return cmd_trim(project_path, args.verbose, args.force, args.threshold)
|
||||
return cmd_trim(project_path, args.verbose, args.force, args.threshold, args.res)
|
||||
elif action == "transcode":
|
||||
return cmd_transcode(
|
||||
project_path,
|
||||
@@ -1197,7 +1197,7 @@ def cmd_preprocess(
|
||||
"""
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from .parser import parse_project_config, parse_videos
|
||||
from .preprocessor import preprocess_video
|
||||
from .preprocessor import preprocess_video, RES_CONFIGS
|
||||
from .models import VideoSource as _VideoSource
|
||||
|
||||
mode_str = f" ({res.upper()})" if res != "full" else ""
|
||||
@@ -1278,6 +1278,14 @@ def cmd_preprocess(
|
||||
if using_compressed and segment_id.endswith("_compressed"):
|
||||
segment_id = segment_id[: -len("_compressed")]
|
||||
|
||||
# For non-full res, write into the res subdir so stitch --res low finds the
|
||||
# files at narration/low/processed/ (narration.json still records the plain
|
||||
# "processed/..." path; stitch shifts the base dir itself).
|
||||
_res_cfg = RES_CONFIGS.get(res) if res != "full" else None
|
||||
if _res_cfg:
|
||||
_, _, _subdir = _res_cfg
|
||||
output_file = f"{_subdir}/processed/{segment_id}_processed.mov"
|
||||
else:
|
||||
output_file = f"processed/{segment_id}_processed.mov"
|
||||
output_path = narration_dir / output_file
|
||||
|
||||
@@ -1343,6 +1351,7 @@ def cmd_preprocess(
|
||||
verbose=False,
|
||||
force=force,
|
||||
custom_gnommo_scratch=gnommo_scratch,
|
||||
res=res,
|
||||
)
|
||||
return task
|
||||
|
||||
@@ -1371,6 +1380,7 @@ def cmd_preprocess(
|
||||
verbose,
|
||||
force,
|
||||
gnommo_scratch,
|
||||
res=res,
|
||||
)
|
||||
output_path = narration_dir / segment_source.output_file
|
||||
if output_path.exists():
|
||||
@@ -1396,8 +1406,8 @@ def cmd_preprocess(
|
||||
for key in _PRESERVE_KEYS:
|
||||
if key in existing_entry:
|
||||
entry[key] = existing_entry[key]
|
||||
# Point source_file to the processed output
|
||||
entry["source_file"] = segment_source.output_file
|
||||
# Always record the plain path; stitch shifts the base dir for low/tiny.
|
||||
entry["source_file"] = f"processed/{segment_id}_processed.mov"
|
||||
entry.setdefault("use_audio_channels", "auto")
|
||||
entry.setdefault("defer_loudnorm", True)
|
||||
existing_narration[segment_id] = entry
|
||||
@@ -1437,7 +1447,7 @@ def cmd_preprocess(
|
||||
continue
|
||||
print(f" Processing: {video_id}")
|
||||
preprocess_video(
|
||||
videos_dir, video_id, video_source, verbose, force, gnommo_scratch
|
||||
videos_dir, video_id, video_source, verbose, force, gnommo_scratch, res=res
|
||||
)
|
||||
|
||||
print("\nPreprocessing complete.")
|
||||
@@ -1454,6 +1464,7 @@ def cmd_trim(
|
||||
verbose: bool,
|
||||
force: bool = False,
|
||||
threshold_db: float = -40.0,
|
||||
res: str = "full",
|
||||
) -> int:
|
||||
"""
|
||||
Auto-detect silence bounds for all narration segments and write skip/take
|
||||
@@ -1482,6 +1493,22 @@ def cmd_trim(
|
||||
print(" Run 'gnommo -p <project> import' first.")
|
||||
return 1
|
||||
|
||||
# Build a lookup of raw source files by segment ID. Raw files give cleaner
|
||||
# silence detection — loudnorm can introduce early peaks in processed audio.
|
||||
_video_exts = {".mov", ".mp4", ".avi", ".mkv", ".m4v"}
|
||||
raw_dir = narration_dir / "raw_mov"
|
||||
compressed_dir = narration_dir / "raw_mp4"
|
||||
|
||||
raw_lookup: dict[str, Path] = {}
|
||||
for search_dir in (raw_dir, compressed_dir):
|
||||
if search_dir.exists():
|
||||
for f in search_dir.iterdir():
|
||||
if f.is_file() and f.suffix.lower() in _video_exts and not f.name.startswith("."):
|
||||
stem = f.stem
|
||||
if stem.endswith("_compressed"):
|
||||
stem = stem[: -len("_compressed")]
|
||||
raw_lookup[stem] = f
|
||||
|
||||
narration_json_path = narration_dir / "narration.json"
|
||||
raw_data: dict = _read_json(narration_json_path)
|
||||
|
||||
@@ -1495,14 +1522,15 @@ def cmd_trim(
|
||||
print(f" {seg_id}: already trimmed, skipping (use --force to redo)")
|
||||
continue
|
||||
|
||||
# Always analyse the raw source file — it's always present and has the
|
||||
# same audio as any processed version (processing is video-only).
|
||||
# Prefer raw file; fall back to processed if raw not available.
|
||||
source_path = raw_lookup.get(seg_id)
|
||||
if source_path is None:
|
||||
source_path = narration_dir / seg.source_file
|
||||
if not source_path.exists():
|
||||
print(f" {seg_id}: source file not found ({seg.source_file}), skipping")
|
||||
print(f" {seg_id}: source file not found, skipping")
|
||||
continue
|
||||
|
||||
print(f" {seg_id}: analysing...", end="", flush=True)
|
||||
print(f" {seg_id}: analysing {source_path.parent.name}/{source_path.name}...", end="", flush=True)
|
||||
first_sound, last_sound = detect_silence_bounds(
|
||||
source_path, noise_threshold_db=threshold_db, verbose=verbose
|
||||
)
|
||||
|
||||
@@ -550,6 +550,7 @@ def preprocess_video(
|
||||
verbose: bool = False,
|
||||
force: bool = False,
|
||||
custom_gnommo_scratch: Optional[Path] = None,
|
||||
res: str = "full",
|
||||
) -> Path:
|
||||
"""
|
||||
Apply preprocessing filters to a video source.
|
||||
@@ -562,6 +563,7 @@ def preprocess_video(
|
||||
video_id: ID of the video being processed
|
||||
video_source: VideoSource with source_file, filter, and output_file
|
||||
custom_gnommo_scratch: Optional external directory for intermediate files (e.g., SSD)
|
||||
res: Resolution preset — when not "full", source is downscaled before filtering
|
||||
|
||||
Returns:
|
||||
Path to the final preprocessed output file.
|
||||
@@ -586,6 +588,18 @@ def preprocess_video(
|
||||
filter_type=None,
|
||||
)
|
||||
|
||||
# For non-full res, downscale the raw source first so all subsequent
|
||||
# filters (chroma key, color grade, etc.) operate on the small file.
|
||||
if res != "full":
|
||||
cfg = RES_CONFIGS.get(res)
|
||||
if cfg:
|
||||
width, height, _ = cfg
|
||||
print(f" Downscaling source to {width}x{height} ({res})...")
|
||||
raw_low_dir = gnommo_scratch / f"raw_{res}"
|
||||
current_input = create_downscaled_video(
|
||||
current_input, raw_low_dir, width, height, force
|
||||
)
|
||||
|
||||
# Resolve channel setting (auto-detect if needed) and sanity check
|
||||
channel = video_source.use_audio_channels
|
||||
if channel == "auto":
|
||||
|
||||
+77
-20
@@ -182,14 +182,17 @@ def _extract_marker_contexts(
|
||||
slides: dict = None,
|
||||
videos: dict = None,
|
||||
audio: dict = None,
|
||||
) -> list[tuple[str, str, bool]]:
|
||||
) -> list[tuple[str, str, bool, str]]:
|
||||
"""
|
||||
Extract known markers and the text immediately following them from manuscript.
|
||||
|
||||
Unknown markers are filtered out and stripped from following text.
|
||||
Note: [cite:...] markers are already stripped at parse time.
|
||||
|
||||
Returns list of (marker_id, following_text, is_borrowed) tuples for known markers only.
|
||||
Returns list of (marker_id, anchor_text, is_borrowed, anchor_type) tuples.
|
||||
anchor_type is "before" (default — place before the matched phrase) or
|
||||
"after" (place at the end of the matched phrase — used for markers that
|
||||
trail a narration block and have no following text of their own).
|
||||
"""
|
||||
slides = slides or {}
|
||||
videos = videos or {}
|
||||
@@ -227,7 +230,7 @@ def _extract_marker_contexts(
|
||||
for i, (marker_id, following_text) in enumerate(raw_contexts):
|
||||
if following_text:
|
||||
words = following_text.split()[:10]
|
||||
contexts.append((marker_id, " ".join(words), False))
|
||||
contexts.append((marker_id, " ".join(words), False, "before"))
|
||||
else:
|
||||
borrowed = False
|
||||
for j in range(i + 1, len(raw_contexts)):
|
||||
@@ -236,11 +239,24 @@ def _extract_marker_contexts(
|
||||
if next_marker_id in (slides or {}):
|
||||
break
|
||||
words = next_text.split()[:10]
|
||||
contexts.append((marker_id, " ".join(words), True))
|
||||
contexts.append((marker_id, " ".join(words), True, "before"))
|
||||
borrowed = True
|
||||
break
|
||||
if not borrowed:
|
||||
contexts.append((marker_id, "", False))
|
||||
# No following text and blocked by a slide boundary — look
|
||||
# backward for the tail of the preceding narration block and
|
||||
# anchor to the END of those words instead of extrapolating.
|
||||
preceding_text = ""
|
||||
for k in range(i - 1, -1, -1):
|
||||
if raw_contexts[k][1]:
|
||||
preceding_text = raw_contexts[k][1]
|
||||
break
|
||||
if preceding_text:
|
||||
words = preceding_text.split()
|
||||
tail = " ".join(words[-6:])
|
||||
contexts.append((marker_id, tail, False, "after"))
|
||||
else:
|
||||
contexts.append((marker_id, "", False, "before"))
|
||||
|
||||
return contexts
|
||||
|
||||
@@ -250,13 +266,18 @@ def _fuzzy_match_ratio(
|
||||
transcription: list[TranscribedWord],
|
||||
start_idx: int,
|
||||
window_size: int = 10,
|
||||
pre_filler: int = 30,
|
||||
inter_filler: int = 3,
|
||||
) -> tuple[float, int, int]:
|
||||
"""
|
||||
Calculate how many words from phrase match the transcription at start_idx.
|
||||
|
||||
Words are matched sequentially: each phrase word must appear at or after
|
||||
the position of the previous match. This prevents false matches where
|
||||
phrase words appear out of order or far into the window.
|
||||
Words are matched sequentially. Two separate filler tolerances:
|
||||
- pre_filler: max words before the FIRST phrase word (absorbs ad-libs)
|
||||
- inter_filler: max words between consecutive phrase words (keeps the
|
||||
match tight so common words don't stretch the window far
|
||||
into later text, which would push last_idx past subsequent
|
||||
markers' positions)
|
||||
|
||||
Returns (ratio, first_match_offset, last_match_end_offset) where offsets
|
||||
are relative to start_idx. last_match_end_offset points past the last
|
||||
@@ -265,14 +286,13 @@ def _fuzzy_match_ratio(
|
||||
if not phrase_words:
|
||||
return 0.0, 0, 0
|
||||
|
||||
words_to_check = min(len(phrase_words), window_size)
|
||||
# +30 filler allowance: absorbs ad-libbed words spoken before or between
|
||||
# the manuscript cue words without breaking the match ratio.
|
||||
transcript_end = min(start_idx + words_to_check + 30, len(transcription))
|
||||
|
||||
if start_idx >= len(transcription):
|
||||
return 0.0, 0, 0
|
||||
|
||||
words_to_check = min(len(phrase_words), window_size)
|
||||
# Window only needs to cover pre_filler + phrase words + inter_filler slack
|
||||
transcript_end = min(start_idx + pre_filler + words_to_check + inter_filler, len(transcription))
|
||||
|
||||
transcript_words = [
|
||||
_normalize_token(transcription[j].word)
|
||||
for j in range(start_idx, transcript_end)
|
||||
@@ -290,7 +310,14 @@ def _fuzzy_match_ratio(
|
||||
continue
|
||||
words_checked += 1
|
||||
|
||||
for j in range(t_pos, len(transcript_words)):
|
||||
# First phrase word may be preceded by a long ad-lib; subsequent words
|
||||
# should appear within a few positions of each other.
|
||||
if matches == 0:
|
||||
search_end = min(t_pos + pre_filler + 1, len(transcript_words))
|
||||
else:
|
||||
search_end = min(t_pos + inter_filler + 1, len(transcript_words))
|
||||
|
||||
for j in range(t_pos, search_end):
|
||||
t_word = transcript_words[j]
|
||||
matched = False
|
||||
if normalized == t_word:
|
||||
@@ -344,7 +371,11 @@ def _find_phrase_timestamp(
|
||||
best_first_offset = first_offset
|
||||
best_end_offset = end_offset
|
||||
|
||||
if ratio >= 0.95:
|
||||
# Sequential alignment: stop at the first position that clears the
|
||||
# threshold. Continuing to scan the full transcript risks jumping
|
||||
# to a higher-ratio match much later and skipping over subsequent
|
||||
# markers' positions entirely.
|
||||
if best_ratio >= fuzzy_threshold:
|
||||
break
|
||||
|
||||
if best_ratio >= fuzzy_threshold and best_idx >= 0:
|
||||
@@ -393,8 +424,8 @@ def align_markers_to_transcription(
|
||||
last_idx = 0
|
||||
last_end_time = 0.0
|
||||
|
||||
for marker_id, following_text, is_borrowed in contexts:
|
||||
if not following_text.strip():
|
||||
for marker_id, anchor_text, is_borrowed, anchor_type in contexts:
|
||||
if not anchor_text.strip():
|
||||
marker_time = last_end_time + 1.0
|
||||
timings.append(
|
||||
MarkerTiming(
|
||||
@@ -408,19 +439,35 @@ def align_markers_to_transcription(
|
||||
continue
|
||||
|
||||
idx, timestamp, confidence, match_end_idx = _find_phrase_timestamp(
|
||||
following_text,
|
||||
anchor_text,
|
||||
transcription,
|
||||
start_from=last_idx,
|
||||
fuzzy_threshold=fuzzy_threshold,
|
||||
)
|
||||
|
||||
if idx >= 0:
|
||||
if anchor_type == "after":
|
||||
# Marker trails a narration block — place it at the END of the
|
||||
# matched phrase (when those words finish being spoken).
|
||||
end_idx = min(match_end_idx - 1, len(transcription) - 1)
|
||||
marker_time = transcription[end_idx].end if transcription else 0.0
|
||||
timings.append(
|
||||
MarkerTiming(
|
||||
marker_id=marker_id,
|
||||
timestamp=marker_time,
|
||||
context=f"(end of: {anchor_text[:40]})",
|
||||
confidence=confidence,
|
||||
)
|
||||
)
|
||||
last_idx = match_end_idx
|
||||
last_end_time = marker_time
|
||||
else:
|
||||
adjusted_time = max(0.0, timestamp - 0.5)
|
||||
timings.append(
|
||||
MarkerTiming(
|
||||
marker_id=marker_id,
|
||||
timestamp=adjusted_time,
|
||||
context=following_text[:50],
|
||||
context=anchor_text[:50],
|
||||
confidence=confidence,
|
||||
)
|
||||
)
|
||||
@@ -435,7 +482,7 @@ def align_markers_to_transcription(
|
||||
MarkerTiming(
|
||||
marker_id=marker_id,
|
||||
timestamp=-1.0,
|
||||
context=following_text[:50],
|
||||
context=anchor_text[:50],
|
||||
confidence=0.0,
|
||||
)
|
||||
)
|
||||
@@ -696,6 +743,16 @@ def build_render_plan(
|
||||
# Save narration end time (before outro)
|
||||
narration_end_time = total_duration
|
||||
|
||||
# Resolve any outro videos missing from videos.json via shared_assets.
|
||||
if config.outro:
|
||||
missing_outro_ids = [vid_id for vid_id in config.outro if vid_id not in videos]
|
||||
if missing_outro_ids:
|
||||
found = resolve_missing_videos(missing_outro_ids, project_path, config)
|
||||
videos.update(found)
|
||||
still_missing = [vid_id for vid_id in config.outro if vid_id not in videos]
|
||||
for vid_id in still_missing:
|
||||
print(f" WARNING: outro video '{vid_id}' not found in videos.json or shared_assets — skipped", flush=True)
|
||||
|
||||
# Build outro events (plays after narration ends)
|
||||
outro_events = _extract_outro_events(
|
||||
config.outro,
|
||||
|
||||
Reference in New Issue
Block a user