Compare commits
33 Commits
4b4d6caacf
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| b9b5a8e77d | |||
| 980bb84dac | |||
| 20aba06be1 | |||
| 12b052eb1d | |||
| cf40a19b4e | |||
| 5d7c77db91 | |||
| 87424a6531 | |||
| 60e2f20b0f | |||
| 4a24d3987f | |||
| 7c53daec8a | |||
| 41d96501b6 | |||
| ff47ffea8f | |||
| b4c48d81b0 | |||
| 409d7790c0 | |||
| 994a2e0bb6 | |||
| feb4df0506 | |||
| b9376cd650 | |||
| 0c2d097cdf | |||
| 2dff8f45b9 | |||
| 00e01237ed | |||
| 3a9e5d17e9 | |||
| dac6dfc48b | |||
| a351022a8f | |||
| efd1eba5df | |||
| ad07de2e9a | |||
| e6a6968109 | |||
| d722272edc | |||
| f8d359543a | |||
| 12bf494f2d | |||
| 831c0c4e60 | |||
| f0387f24bb | |||
| 26d027a44e | |||
| 2516e3eeef |
+21
-2
@@ -7,16 +7,35 @@ __pycache__/
|
|||||||
venv/
|
venv/
|
||||||
.venv/
|
.venv/
|
||||||
*.egg-info/
|
*.egg-info/
|
||||||
|
*.pdf
|
||||||
Video1/*
|
*.png
|
||||||
|
*.key
|
||||||
|
*.bak
|
||||||
|
shared_assets/*
|
||||||
|
Video*/*
|
||||||
|
Illustrations
|
||||||
# OS
|
# OS
|
||||||
.DS_Store
|
.DS_Store
|
||||||
Thumbs.db
|
Thumbs.db
|
||||||
|
*/intermediate/*
|
||||||
|
|
||||||
# Output
|
# Output
|
||||||
**/out/
|
**/out/
|
||||||
*.mp4
|
*.mp4
|
||||||
|
*.mov
|
||||||
|
*.mp3
|
||||||
|
*.aifc
|
||||||
|
*.wav
|
||||||
|
|
||||||
|
|
||||||
# Temp
|
# Temp
|
||||||
*.tmp
|
*.tmp
|
||||||
.cache/
|
.cache/
|
||||||
|
|
||||||
|
# Secrets
|
||||||
|
.env
|
||||||
|
.env.*
|
||||||
|
|
||||||
|
# Sync state (local only, per-environment)
|
||||||
|
.gnommo_sync.json
|
||||||
|
.gnommo_sync.prod.json
|
||||||
|
|||||||
@@ -7,13 +7,14 @@
|
|||||||
"platform_targets": ["youtube"],
|
"platform_targets": ["youtube"],
|
||||||
"status": "scripted",
|
"status": "scripted",
|
||||||
"youtube_url": null,
|
"youtube_url": null,
|
||||||
"resolution": [1960, 1080],
|
"resolution": [1920, 1080],
|
||||||
"fps": 30,
|
"fps": 30,
|
||||||
"duration_seconds": null,
|
"duration_seconds": null,
|
||||||
"default_filters": {
|
"default_filters": {
|
||||||
"audioonly": [
|
"audioonly": [
|
||||||
{
|
{
|
||||||
"type": "audio_normalize",
|
"type": "audio_normalize",
|
||||||
|
"enable":false,
|
||||||
"compress": false,
|
"compress": false,
|
||||||
"normalize": true,
|
"normalize": true,
|
||||||
"target_lufs": -14,
|
"target_lufs": -14,
|
||||||
@@ -24,6 +25,7 @@
|
|||||||
"talkinghead": [
|
"talkinghead": [
|
||||||
{
|
{
|
||||||
"type": "audio_normalize",
|
"type": "audio_normalize",
|
||||||
|
"enable":false,
|
||||||
"normalize": true,
|
"normalize": true,
|
||||||
"target_lufs": -14,
|
"target_lufs": -14,
|
||||||
"target_lra": 11,
|
"target_lra": 11,
|
||||||
|
|||||||
@@ -0,0 +1,9 @@
|
|||||||
|
#!/bin/sh
# Batch driver: invokes ./gnommo.sh once per project (video1..video4) with the
# "all" command and the --force and --prod flags.
# There is no `set -e`, so a failing project does not stop the following ones.

./gnommo.sh -p video1 all --force --prod
./gnommo.sh -p video2 all --force --prod
./gnommo.sh -p video3 all --force --prod
./gnommo.sh -p video4 all --force --prod
# video5/video6 are disabled; note these commented-out commands omit --prod.
#./gnommo.sh -p video5 all --force
#./gnommo.sh -p video6 all --force
|
||||||
|
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
#!/bin/bash
# Convenience wrapper: runs the `claude` CLI with --resume and a hard-coded
# session ID.
# NOTE(review): the session ID looks specific to one user/machine — confirm
# this script is meant to be committed.

claude --resume df8f915f-0f99-4e0f-b345-3562a49fcb06
|
||||||
|
|
||||||
|
|
||||||
@@ -4,8 +4,7 @@
|
|||||||
"description": "In this video, I demonstrate the Gnommo video editing pipeline - a code-first approach to creating presenter-mode videos from Keynote presentations.",
|
"description": "In this video, I demonstrate the Gnommo video editing pipeline - a code-first approach to creating presenter-mode videos from Keynote presentations.",
|
||||||
"footer": "Subscribe for more tutorials!\nTwitter: @example",
|
"footer": "Subscribe for more tutorials!\nTwitter: @example",
|
||||||
"resolution": [1920, 1080],
|
"resolution": [1920, 1080],
|
||||||
"fps": 30,
|
"fps": 30,
|
||||||
"gnommo_scratch": null,
|
|
||||||
"defaultSlideType": "fullscreen",
|
"defaultSlideType": "fullscreen",
|
||||||
"keynote_file": "media/example.key",
|
"keynote_file": "media/example.key",
|
||||||
"transcript": "media/videos/talking_head.transcript.json",
|
"transcript": "media/videos/talking_head.transcript.json",
|
||||||
@@ -14,10 +13,12 @@
|
|||||||
"videos": "media/videos/videos.json",
|
"videos": "media/videos/videos.json",
|
||||||
"slides": "media/slides/Example/slides.json",
|
"slides": "media/slides/Example/slides.json",
|
||||||
"audio": "media/audio/audio.json",
|
"audio": "media/audio/audio.json",
|
||||||
|
"output": "final.mp4",
|
||||||
"default_filters": {
|
"default_filters": {
|
||||||
"talkinghead": [
|
"talkinghead": [
|
||||||
{
|
{
|
||||||
"type": "audio_normalize",
|
"type": "audio_normalize",
|
||||||
|
"enable":false,
|
||||||
"eq_bands": [
|
"eq_bands": [
|
||||||
{"freq": 47, "gain": -15, "type": "lowshelf"},
|
{"freq": 47, "gain": -15, "type": "lowshelf"},
|
||||||
{"freq": 107, "gain": -1.3, "q": 1.2},
|
{"freq": 107, "gain": -1.3, "q": 1.2},
|
||||||
|
|||||||
+101
-2
@@ -11,10 +11,66 @@ Files are looked up first locally, then in the cache at:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import configparser
|
import configparser
|
||||||
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional, Tuple
|
from typing import Optional, Tuple
|
||||||
|
|
||||||
_cache_config: Optional[dict] = None
|
_cache_config: Optional[dict] = None
|
||||||
|
# Cached [performance] options from ~/.gnommo.conf; None until first read.
_perf_config: Optional[dict] = None


def _load_perf_config() -> dict:
    """Return the cached [performance] options, reading ~/.gnommo.conf once.

    The returned dict may contain:
        cpu_limit            float, only present when the option parses as a float
        render_chunk_slides  raw option string, validated by get_render_chunk_size()

    A missing file, missing section, or missing option simply leaves the
    corresponding key out of the dict.
    """
    global _perf_config
    if _perf_config is None:
        # Assign before reading so a failing read is not retried on every call.
        _perf_config = {}
        config_path = Path.home() / ".gnommo.conf"
        if config_path.exists():
            cfg = configparser.ConfigParser()
            cfg.read(config_path)
            if cfg.has_option("performance", "cpu_limit"):
                try:
                    _perf_config["cpu_limit"] = float(
                        cfg.get("performance", "cpu_limit")
                    )
                except ValueError:
                    pass  # unparsable value: treat as "not configured"
            if cfg.has_option("performance", "render_chunk_slides"):
                _perf_config["render_chunk_slides"] = cfg.get(
                    "performance", "render_chunk_slides"
                )
    return _perf_config


def get_ffmpeg_thread_count() -> int:
    """Return FFmpeg thread count based on [performance] cpu_limit in ~/.gnommo.conf.

    cpu_limit is a fraction of logical CPUs (e.g. 0.8 = 80%).
    Defaults to 1 when not configured, which is safe on memory-constrained machines.
    A non-finite cpu_limit (nan/inf) also falls back to 1 instead of raising.

    Example ~/.gnommo.conf:
        [performance]
        cpu_limit = 0.8

    Returns:
        Thread count, always >= 1.
    """
    cpu_limit = _load_perf_config().get("cpu_limit")
    if cpu_limit is None:
        return 1
    cpu_count = os.cpu_count() or 1
    try:
        return max(1, int(cpu_count * cpu_limit))
    except (ValueError, OverflowError):
        # int(nan) raises ValueError, int(inf) raises OverflowError.
        return 1


def get_render_chunk_size() -> Optional[int]:
    """Return slides-per-chunk for auto-chunked rendering, or None if not configured.

    When set, cmd_render splits the filter graph into chunks of this many slides
    to avoid OOM from allocating filter buffers for the entire video at once.

    Example ~/.gnommo.conf:
        [performance]
        render_chunk_slides = 15

    Returns:
        Chunk size (>= 1), or None when the option is absent or not an integer.
    """
    raw = _load_perf_config().get("render_chunk_slides")
    if raw is None:
        return None
    try:
        return max(1, int(raw))
    except (ValueError, TypeError):
        return None
|
||||||
|
|
||||||
|
|
||||||
def load_cache_config() -> Optional[Path]:
|
def load_cache_config() -> Optional[Path]:
|
||||||
@@ -73,18 +129,61 @@ def resolve_with_cache(
|
|||||||
if cache_base is None:
|
if cache_base is None:
|
||||||
return local_path, False # No cache configured
|
return local_path, False # No cache configured
|
||||||
|
|
||||||
# Build cache path: {cache_base}/{project_name}/{relative_path}
|
# Try 1: path inside the project → cache_base / project_name / relative
|
||||||
try:
|
try:
|
||||||
relative = local_path.relative_to(project_path)
|
relative = local_path.relative_to(project_path)
|
||||||
cache_path = cache_base / project_path.name / relative
|
cache_path = cache_base / project_path.name / relative
|
||||||
if cache_path.exists():
|
if cache_path.exists():
|
||||||
return cache_path, True
|
return cache_path, True
|
||||||
except ValueError:
|
except ValueError:
|
||||||
pass # local_path is not relative to project_path
|
pass # local_path is not under project_path
|
||||||
|
|
||||||
|
# Try 2: path relative to gnommo root (sibling dirs like shared_assets)
|
||||||
|
# e.g. shared_assets/pexels/file.mp4 → cache_base / shared_assets / pexels / file.mp4
|
||||||
|
try:
|
||||||
|
relative = local_path.relative_to(project_path.parent)
|
||||||
|
cache_path = cache_base / relative
|
||||||
|
if cache_path.exists():
|
||||||
|
return cache_path, True
|
||||||
|
except ValueError:
|
||||||
|
pass # local_path is not under project_path.parent either
|
||||||
|
|
||||||
return local_path, False
|
return local_path, False
|
||||||
|
|
||||||
|
|
||||||
|
def load_server_config() -> Optional[dict]:
    """Read the [server] rsync settings from ~/.gnommo.conf.

    Expected config:
        [server]
        host = server.example.com
        user = root
        path = /gnommo/project

    Missing options fall back to user "root", path "/gnommo/project" and
    port "22" (all values are returned as strings).

    Returns:
        Dict with keys host, user, path and port, or None when the file,
        the [server] section, or a non-empty host is missing.
    """
    conf_file = Path.home() / ".gnommo.conf"
    if not conf_file.exists():
        return None

    parser = configparser.ConfigParser()
    parser.read(conf_file)
    if not parser.has_section("server"):
        return None

    # Option name -> fallback used when the option is absent.
    fallbacks = {
        "host": None,
        "user": "root",
        "path": "/gnommo/project",
        "port": "22",
    }
    settings = {
        option: parser.get("server", option, fallback=default)
        for option, default in fallbacks.items()
    }

    # A server entry without a host is unusable.
    if not settings["host"]:
        return None
    return settings
|
||||||
|
|
||||||
|
|
||||||
def is_cache_configured() -> bool:
    """Report whether load_cache_config() yields a cache location (for status messages)."""
    cache_base = load_cache_config()
    return cache_base is not None
|
||||||
|
|||||||
+1069
-162
File diff suppressed because it is too large
Load Diff
+60
-37
@@ -1,4 +1,4 @@
|
|||||||
"""Hand off a finished video to the gnommoweb server.
|
"""Hand off a finished video to MinIO storage via gnommoeditor (prod) or gnommoweb (local).
|
||||||
|
|
||||||
Works for any gnommo project type: parent videos and shorts alike.
|
Works for any gnommo project type: parent videos and shorts alike.
|
||||||
|
|
||||||
@@ -10,14 +10,17 @@ Usage:
|
|||||||
Reads project.json for the 'output_video' field (path relative to the
|
Reads project.json for the 'output_video' field (path relative to the
|
||||||
project directory). Override with --file.
|
project directory). Override with --file.
|
||||||
|
|
||||||
On success:
|
On success (production):
|
||||||
- Uploads the video to MinIO via POST /api/projects/:handle/handoff
|
- Uploads the video to MinIO via POST /api/assets/upload on gnommoeditor
|
||||||
- For shorts: server auto-advances status to 'processed'
|
- Updates .gnommo_sync.prod.json with asset URL
|
||||||
- Bumps video_version on every upload
|
|
||||||
|
On success (local):
|
||||||
|
- Uploads via POST /api/projects/:handle/handoff on gnommoweb
|
||||||
- Updates .gnommo_sync.json with new video_version
|
- Updates .gnommo_sync.json with new video_version
|
||||||
|
|
||||||
Configuration (from .env or environment):
|
Configuration (from .env or environment):
|
||||||
GNOMMOWEB_URL Base URL (e.g. http://localhost:3001)
|
GNOMMOEDITOR_URL Base URL for production (e.g. https://editor.glitch.university)
|
||||||
|
GNOMMOWEB_URL Base URL for local dev (e.g. http://localhost:3001)
|
||||||
GNOMMOWEB_API_KEY Bearer token (CONTENT_API_KEY from gnommoweb)
|
GNOMMOWEB_API_KEY Bearer token (CONTENT_API_KEY from gnommoweb)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -83,13 +86,9 @@ def cmd_handoff(
|
|||||||
_load_env_file()
|
_load_env_file()
|
||||||
|
|
||||||
if prod:
|
if prod:
|
||||||
api_url = os.environ.get("GNOMMOWEB_PROD_URL", "").rstrip("/")
|
api_url = os.environ.get("GNOMMOEDITOR_URL", "").rstrip("/")
|
||||||
api_key = os.environ.get("GNOMMOWEB_PROD_API_KEY", "")
|
|
||||||
if not api_url:
|
if not api_url:
|
||||||
print("Error: GNOMMOWEB_PROD_URL is not set.", file=sys.stderr)
|
print("Error: GNOMMOEDITOR_URL is not set.", file=sys.stderr)
|
||||||
return 1
|
|
||||||
if not api_key:
|
|
||||||
print("Error: GNOMMOWEB_PROD_API_KEY is not set.", file=sys.stderr)
|
|
||||||
return 1
|
return 1
|
||||||
else:
|
else:
|
||||||
api_url = os.environ.get("GNOMMOWEB_URL", "").rstrip("/")
|
api_url = os.environ.get("GNOMMOWEB_URL", "").rstrip("/")
|
||||||
@@ -102,7 +101,7 @@ def cmd_handoff(
|
|||||||
return 1
|
return 1
|
||||||
|
|
||||||
if verbose:
|
if verbose:
|
||||||
target = "production" if prod else "local"
|
target = "production (gnommoeditor)" if prod else "local"
|
||||||
print(f" → {target}: {api_url}")
|
print(f" → {target}: {api_url}")
|
||||||
|
|
||||||
project_file = project_path / "project.json"
|
project_file = project_path / "project.json"
|
||||||
@@ -147,13 +146,23 @@ def cmd_handoff(
|
|||||||
|
|
||||||
# ── Upload ─────────────────────────────────────────────────────────────────
|
# ── Upload ─────────────────────────────────────────────────────────────────
|
||||||
try:
|
try:
|
||||||
with open(video_path, "rb") as vf:
|
if prod:
|
||||||
r = requests.post(
|
# gnommoeditor: POST /api/assets/upload — field name is 'file', no auth
|
||||||
f"{api_url}/api/projects/{project_id}/handoff",
|
with open(video_path, "rb") as vf:
|
||||||
files={"video": (video_path.name, vf, _mime_type(video_path))},
|
r = requests.post(
|
||||||
headers={"Authorization": f"Bearer {api_key}"},
|
f"{api_url}/api/assets/upload",
|
||||||
timeout=None, # large files may take a while
|
files={"file": (video_path.name, vf, _mime_type(video_path))},
|
||||||
)
|
timeout=None,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# gnommoweb: POST /api/projects/:id/handoff
|
||||||
|
with open(video_path, "rb") as vf:
|
||||||
|
r = requests.post(
|
||||||
|
f"{api_url}/api/projects/{project_id}/handoff",
|
||||||
|
files={"video": (video_path.name, vf, _mime_type(video_path))},
|
||||||
|
headers={"Authorization": f"Bearer {api_key}"},
|
||||||
|
timeout=None,
|
||||||
|
)
|
||||||
except requests.exceptions.ConnectionError:
|
except requests.exceptions.ConnectionError:
|
||||||
print(f"✗ Could not connect to {api_url}")
|
print(f"✗ Could not connect to {api_url}")
|
||||||
return 1
|
return 1
|
||||||
@@ -167,28 +176,42 @@ def cmd_handoff(
|
|||||||
return 1
|
return 1
|
||||||
|
|
||||||
result = r.json()
|
result = r.json()
|
||||||
video_version = result.get("video_version", "?")
|
|
||||||
video_url = result.get("video_url", "")
|
|
||||||
|
|
||||||
# ── Write sync state ───────────────────────────────────────────────────────
|
# ── Write sync state ───────────────────────────────────────────────────────
|
||||||
now_iso = datetime.now(tz=timezone.utc).isoformat(timespec="seconds")
|
now_iso = datetime.now(tz=timezone.utc).isoformat(timespec="seconds")
|
||||||
existing_sync = _read_sync(project_path, prod)
|
existing_sync = _read_sync(project_path, prod)
|
||||||
_write_sync(
|
|
||||||
project_path,
|
|
||||||
{
|
|
||||||
**existing_sync,
|
|
||||||
"last_handoff_at": now_iso,
|
|
||||||
"video_version": video_version,
|
|
||||||
"server_updated_at": result.get("asset", {}).get(
|
|
||||||
"updated_at", existing_sync.get("server_updated_at")
|
|
||||||
),
|
|
||||||
},
|
|
||||||
prod,
|
|
||||||
)
|
|
||||||
|
|
||||||
print(f"✓ {project_id} → v{video_version} [processed]")
|
if prod:
|
||||||
if video_url:
|
# gnommoeditor response: { asset: { id, url, minio_object_key, ... } }
|
||||||
print(f" {video_url}")
|
asset = result.get("asset", {})
|
||||||
|
asset_url = asset.get("url", "")
|
||||||
|
_write_sync(
|
||||||
|
project_path,
|
||||||
|
{**existing_sync, "last_handoff_at": now_iso, "asset_url": asset_url},
|
||||||
|
prod,
|
||||||
|
)
|
||||||
|
print(f"✓ {project_id} → uploaded [asset #{asset.get('id')}]")
|
||||||
|
if asset_url:
|
||||||
|
print(f" {asset_url}")
|
||||||
|
else:
|
||||||
|
# gnommoweb response: { video_version, video_url, asset: { updated_at } }
|
||||||
|
video_version = result.get("video_version", "?")
|
||||||
|
video_url = result.get("video_url", "")
|
||||||
|
_write_sync(
|
||||||
|
project_path,
|
||||||
|
{
|
||||||
|
**existing_sync,
|
||||||
|
"last_handoff_at": now_iso,
|
||||||
|
"video_version": video_version,
|
||||||
|
"server_updated_at": result.get("asset", {}).get(
|
||||||
|
"updated_at", existing_sync.get("server_updated_at")
|
||||||
|
),
|
||||||
|
},
|
||||||
|
prod,
|
||||||
|
)
|
||||||
|
print(f"✓ {project_id} → v{video_version} [processed]")
|
||||||
|
if video_url:
|
||||||
|
print(f" {video_url}")
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|||||||
+23
-5
@@ -49,6 +49,7 @@ class ProjectConfig:
|
|||||||
slides_path: str = "slides.json" # path to slides.json relative to project
|
slides_path: str = "slides.json" # path to slides.json relative to project
|
||||||
videos_path: str = "videos.json" # path to videos.json relative to project
|
videos_path: str = "videos.json" # path to videos.json relative to project
|
||||||
audio_path: str = "audio.json" # path to audio.json relative to project
|
audio_path: str = "audio.json" # path to audio.json relative to project
|
||||||
|
transcript_path: Optional[str] = None # path to transcript.json relative to project (always saved locally)
|
||||||
audio_source: Optional[str] = None # defaults to talking head
|
audio_source: Optional[str] = None # defaults to talking head
|
||||||
main_video: Optional[
|
main_video: Optional[
|
||||||
Union[str, list]
|
Union[str, list]
|
||||||
@@ -56,6 +57,9 @@ class ProjectConfig:
|
|||||||
gnommo_scratch: Optional[
|
gnommo_scratch: Optional[
|
||||||
str
|
str
|
||||||
] = None # directory for intermediate files (e.g., external SSD)
|
] = None # directory for intermediate files (e.g., external SSD)
|
||||||
|
process_cache: Optional[
|
||||||
|
str
|
||||||
|
] = None # external directory for processed/combined outputs (saves laptop disk space)
|
||||||
default_begin: float = 0.0 # Trim this many seconds from the start of each segment (if no explicit begin/skip)
|
default_begin: float = 0.0 # Trim this many seconds from the start of each segment (if no explicit begin/skip)
|
||||||
default_end_trim: float = 0.0 # Trim this many seconds from the end of each segment (if no explicit end/take)
|
default_end_trim: float = 0.0 # Trim this many seconds from the end of each segment (if no explicit end/take)
|
||||||
# Outro sequence - plays after narration ends (not marker-triggered)
|
# Outro sequence - plays after narration ends (not marker-triggered)
|
||||||
@@ -132,6 +136,15 @@ class GnommoKeyConfig:
|
|||||||
# Can help with edge color contamination
|
# Can help with edge color contamination
|
||||||
alpha_bias: tuple[int, int, int] = None
|
alpha_bias: tuple[int, int, int] = None
|
||||||
|
|
||||||
|
# Luminance protection: pixels with luma above this stay fully opaque (0-255, -1 = off)
|
||||||
|
# Use ~220 to protect white objects (headphones, teeth) from being partially keyed.
|
||||||
|
protect_luma: int = -1
|
||||||
|
|
||||||
|
# Shadow boost: extra key strength for dark pixels (0.0-5.0, 0 = off)
|
||||||
|
# Ramps up key signal proportionally to how dark a pixel is, helping key dark greens
|
||||||
|
# without affecting bright foreground areas. Values 1.0-2.0 are typical.
|
||||||
|
shadow_boost: float = 0.0
|
||||||
|
|
||||||
# Edge refinement
|
# Edge refinement
|
||||||
edge_erode: int = 0 # Pixels to erode from alpha edge (0-5)
|
edge_erode: int = 0 # Pixels to erode from alpha edge (0-5)
|
||||||
edge_soften: float = 0.0 # Blur the alpha edge (0-5 pixels)
|
edge_soften: float = 0.0 # Blur the alpha edge (0-5 pixels)
|
||||||
@@ -192,7 +205,7 @@ class AudioNormalizeConfig:
|
|||||||
Applies noise reduction, compression, and loudness normalization
|
Applies noise reduction, compression, and loudness normalization
|
||||||
to improve audio quality and consistency.
|
to improve audio quality and consistency.
|
||||||
"""
|
"""
|
||||||
|
enabled: bool = True # Master switch to enable/disable all audio processing
|
||||||
# Parametric EQ bands (applied before other processing)
|
# Parametric EQ bands (applied before other processing)
|
||||||
eq_bands: list[EQBand] = field(default_factory=list)
|
eq_bands: list[EQBand] = field(default_factory=list)
|
||||||
|
|
||||||
@@ -297,10 +310,14 @@ class VideoSource:
|
|||||||
False # If True, skip loudnorm during preprocessing (apply after concatenation)
|
False # If True, skip loudnorm during preprocessing (apply after concatenation)
|
||||||
)
|
)
|
||||||
volume: float = 1.0 # Volume multiplier (1.0=full, >1.0=boost, <1.0=reduce)
|
volume: float = 1.0 # Volume multiplier (1.0=full, >1.0=boost, <1.0=reduce)
|
||||||
layer: str = "above" # "above" = renders on top of slides; "below" = behind slides
|
layer: str = "above" # "above" = on top of slides; "mid" = above narrator/below slides; "below" = behind narrator
|
||||||
duration: Optional[float] = None # Pre-probed file duration in seconds (set by import)
|
duration: Optional[
|
||||||
|
float
|
||||||
|
] = None # Pre-probed file duration in seconds (set by import)
|
||||||
has_audio: Optional[bool] = None # Pre-detected audio presence (set by import)
|
has_audio: Optional[bool] = None # Pre-detected audio presence (set by import)
|
||||||
end_on: Optional[str] = None # When video event ends: "next_slide" | "end" | "take" (None = marker-type default)
|
end_on: Optional[
|
||||||
|
str
|
||||||
|
] = None # When video event ends: "next_slide" | "end" | "take" (None = marker-type default)
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -333,7 +350,7 @@ class SlideEvent:
|
|||||||
class AudioDefinition:
|
class AudioDefinition:
|
||||||
"""Definition of an audio clip from audio.json."""
|
"""Definition of an audio clip from audio.json."""
|
||||||
|
|
||||||
file: str # Audio filename (relative to audio.json location)
|
file: str # Audio filename (relative to audio.json location, or to shared_assets/media/audio/ if is_shared)
|
||||||
volume: float = 1.0 # Volume multiplier (0.0-1.0)
|
volume: float = 1.0 # Volume multiplier (0.0-1.0)
|
||||||
loop: bool = False # If True, loop for entire duration from trigger point
|
loop: bool = False # If True, loop for entire duration from trigger point
|
||||||
overlap: Optional[float] = None # Crossfade overlap in seconds when looping
|
overlap: Optional[float] = None # Crossfade overlap in seconds when looping
|
||||||
@@ -341,6 +358,7 @@ class AudioDefinition:
|
|||||||
False # If True, audio continues playing during narration pauses
|
False # If True, audio continues playing during narration pauses
|
||||||
)
|
)
|
||||||
duration: Optional[float] = None # Pre-probed duration in seconds (set by import)
|
duration: Optional[float] = None # Pre-probed duration in seconds (set by import)
|
||||||
|
is_shared: bool = False # If True, file is relative to shared_assets/media/audio/
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|||||||
+156
-8
@@ -25,6 +25,35 @@ def _read_json(path: Path) -> Any:
|
|||||||
return json.loads(text) if text else {}
|
return json.loads(text) if text else {}
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_case_insensitive(path: Path) -> Path:
|
||||||
|
"""Return the real on-disk path, resolving each component case-insensitively.
|
||||||
|
|
||||||
|
On case-insensitive filesystems (macOS) paths just work. On case-sensitive
|
||||||
|
ones (Linux/WSL) a mismatch between project.json and the actual directory
|
||||||
|
name causes a FileNotFoundError. This walks each component and picks the
|
||||||
|
first directory entry whose name matches case-insensitively, returning the
|
||||||
|
corrected path. If the path already exists, it is returned unchanged.
|
||||||
|
"""
|
||||||
|
if path.exists():
|
||||||
|
return path
|
||||||
|
|
||||||
|
resolved = path.anchor and Path(path.anchor) or Path(".")
|
||||||
|
for part in path.parts[len(Path(path.anchor).parts) :]:
|
||||||
|
if (resolved / part).exists():
|
||||||
|
resolved = resolved / part
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
match = next(
|
||||||
|
(p for p in resolved.iterdir() if p.name.lower() == part.lower()),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
except (OSError, NotADirectoryError):
|
||||||
|
match = None
|
||||||
|
resolved = match if match else (resolved / part)
|
||||||
|
|
||||||
|
return resolved
|
||||||
|
|
||||||
|
|
||||||
def parse_manuscript(
|
def parse_manuscript(
|
||||||
project_path: Path,
|
project_path: Path,
|
||||||
) -> tuple[str, list[str], list[tuple[int, str]], list[Citation]]:
|
) -> tuple[str, list[str], list[tuple[int, str]], list[Citation]]:
|
||||||
@@ -55,9 +84,9 @@ def parse_manuscript(
|
|||||||
text = re.sub(r"\[marker:[^\]]+\]", "", text)
|
text = re.sub(r"\[marker:[^\]]+\]", "", text)
|
||||||
text = re.sub(r"\[cue:[^\]]+\]", "", text)
|
text = re.sub(r"\[cue:[^\]]+\]", "", text)
|
||||||
|
|
||||||
# Extract all valid markers like [S1], [video:demo], [Zoom2], etc.
|
# Extract all valid markers like [S1], [video:demo], [vf2m:pexels/clip-name], etc.
|
||||||
# Include . in pattern to catch markers with file extensions (so validator can warn about them)
|
# Include / and - to capture pexels/library video IDs; . to catch file extensions in markers.
|
||||||
markers = re.findall(r"\[([A-Za-z0-9_:.]+)\]", text)
|
markers = re.findall(r"\[([A-Za-z0-9_:./\-]+)\]", text)
|
||||||
|
|
||||||
# Find malformed markers (missing brackets, extra spaces, etc.)
|
# Find malformed markers (missing brackets, extra spaces, etc.)
|
||||||
malformed: list[tuple[int, str]] = []
|
malformed: list[tuple[int, str]] = []
|
||||||
@@ -229,9 +258,10 @@ def parse_project_config(project_path: Path) -> ProjectConfig:
|
|||||||
slides_path=data.get("slides", "slides.json"),
|
slides_path=data.get("slides", "slides.json"),
|
||||||
videos_path=data.get("videos", "videos.json"),
|
videos_path=data.get("videos", "videos.json"),
|
||||||
audio_path=data.get("audio", "audio.json"),
|
audio_path=data.get("audio", "audio.json"),
|
||||||
|
transcript_path=data.get("transcript"),
|
||||||
audio_source=data.get("audio_source"),
|
audio_source=data.get("audio_source"),
|
||||||
main_video=data.get("main_video"),
|
main_video=data.get("main_video"),
|
||||||
gnommo_scratch=data.get("gnommo_scratch"),
|
process_cache=data.get("process_cache"),
|
||||||
default_begin=float(data.get("default_begin", 0.0)),
|
default_begin=float(data.get("default_begin", 0.0)),
|
||||||
default_end_trim=float(data.get("default_end_trim", 0.0)),
|
default_end_trim=float(data.get("default_end_trim", 0.0)),
|
||||||
outro=data.get("outro", []),
|
outro=data.get("outro", []),
|
||||||
@@ -263,7 +293,10 @@ def parse_slides(
|
|||||||
) -> dict[str, SlideDefinition]:
|
) -> dict[str, SlideDefinition]:
|
||||||
"""Parse slides.json into slide definitions."""
|
"""Parse slides.json into slide definitions."""
|
||||||
if config and config.slides_path:
|
if config and config.slides_path:
|
||||||
local_slides_path = project_path / config.slides_path
|
# Lowercase the path so that a capital-cased project name embedded by
|
||||||
|
# the import stage (e.g. "media/slides/video2/slides.json") resolves
|
||||||
|
# correctly on case-sensitive filesystems (WSL/Linux).
|
||||||
|
local_slides_path = project_path / config.slides_path.lower()
|
||||||
else:
|
else:
|
||||||
local_slides_path = project_path / "slides.json"
|
local_slides_path = project_path / "slides.json"
|
||||||
|
|
||||||
@@ -342,6 +375,7 @@ def parse_audio(
|
|||||||
overlap=overlap,
|
overlap=overlap,
|
||||||
ignore_pauses=bool(audio_data.get("ignore_pauses", False)),
|
ignore_pauses=bool(audio_data.get("ignore_pauses", False)),
|
||||||
duration=float(raw_duration) if raw_duration is not None else None,
|
duration=float(raw_duration) if raw_duration is not None else None,
|
||||||
|
is_shared=bool(audio_data.get("is_shared", False)),
|
||||||
)
|
)
|
||||||
|
|
||||||
return audio, audio_dir
|
return audio, audio_dir
|
||||||
@@ -356,6 +390,7 @@ def parse_timestamp(value: str) -> float:
|
|||||||
- "2:54" → 2 minutes 54 seconds (174.0)
|
- "2:54" → 2 minutes 54 seconds (174.0)
|
||||||
- "1:23:45" → 1 hour 23 minutes 45 seconds
|
- "1:23:45" → 1 hour 23 minutes 45 seconds
|
||||||
- "2:54.5" → 2 minutes 54.5 seconds
|
- "2:54.5" → 2 minutes 54.5 seconds
|
||||||
|
- "2m:3.5s" → 2 minutes 3.5 seconds
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Time in seconds as a float.
|
Time in seconds as a float.
|
||||||
@@ -366,6 +401,10 @@ def parse_timestamp(value: str) -> float:
|
|||||||
value = value.strip()
|
value = value.strip()
|
||||||
|
|
||||||
# Remove trailing 's' if present (e.g., "3.5s")
|
# Remove trailing 's' if present (e.g., "3.5s")
|
||||||
|
if "h" in value:
|
||||||
|
value = value.replace("h", ":")
|
||||||
|
if "m" in value:
|
||||||
|
value = value.replace("m", ":")
|
||||||
if value.endswith("s"):
|
if value.endswith("s"):
|
||||||
value = value[:-1]
|
value = value[:-1]
|
||||||
|
|
||||||
@@ -462,8 +501,12 @@ def parse_videos(
|
|||||||
filter_list = filter_value
|
filter_list = filter_value
|
||||||
|
|
||||||
# Handle skip/take - can use begin/end as user-friendly alternatives
|
# Handle skip/take - can use begin/end as user-friendly alternatives
|
||||||
skip = video_data.get("skip", 0.0)
|
skip = float(video_data.get("skip") or 0.0)
|
||||||
take = video_data.get("take")
|
take = (
|
||||||
|
float(video_data["take"])
|
||||||
|
if video_data.get("take") not in (None, "")
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
|
||||||
# Convert begin/end to skip/take if provided
|
# Convert begin/end to skip/take if provided
|
||||||
if "begin" in video_data and video_data["begin"]:
|
if "begin" in video_data and video_data["begin"]:
|
||||||
@@ -571,9 +614,11 @@ def parse_narration(
|
|||||||
skip = segment_data.get("skip", default_begin)
|
skip = segment_data.get("skip", default_begin)
|
||||||
take = segment_data.get("take")
|
take = segment_data.get("take")
|
||||||
|
|
||||||
# Explicit begin/end always override defaults
|
# Explicit begin/start/end always override defaults
|
||||||
if "begin" in segment_data and segment_data["begin"]:
|
if "begin" in segment_data and segment_data["begin"]:
|
||||||
skip = parse_timestamp(segment_data["begin"])
|
skip = parse_timestamp(segment_data["begin"])
|
||||||
|
elif "start" in segment_data and segment_data["start"]:
|
||||||
|
skip = parse_timestamp(segment_data["start"])
|
||||||
if "end" in segment_data and segment_data["end"]:
|
if "end" in segment_data and segment_data["end"]:
|
||||||
end_time = parse_timestamp(segment_data["end"])
|
end_time = parse_timestamp(segment_data["end"])
|
||||||
# take = end - begin (duration from begin to end)
|
# take = end - begin (duration from begin to end)
|
||||||
@@ -688,3 +733,106 @@ def resolve_video_file(
|
|||||||
|
|
||||||
# Direct video file reference
|
# Direct video file reference
|
||||||
return ref_path, None
|
return ref_path, None
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_missing_videos(
    missing_ids: list[str],
    project_path: Path,
    config: Optional[ProjectConfig] = None,
) -> dict[str, VideoSource]:
    """
    Resolve video IDs that are absent from the project's videos.json.

    Each ID is looked up in shared_assets/videos.json (searched inside the
    project directory first, then next to it). When a match is found the entry
    is written back into the project's videos.json with ``is_shared: true`` so
    subsequent runs find it without another lookup.

    Args:
        missing_ids: Video IDs that were not found in the project's videos.json.
        project_path: Root directory of the project.
        config: Optional project config; its ``videos_path`` (when set) names
            the project's videos.json relative to ``project_path``.

    Returns:
        A dict of newly resolved VideoSource objects (only the ones found).
        IDs that aren't in the shared library, and shared entries without a
        ``source_file``, are left unresolved.

    The project's videos.json is only rewritten when it could be read as a JSON
    object (or does not exist yet); an unreadable file is never overwritten.
    """
    if not missing_ids:
        return {}

    # Locate shared_assets: inside the project first, then beside it.
    shared_dir: Optional[Path] = None
    for candidate in (
        project_path / "shared_assets",
        project_path.parent / "shared_assets",
    ):
        if candidate.exists():
            shared_dir = candidate
            break
    if shared_dir is None:
        return {}

    shared_videos_path = shared_dir / "videos.json"
    if not shared_videos_path.exists():
        return {}

    try:
        shared_data = _read_json(shared_videos_path)
    except (json.JSONDecodeError, OSError):
        return {}
    if not isinstance(shared_data, dict):
        return {}

    found = {vid_id for vid_id in missing_ids if vid_id in shared_data}
    if not found:
        return {}

    # Load the project's videos.json so we can append to it
    if config and config.videos_path:
        local_videos_path = project_path / config.videos_path
    else:
        local_videos_path = project_path / "videos.json"

    # If the existing file cannot be read we must NOT write it back: starting
    # from an empty dict would replace the user's whole videos.json with just
    # the shared entries.
    local_data: dict = {}
    can_persist = True
    if local_videos_path.exists():
        try:
            loaded = _read_json(local_videos_path)
        except (json.JSONDecodeError, OSError) as e:
            can_persist = False
            print(f" Warning: could not read videos.json, leaving it untouched: {e}")
        else:
            if isinstance(loaded, dict):
                local_data = loaded
            else:
                can_persist = False
                print(" Warning: videos.json is not a JSON object, leaving it untouched")

    resolved: dict[str, VideoSource] = {}
    for video_id in sorted(found):
        shared_entry = shared_data[video_id]
        if not isinstance(shared_entry, dict) or "source_file" not in shared_entry:
            print(f" Warning: shared video '{video_id}' has no source_file, skipped")
            continue

        entry = dict(shared_entry)
        entry["is_shared"] = True

        # Persist into the project's videos.json
        local_data[video_id] = entry
        if can_persist:
            print(f" → Copied shared video '{video_id}' into videos.json (is_shared=true)")

        # Build the in-memory VideoSource
        attribution = None
        attr = entry.get("attribution")
        if isinstance(attr, dict):
            attribution = Attribution(
                source=attr.get("source", "unknown"),
                creator=attr.get("creator", "Unknown"),
                url=attr.get("url"),
            )

        # Same coercion as parse_videos: null/empty skip means 0.0 and
        # null/empty take means "no limit".
        # NOTE(review): parse_videos also converts begin/end into skip/take;
        # shared entries that use begin/end are not converted here — confirm
        # whether that is intended.
        raw_take = entry.get("take")
        raw_duration = entry.get("duration")
        raw_has_audio = entry.get("has_audio")
        resolved[video_id] = VideoSource(
            source_file=entry["source_file"],
            filter=entry.get("filter", []),
            output_file=entry.get("output_file"),
            take=float(raw_take) if raw_take not in (None, "") else None,
            skip=float(entry.get("skip") or 0.0),
            zoom=float(entry.get("zoom", 1.0)),
            cutout=entry.get("cutout"),
            always_visible=bool(entry.get("always_visible", False)),
            is_shared=True,
            pause_narration=float(entry.get("pause_narration", 0)),
            attribution=attribution,
            use_audio_channels=entry.get("use_audio_channels", "both"),
            defer_loudnorm=bool(entry.get("defer_loudnorm", False)),
            volume=float(entry.get("volume", 1.0)),
            layer=entry.get("layer", "above"),
            duration=float(raw_duration) if raw_duration is not None else None,
            has_audio=bool(raw_has_audio) if raw_has_audio is not None else None,
            end_on=entry.get("end_on"),
        )

    if can_persist and resolved:
        # Serialise before opening the file so a failure cannot truncate it.
        payload = json.dumps(local_data, indent=4) + "\n"
        try:
            with open(local_videos_path, "w", encoding="utf-8") as fh:
                fh.write(payload)
        except OSError as e:
            print(f" Warning: could not update videos.json: {e}")

    return resolved
|
||||||
|
|||||||
@@ -0,0 +1,312 @@
|
|||||||
|
"""Pexels video downloader for gnommo shared_assets.
|
||||||
|
|
||||||
|
Configure API key in ~/.gnommo.conf:
|
||||||
|
|
||||||
|
[pexels]
|
||||||
|
api_key = YOUR_KEY_HERE
|
||||||
|
|
||||||
|
Get a free key at https://www.pexels.com/api/
|
||||||
|
"""
|
||||||
|
|
||||||
|
import configparser
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import urllib.error
|
||||||
|
import urllib.request
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
|
def get_pexels_api_key() -> Optional[str]:
    """Return the Pexels API key configured in ``~/.gnommo.conf``, or None.

    The key is read from the ``api_key`` option of the ``[pexels]`` section.
    A missing file, a missing section or option, an empty value, and a file
    that cannot be read or parsed all yield None, so callers only have to
    handle a single "no key configured" case.
    """
    config_path = Path.home() / ".gnommo.conf"
    if not config_path.exists():
        return None

    # interpolation=None: an API key is an opaque token, so a literal '%'
    # must not be treated as configparser interpolation syntax.
    cfg = configparser.ConfigParser(interpolation=None)
    try:
        cfg.read(config_path)
    except (configparser.Error, OSError, UnicodeDecodeError) as e:
        print(f" Warning: could not read {config_path}: {e}", file=sys.stderr)
        return None

    api_key = cfg.get("pexels", "api_key", fallback=None)
    # Treat "api_key =" (empty value) the same as an absent key.
    return api_key.strip() or None if api_key else None
|
||||||
|
|
||||||
|
|
||||||
|
def extract_pexels_id(source_file: str) -> Optional[str]:
    """Extract the numeric Pexels video ID from a source_file path.

    Handles names like 'pexels/11868263-hd_1920_1080_24fps.mp4'
    and 'pexels/12136677_1080_1920_30fps.mp4'. Only the leading digits of
    the file name count; digits in directory components are ignored.

    Returns None when source_file is empty or the file name does not start
    with a digit.
    """
    if not source_file:
        return None
    # Path.name is already the last path component, so no manual split on "/".
    match = re.match(r"\d+", Path(source_file).name)
    return match.group(0) if match else None
|
||||||
|
|
||||||
|
|
||||||
|
def _fetch_video_info(pexels_id: str, api_key: str) -> Optional[dict]:
    """Fetch the Pexels API record for a single video.

    Sends one GET request to the Pexels videos endpoint with ``api_key`` in the
    Authorization header (15 s timeout).

    Returns the decoded JSON object, or None after printing a diagnostic when
    the request fails, the body is not valid JSON, or the body is not a JSON
    object.
    """
    url = f"https://api.pexels.com/videos/videos/{pexels_id}"
    req = urllib.request.Request(
        url,
        headers={"Authorization": api_key, "User-Agent": "Mozilla/5.0 gnommo/1.0"},
    )
    try:
        with urllib.request.urlopen(req, timeout=15) as resp:
            info = json.loads(resp.read())
    except urllib.error.HTTPError as e:
        # Only a 404 suggests the video is gone; blaming deletion for an auth
        # or rate-limit failure sends the user looking in the wrong place.
        if e.code == 404:
            hint = " — video may have been deleted"
        elif e.code in (401, 403):
            hint = " — check api_key in ~/.gnommo.conf"
        elif e.code == 429:
            hint = " — rate limit reached, try again later"
        else:
            hint = ""
        print(f" [{pexels_id}] Pexels API error {e.code}{hint}", flush=True)
        return None
    except Exception as e:
        # Deliberately broad: this is a best-effort network boundary and any
        # transport/decoding failure should degrade to "no info".
        print(f" [{pexels_id}] Pexels API error: {e}", flush=True)
        return None

    if not isinstance(info, dict):
        print(f" [{pexels_id}] Pexels API error: unexpected response format", flush=True)
        return None
    return info
|
||||||
|
|
||||||
|
|
||||||
|
def description_from_url(video_url: str) -> str:
    """Extract human-readable description from a Pexels video URL slug.

    'https://www.pexels.com/video/abstract-television-noise-11868263/'
    → 'Abstract Television Noise'

    A trailing query string or fragment is ignored and matching is
    case-insensitive. Returns "" when the URL is empty/None or carries no
    slug in front of the numeric ID.
    """
    if not video_url:
        return ""
    # slug = everything between "/video/" and the final "-<digits>" ID;
    # "*?" (not "+?") so that a one-character slug is accepted too.
    m = re.search(
        r"/video/([a-z0-9][a-z0-9-]*?)-\d+/?(?:[?#].*)?$",
        video_url,
        re.IGNORECASE,
    )
    if m:
        return m.group(1).replace("-", " ").title()
    return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _pick_best_video_file(video_files: list, source_file: str) -> Optional[dict]:
|
||||||
|
"""Select the video_files entry that best matches the hints in source_file."""
|
||||||
|
stem = Path(source_file).stem.split("/")[-1]
|
||||||
|
|
||||||
|
width_hint = height_hint = fps_hint = quality_hint = None
|
||||||
|
m = re.search(r"[_-](\d{3,4})[_-](\d{3,4})[_-](\d+)fps", stem)
|
||||||
|
if m:
|
||||||
|
width_hint = int(m.group(1))
|
||||||
|
height_hint = int(m.group(2))
|
||||||
|
fps_hint = int(m.group(3))
|
||||||
|
for q in ("uhd", "hd", "sd"):
|
||||||
|
if q in stem.lower():
|
||||||
|
quality_hint = q
|
||||||
|
break
|
||||||
|
|
||||||
|
mp4s = [f for f in video_files if f.get("file_type") == "video/mp4"]
|
||||||
|
if not mp4s:
|
||||||
|
mp4s = video_files # fall back to any format
|
||||||
|
|
||||||
|
def score(vf: dict) -> int:
|
||||||
|
s = 0
|
||||||
|
if quality_hint and vf.get("quality", "").lower() == quality_hint:
|
||||||
|
s += 10
|
||||||
|
if width_hint and vf.get("width") == width_hint:
|
||||||
|
s += 5
|
||||||
|
if height_hint and vf.get("height") == height_hint:
|
||||||
|
s += 5
|
||||||
|
if fps_hint and round(float(vf.get("fps") or 0)) == fps_hint:
|
||||||
|
s += 3
|
||||||
|
return s
|
||||||
|
|
||||||
|
return max(mp4s, key=score)
|
||||||
|
|
||||||
|
|
||||||
|
def download_video(
    source_file: str,
    shared_assets_dir: Path,
    api_key: str,
) -> Optional[dict]:
    """Download one Pexels video to shared_assets_dir/<source_file>.

    The Pexels ID is taken from the file name, the API is asked for the
    available renditions, and the one best matching the resolution/fps/quality
    hints in the file name is downloaded.

    The payload is streamed to ``<target>.part`` and renamed into place only
    after the transfer completed, so memory use stays at one chunk and a failed
    or truncated download never leaves a file at the target path.

    Returns a metadata dict {description, duration, has_audio=False} on
    success, or None on failure (a diagnostic is printed).
    """
    pexels_id = extract_pexels_id(source_file)
    if not pexels_id:
        print(f" Cannot extract Pexels ID from: {source_file}", file=sys.stderr)
        return None

    target_path = shared_assets_dir / source_file

    print(f" [{pexels_id}] Fetching video info...", flush=True)
    info = _fetch_video_info(pexels_id, api_key)
    if not info:
        return None

    description = description_from_url(info.get("url", ""))
    duration = float(info.get("duration") or 0) or None

    video_files = info.get("video_files", [])
    if not video_files:
        print(f" [{pexels_id}] No video files in API response", flush=True)
        return None

    best = _pick_best_video_file(video_files, source_file)
    if not best:
        return None

    download_url = best.get("link")
    if not download_url:
        print(f" [{pexels_id}] Selected video file has no download link", flush=True)
        return None

    w, h, fps = best.get("width", "?"), best.get("height", "?"), best.get("fps", "?")
    q = best.get("quality", "?")
    label = f'"{description}" — ' if description else ""
    print(f" [{pexels_id}] {label}{q} {w}x{h} @ {fps}fps", flush=True)
    print(f" → {target_path}", flush=True)

    partial_path = target_path.with_name(target_path.name + ".part")
    chunk_size = 1024 * 512  # 512 KB
    try:
        target_path.parent.mkdir(parents=True, exist_ok=True)
        req = urllib.request.Request(
            download_url, headers={"User-Agent": "Mozilla/5.0 gnommo/1.0"}
        )
        with urllib.request.urlopen(req, timeout=300) as resp, open(
            partial_path, "wb"
        ) as out:
            total = int(resp.headers.get("Content-Length") or 0)
            mb_total = total / 1024 / 1024
            downloaded = 0
            while chunk := resp.read(chunk_size):
                out.write(chunk)
                downloaded += len(chunk)
                if total:
                    pct = downloaded * 100 // total
                    mb_done = downloaded / 1024 / 1024
                    print(f" {pct:3d}% {mb_done:.1f}/{mb_total:.1f} MB\r", end="", flush=True)

        # A connection dropped mid-transfer can end the read loop early.
        if total and downloaded != total:
            raise OSError(f"incomplete download ({downloaded} of {total} bytes)")

        partial_path.replace(target_path)
        print(f" Done — {downloaded / 1024 / 1024:.1f} MB ", flush=True)
    except Exception as e:
        # Deliberately broad: any transport or filesystem failure means
        # "this video could not be downloaded".
        print(f"\n Download failed: {e}", flush=True)
        try:
            partial_path.unlink(missing_ok=True)
        except OSError:
            pass  # best-effort cleanup of the partial file
        return None

    return {
        "description": description,
        "duration": duration,
        "has_audio": False,  # conservative; renderer probes when needed
    }
|
||||||
|
|
||||||
|
|
||||||
|
def update_videos_json(
    json_path: Path,
    video_id: str,
    metadata: dict,
) -> None:
    """Write description (and other metadata) into an existing videos.json entry.

    Only truthy metadata values are applied: None, "", 0 and False are treated
    as "unknown" and never overwrite what is already stored. The file is
    rewritten only when at least one value actually changed.

    Does nothing when the file does not exist, when ``video_id`` has no entry,
    or when that entry is not a JSON object.
    """
    if not json_path.exists():
        return
    with open(json_path, "r", encoding="utf-8") as f:
        raw = json.load(f)

    entry = raw.get(video_id) if isinstance(raw, dict) else None
    if not isinstance(entry, dict):
        return

    changed = False
    for key, value in metadata.items():
        if value and entry.get(key) != value:
            entry[key] = value
            changed = True
    if not changed:
        return

    # Serialise BEFORE opening the file for writing: if a value cannot be
    # encoded, the exception is raised while videos.json is still intact
    # instead of leaving a truncated file behind.
    payload = json.dumps(raw, indent=2, ensure_ascii=False) + "\n"
    with open(json_path, "w", encoding="utf-8") as f:
        f.write(payload)
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_metadata(pexels_id: str, api_key: str) -> Optional[dict]:
    """Look up a Pexels video's description and duration without downloading it.

    Returns ``{"description": ..., "duration": ...}`` where the description is
    derived from the video's page URL and the duration is a float (None when
    the API reports none or zero). Returns None when the API lookup yields
    nothing.
    """
    record = _fetch_video_info(pexels_id, api_key)
    if record:
        summary = description_from_url(record.get("url", ""))
        seconds = float(record.get("duration") or 0)
        return {
            "description": summary,
            "duration": seconds or None,
        }
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def enrich_missing_descriptions(
    shared_assets_dir: Path,
    api_key: str,
) -> int:
    """Backfill descriptions for downloaded Pexels videos that lack one.

    Looks at every ``pexels/*`` entry of shared_assets/videos.json whose
    description is absent or empty and whose source_file exists on disk
    (locally or via cache), asks the Pexels API for its metadata and stores
    it in videos.json.

    Returns number of entries updated.
    """
    from .cache import resolve_with_cache

    videos_json = shared_assets_dir / "videos.json"
    if not videos_json.exists():
        return 0

    with open(videos_json, "r", encoding="utf-8") as handle:
        catalog = json.load(handle)

    # Pass 1: pexels entries without a description.
    undescribed = []
    for key, record in catalog.items():
        if key.startswith("pexels/") and not record.get("description"):
            undescribed.append((key, record))

    # Pass 2: keep those whose file is present and whose name yields an ID.
    project_root = shared_assets_dir.parent
    pending = []
    for key, record in undescribed:
        rel_file = record.get("source_file", "")
        if not rel_file:
            continue
        located, _ = resolve_with_cache(shared_assets_dir / rel_file, project_root)
        if not located.exists():
            continue
        numeric_id = extract_pexels_id(rel_file)
        if numeric_id:
            pending.append((key, numeric_id))

    if not pending:
        return 0

    print(f" Enriching descriptions for {len(pending)} existing pexels video(s)...", flush=True)

    count = 0
    for key, numeric_id in pending:
        details = fetch_metadata(numeric_id, api_key)
        if not (details and details.get("description")):
            print(f" [{numeric_id}] not found or no description — skipped", flush=True)
            continue
        print(f" [{numeric_id}] \"{details['description']}\"", flush=True)
        update_videos_json(videos_json, key, details)
        count += 1

    return count
|
||||||
|
|
||||||
|
|
||||||
|
def find_missing_pexels_videos(
    manuscript_markers: list[str],
    videos: dict,
    shared_assets_dir: Path,
) -> list[tuple[str, str]]:
    """List the Pexels videos a manuscript references whose files are absent.

    A marker counts when it starts with one of the known video prefixes and
    the remainder is a ``pexels/...`` ID present in ``videos``. Each ID is
    checked once, in first-seen order.

    Returns [(video_id, source_file)] for every such video whose file, looked
    up under shared_assets_dir (via the cache resolver), does not exist.
    """
    from .cache import resolve_with_cache

    _VIDEO_PREFIXES = (
        "video:", "narration:",
        "vft:", "vfb:", "vfm:",
        "vf2t:", "vf2b:", "vf2m:",
        "vst:", "vsb:", "vsm:",
        "vftp:", "vfbp:", "vfmp:",
        "vf2tp:", "vf2bp:", "vf2mp:",
        "vstp:", "vsbp:", "vsmp:",
    )

    checked: set[str] = set()
    absent: list[tuple[str, str]] = []

    for marker in manuscript_markers:
        # Every prefix is "<tag>:" with no inner colon, so splitting at the
        # first colon is equivalent to testing startswith() on each prefix.
        tag, colon, video_id = marker.partition(":")
        if not colon or tag + colon not in _VIDEO_PREFIXES:
            continue
        if not video_id.startswith("pexels/") or video_id in checked:
            continue
        checked.add(video_id)

        source = videos.get(video_id)
        if source is None:
            continue
        # Entries may be objects exposing .source_file or the path itself.
        rel_file = getattr(source, "source_file", source)

        # resolve_with_cache needs a project_path — use shared_assets parent
        located, _ = resolve_with_cache(
            shared_assets_dir / rel_file, shared_assets_dir.parent
        )
        if not located.exists():
            absent.append((video_id, rel_file))

    return absent
|
||||||
+134
-29
@@ -18,15 +18,23 @@ from .models import (
|
|||||||
)
|
)
|
||||||
from typing import Union, Optional
|
from typing import Union, Optional
|
||||||
|
|
||||||
|
|
||||||
|
def _tc() -> str:
    """Return the FFmpeg thread count as a string, ready for a command line.

    The number comes from ``get_ffmpeg_thread_count`` in the cache module
    (per the original note: ~/.gnommo.conf [performance] cpu_limit).
    """
    from .cache import get_ffmpeg_thread_count

    thread_count = get_ffmpeg_thread_count()
    return str(thread_count)
|
||||||
|
|
||||||
|
|
||||||
# Number of parallel workers for chunk processing
|
# Number of parallel workers for chunk processing
|
||||||
DEFAULT_CHUNK_WORKERS = 4
|
DEFAULT_CHUNK_WORKERS = 1
|
||||||
|
|
||||||
# Chunk duration in seconds for parallel filter processing (avoids huge intermediate files)
|
# Chunk duration in seconds for parallel filter processing (avoids huge intermediate files)
|
||||||
CHUNK_DURATION = 60
|
CHUNK_DURATION = 60
|
||||||
|
|
||||||
# Resolution presets for preview/proxy workflow
|
# Resolution presets for preview/proxy workflow
|
||||||
# Each entry: (width, height, subdir_name)
|
# Each entry: (width, height, subdir_name)
|
||||||
RES_CONFIGS: dict[str, tuple[int, int, str] | None] = {
|
RES_CONFIGS: dict[str, Optional[tuple]] = {
|
||||||
"full": None, # no downscale, no subdir
|
"full": None, # no downscale, no subdir
|
||||||
"low": (490, 270, "low"),
|
"low": (490, 270, "low"),
|
||||||
"tiny": (320, 180, "proxy"), # "proxy" subdir kept for backward compat
|
"tiny": (320, 180, "proxy"), # "proxy" subdir kept for backward compat
|
||||||
@@ -120,8 +128,12 @@ def create_downscaled_video(
|
|||||||
"ultrafast",
|
"ultrafast",
|
||||||
"-crf",
|
"-crf",
|
||||||
"28",
|
"28",
|
||||||
|
"-vsync",
|
||||||
|
"cfr",
|
||||||
"-c:a",
|
"-c:a",
|
||||||
"copy",
|
"aac", # re-encode audio so both streams share the same PTS origin,
|
||||||
|
"-ar", # avoiding the lip-sync drift caused by libx264 encoder delay
|
||||||
|
"48000", # when audio is copied with its original timestamps
|
||||||
str(out_path),
|
str(out_path),
|
||||||
]
|
]
|
||||||
result = subprocess.run(cmd, capture_output=True, text=True)
|
result = subprocess.run(cmd, capture_output=True, text=True)
|
||||||
@@ -302,7 +314,6 @@ def run_ffmpeg_with_progress(cmd, duration, description="Processing"):
|
|||||||
while True:
|
while True:
|
||||||
# If process ended and no more output, break
|
# If process ended and no more output, break
|
||||||
if p.poll() is not None:
|
if p.poll() is not None:
|
||||||
|
|
||||||
# drain any remaining output quickly
|
# drain any remaining output quickly
|
||||||
while True:
|
while True:
|
||||||
line = p.stdout.readline()
|
line = p.stdout.readline()
|
||||||
@@ -358,7 +369,9 @@ def run_ffmpeg_with_progress(cmd, duration, description="Processing"):
|
|||||||
else:
|
else:
|
||||||
code = p.returncode
|
code = p.returncode
|
||||||
# On macOS/Linux, -9 means SIGKILL (OOM kill by OS), -6 = SIGABRT
|
# On macOS/Linux, -9 means SIGKILL (OOM kill by OS), -6 = SIGABRT
|
||||||
signal_hint = " (OOM kill)" if code == -9 else (" (abort)" if code == -6 else "")
|
signal_hint = (
|
||||||
|
" (OOM kill)" if code == -9 else (" (abort)" if code == -6 else "")
|
||||||
|
)
|
||||||
sys.stdout.write(f"\n FFmpeg exited with code {code}{signal_hint}\n")
|
sys.stdout.write(f"\n FFmpeg exited with code {code}{signal_hint}\n")
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
|
|
||||||
@@ -371,12 +384,19 @@ def _has_audio_stream(video_path: Path) -> bool:
|
|||||||
"""Return True if the file has a real (non-ghost) audio stream."""
|
"""Return True if the file has a real (non-ghost) audio stream."""
|
||||||
result = subprocess.run(
|
result = subprocess.run(
|
||||||
[
|
[
|
||||||
"ffprobe", "-v", "error",
|
"ffprobe",
|
||||||
"-analyzeduration", "0",
|
"-v",
|
||||||
"-probesize", "1000000",
|
"error",
|
||||||
"-select_streams", "a:0",
|
"-analyzeduration",
|
||||||
"-show_entries", "stream=index,nb_frames",
|
"0",
|
||||||
"-of", "csv=p=0",
|
"-probesize",
|
||||||
|
"1000000",
|
||||||
|
"-select_streams",
|
||||||
|
"a:0",
|
||||||
|
"-show_entries",
|
||||||
|
"stream=index,nb_frames",
|
||||||
|
"-of",
|
||||||
|
"csv=p=0",
|
||||||
str(video_path),
|
str(video_path),
|
||||||
],
|
],
|
||||||
capture_output=True,
|
capture_output=True,
|
||||||
@@ -542,6 +562,7 @@ def preprocess_video(
|
|||||||
verbose: bool = False,
|
verbose: bool = False,
|
||||||
force: bool = False,
|
force: bool = False,
|
||||||
custom_gnommo_scratch: Optional[Path] = None,
|
custom_gnommo_scratch: Optional[Path] = None,
|
||||||
|
res: str = "full",
|
||||||
) -> Path:
|
) -> Path:
|
||||||
"""
|
"""
|
||||||
Apply preprocessing filters to a video source.
|
Apply preprocessing filters to a video source.
|
||||||
@@ -554,6 +575,7 @@ def preprocess_video(
|
|||||||
video_id: ID of the video being processed
|
video_id: ID of the video being processed
|
||||||
video_source: VideoSource with source_file, filter, and output_file
|
video_source: VideoSource with source_file, filter, and output_file
|
||||||
custom_gnommo_scratch: Optional external directory for intermediate files (e.g., SSD)
|
custom_gnommo_scratch: Optional external directory for intermediate files (e.g., SSD)
|
||||||
|
res: Resolution preset — when not "full", source is downscaled before filtering
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Path to the final preprocessed output file.
|
Path to the final preprocessed output file.
|
||||||
@@ -578,6 +600,18 @@ def preprocess_video(
|
|||||||
filter_type=None,
|
filter_type=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# For non-full res, downscale the raw source first so all subsequent
|
||||||
|
# filters (chroma key, color grade, etc.) operate on the small file.
|
||||||
|
if res != "full":
|
||||||
|
cfg = RES_CONFIGS.get(res)
|
||||||
|
if cfg:
|
||||||
|
width, height, _ = cfg
|
||||||
|
print(f" Downscaling source to {width}x{height} ({res})...")
|
||||||
|
raw_low_dir = gnommo_scratch / f"raw_{res}"
|
||||||
|
current_input = create_downscaled_video(
|
||||||
|
current_input, raw_low_dir, width, height, force
|
||||||
|
)
|
||||||
|
|
||||||
# Resolve channel setting (auto-detect if needed) and sanity check
|
# Resolve channel setting (auto-detect if needed) and sanity check
|
||||||
channel = video_source.use_audio_channels
|
channel = video_source.use_audio_channels
|
||||||
if channel == "auto":
|
if channel == "auto":
|
||||||
@@ -622,6 +656,8 @@ def preprocess_video(
|
|||||||
batch_num = 0
|
batch_num = 0
|
||||||
for batch in filter_batches:
|
for batch in filter_batches:
|
||||||
first_filter_type = batch[0].get("type")
|
first_filter_type = batch[0].get("type")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if first_filter_type in VIDEO_FILTER_TYPES:
|
if first_filter_type in VIDEO_FILTER_TYPES:
|
||||||
# Combined video filter batch - use chunked processing for large files
|
# Combined video filter batch - use chunked processing for large files
|
||||||
@@ -744,12 +780,18 @@ def apply_combined_video_filters(
|
|||||||
|
|
||||||
# Build FFmpeg command
|
# Build FFmpeg command
|
||||||
cmd = ["ffmpeg", "-y"]
|
cmd = ["ffmpeg", "-y"]
|
||||||
|
# Global options before -i (after -i they become output options and don't limit filter threads)
|
||||||
|
cmd.extend(["-threads", _tc(), "-filter_threads", _tc()])
|
||||||
|
|
||||||
if take is not None:
|
if take is not None:
|
||||||
cmd.extend(["-t", str(take)])
|
cmd.extend(["-t", str(take)])
|
||||||
|
|
||||||
cmd.extend(
|
cmd.extend(
|
||||||
[
|
[
|
||||||
|
"-probesize",
|
||||||
|
"50000000",
|
||||||
|
"-analyzeduration",
|
||||||
|
"50000000",
|
||||||
"-i",
|
"-i",
|
||||||
str(input_path),
|
str(input_path),
|
||||||
"-vf",
|
"-vf",
|
||||||
@@ -859,7 +901,9 @@ def build_mask_filter(config: dict) -> str:
|
|||||||
alpha_expr = "+".join(conditions)
|
alpha_expr = "+".join(conditions)
|
||||||
alpha_expr = f"if({alpha_expr},0,alpha(X,Y))"
|
alpha_expr = f"if({alpha_expr},0,alpha(X,Y))"
|
||||||
|
|
||||||
return f"geq=lum='lum(X,Y)':cb='cb(X,Y)':cr='cr(X,Y)':a='{alpha_expr}'"
|
# Use r/g/b passthrough so this works in rgba space (as output by gnommokey/color_grade)
|
||||||
|
# without triggering an rgba→yuv conversion that would spawn 11 more swscaler threads.
|
||||||
|
return f"geq=r='r(X,Y)':g='g(X,Y)':b='b(X,Y)':a='{alpha_expr}'"
|
||||||
|
|
||||||
|
|
||||||
def build_color_grade_filter(config: dict) -> str:
|
def build_color_grade_filter(config: dict) -> str:
|
||||||
@@ -1023,6 +1067,14 @@ def build_gnommokey_filter(config: dict) -> str:
|
|||||||
scale_factor = gain * 2.5
|
scale_factor = gain * 2.5
|
||||||
key_expr = f"({key_expr})*{scale_factor:.3f}"
|
key_expr = f"({key_expr})*{scale_factor:.3f}"
|
||||||
|
|
||||||
|
# Shadow boost: amplify key signal for dark pixels so dark greens key out fully.
|
||||||
|
# shadow_factor = 1 - luma/255 (high for dark pixels, 0 for bright pixels)
|
||||||
|
# extra multiplier = 1 + shadow_boost * shadow_factor
|
||||||
|
if cfg.shadow_boost > 0:
|
||||||
|
luma_expr = f"(0.299*r(X,Y)+0.587*g(X,Y)+0.114*b(X,Y))"
|
||||||
|
shadow_factor = f"(1-{luma_expr}/255)"
|
||||||
|
key_expr = f"({key_expr})*(1+{cfg.shadow_boost:.3f}*{shadow_factor})"
|
||||||
|
|
||||||
# Apply clip_black and clip_white to compress the matte
|
# Apply clip_black and clip_white to compress the matte
|
||||||
# clip_black: key values below this become 0 (those pixels stay opaque)
|
# clip_black: key values below this become 0 (those pixels stay opaque)
|
||||||
# clip_white: key values above this become 255 (fully transparent)
|
# clip_white: key values above this become 255 (fully transparent)
|
||||||
@@ -1040,6 +1092,13 @@ def build_gnommokey_filter(config: dict) -> str:
|
|||||||
# Invert: high key value (green) = low alpha (transparent)
|
# Invert: high key value (green) = low alpha (transparent)
|
||||||
alpha_expr = f"255-{key_expr}"
|
alpha_expr = f"255-{key_expr}"
|
||||||
|
|
||||||
|
# Luminance protection: lock bright pixels to fully opaque so white objects
|
||||||
|
# (headphones, teeth) are never accidentally keyed or jitter.
|
||||||
|
# protect_luma=-1 disables this. Use ~220 for typical white protection.
|
||||||
|
if cfg.protect_luma >= 0:
|
||||||
|
luma_expr = f"(0.299*r(X,Y)+0.587*g(X,Y)+0.114*b(X,Y))"
|
||||||
|
alpha_expr = f"if(gt({luma_expr},{cfg.protect_luma}),255,{alpha_expr})"
|
||||||
|
|
||||||
# Build the geq filter for alpha (in RGBA mode)
|
# Build the geq filter for alpha (in RGBA mode)
|
||||||
parts.append(f"geq=r='r(X,Y)':g='g(X,Y)':b='b(X,Y)':a='{alpha_expr}'")
|
parts.append(f"geq=r='r(X,Y)':g='g(X,Y)':b='b(X,Y)':a='{alpha_expr}'")
|
||||||
|
|
||||||
@@ -1112,8 +1171,9 @@ def build_gnommokey_filter(config: dict) -> str:
|
|||||||
parts.append(f"alphaextract,avgblur=sizeX={radius}:sizeY={radius}[blur]")
|
parts.append(f"alphaextract,avgblur=sizeX={radius}:sizeY={radius}[blur]")
|
||||||
# This gets complex - for now, skip alpha blur and just use erosion
|
# This gets complex - for now, skip alpha blur and just use erosion
|
||||||
|
|
||||||
# Ensure output is in a good format
|
# Stay in rgba so downstream filters (color_grade, mask) don't trigger
|
||||||
parts.append("format=yuva444p10le")
|
# a redundant yuva444p10le→rgba round-trip and its 11-thread swscaler call.
|
||||||
|
# The caller (_process_chunk_to_prores4444) appends format=yuva444p10le at the end.
|
||||||
|
|
||||||
return ",".join(parts)
|
return ",".join(parts)
|
||||||
|
|
||||||
@@ -1152,6 +1212,8 @@ def parse_gnommokey_config(config: dict) -> GnommoKeyConfig:
|
|||||||
despill_bias=despill_bias,
|
despill_bias=despill_bias,
|
||||||
despill_strength=float(config.get("despill_strength", 0.5)),
|
despill_strength=float(config.get("despill_strength", 0.5)),
|
||||||
alpha_bias=alpha_bias,
|
alpha_bias=alpha_bias,
|
||||||
|
protect_luma=int(config.get("protect_luma", -1)),
|
||||||
|
shadow_boost=float(config.get("shadow_boost", 0.0)),
|
||||||
edge_erode=int(config.get("edge_erode", 0)),
|
edge_erode=int(config.get("edge_erode", 0)),
|
||||||
edge_soften=float(config.get("edge_soften", 0.0)),
|
edge_soften=float(config.get("edge_soften", 0.0)),
|
||||||
)
|
)
|
||||||
@@ -1325,10 +1387,20 @@ def _process_chunk_to_prores4444(
|
|||||||
# Build FFmpeg command
|
# Build FFmpeg command
|
||||||
cmd: list[str] = ["ffmpeg", "-y"]
|
cmd: list[str] = ["ffmpeg", "-y"]
|
||||||
|
|
||||||
|
# Global thread limits MUST be before the first -i.
|
||||||
|
# After -i they become output-stream options and FFmpeg ignores them for the
|
||||||
|
# filter graph — each geq stage then spawns one thread per CPU core (11 on M-series),
|
||||||
|
# causing the N-way RGBA frame buffer explosion that OOM-kills the process.
|
||||||
|
cmd.extend(["-threads", _tc(), "-filter_threads", _tc()])
|
||||||
|
|
||||||
# Seek to start time (before input for fast seeking)
|
# Seek to start time (before input for fast seeking)
|
||||||
if start_time > 0:
|
if start_time > 0:
|
||||||
cmd.extend(["-ss", str(start_time)])
|
cmd.extend(["-ss", str(start_time)])
|
||||||
|
|
||||||
|
# Limit initial file analysis to 50 MB. Without this, FFmpeg scans the entire
|
||||||
|
# source file when moov is at the end (common for camera recordings), which reads
|
||||||
|
# gigabytes of data and triggers OOM when multiple chunk workers run in parallel.
|
||||||
|
cmd.extend(["-probesize", "50000000", "-analyzeduration", "50000000"])
|
||||||
cmd.extend(["-i", str(input_path)])
|
cmd.extend(["-i", str(input_path)])
|
||||||
|
|
||||||
# Limit duration
|
# Limit duration
|
||||||
@@ -1336,7 +1408,6 @@ def _process_chunk_to_prores4444(
|
|||||||
if actual_take is not None:
|
if actual_take is not None:
|
||||||
cmd.extend(["-t", str(actual_take)])
|
cmd.extend(["-t", str(actual_take)])
|
||||||
|
|
||||||
# Video encode: ProRes 4444 with alpha
|
|
||||||
cmd.extend(
|
cmd.extend(
|
||||||
[
|
[
|
||||||
"-vf",
|
"-vf",
|
||||||
@@ -1349,8 +1420,6 @@ def _process_chunk_to_prores4444(
|
|||||||
"yuva444p10le", # must carry alpha
|
"yuva444p10le", # must carry alpha
|
||||||
"-vendor",
|
"-vendor",
|
||||||
"apl0", # optional; helps some NLEs tag as Apple ProRes
|
"apl0", # optional; helps some NLEs tag as Apple ProRes
|
||||||
"-movflags",
|
|
||||||
"+faststart", # optional; makes MOV streamable
|
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -1378,6 +1447,31 @@ def _process_chunk_to_prores4444(
|
|||||||
stderr=result.stderr,
|
stderr=result.stderr,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Validate the output file is a readable MOV (moov atom present).
|
||||||
|
# FFmpeg can return 0 but write a corrupt/incomplete file (e.g. moov atom
|
||||||
|
# missing) when faststart rewrite fails or disk is under pressure.
|
||||||
|
probe = subprocess.run(
|
||||||
|
[
|
||||||
|
"ffprobe",
|
||||||
|
"-v",
|
||||||
|
"error",
|
||||||
|
"-show_entries",
|
||||||
|
"format=duration",
|
||||||
|
"-of",
|
||||||
|
"csv=p=0",
|
||||||
|
str(output_path),
|
||||||
|
],
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
)
|
||||||
|
if probe.returncode != 0 or not probe.stdout.strip():
|
||||||
|
raise PreprocessError(
|
||||||
|
f"Chunk output file is unreadable or missing moov atom: {output_path.name}",
|
||||||
|
filter_type="chunk",
|
||||||
|
command=" ".join(cmd),
|
||||||
|
stderr=probe.stderr,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _process_chunk_to_webm(
|
def _process_chunk_to_webm(
|
||||||
input_path: Path,
|
input_path: Path,
|
||||||
@@ -1627,10 +1721,9 @@ def apply_chroma_key(
|
|||||||
|
|
||||||
# Build FFmpeg command
|
# Build FFmpeg command
|
||||||
# ProRes 4444 profile for alpha channel support
|
# ProRes 4444 profile for alpha channel support
|
||||||
cmd = [
|
cmd = ["ffmpeg", "-y"]
|
||||||
"ffmpeg",
|
# Global options before -i
|
||||||
"-y", # Overwrite output
|
cmd.extend(["-threads", _tc(), "-filter_threads", _tc()])
|
||||||
]
|
|
||||||
|
|
||||||
# Add duration limit if specified (before input for efficiency)
|
# Add duration limit if specified (before input for efficiency)
|
||||||
if take is not None:
|
if take is not None:
|
||||||
@@ -1729,14 +1822,13 @@ def apply_mask(
|
|||||||
# Using: if(condition, 0, alpha(X,Y))
|
# Using: if(condition, 0, alpha(X,Y))
|
||||||
alpha_expr = f"if({alpha_expr},0,alpha(X,Y))"
|
alpha_expr = f"if({alpha_expr},0,alpha(X,Y))"
|
||||||
|
|
||||||
# Build the geq filter - preserve luma, chroma, modify alpha
|
# Build the geq filter - preserve RGB channels, modify alpha
|
||||||
video_filter = f"geq=lum='lum(X,Y)':cb='cb(X,Y)':cr='cr(X,Y)':a='{alpha_expr}'"
|
video_filter = f"geq=r='r(X,Y)':g='g(X,Y)':b='b(X,Y)':a='{alpha_expr}'"
|
||||||
|
|
||||||
# Build FFmpeg command
|
# Build FFmpeg command
|
||||||
cmd = [
|
cmd = ["ffmpeg", "-y"]
|
||||||
"ffmpeg",
|
# Global options before -i
|
||||||
"-y", # Overwrite output
|
cmd.extend(["-threads", _tc(), "-filter_threads", _tc()])
|
||||||
]
|
|
||||||
|
|
||||||
if take is not None:
|
if take is not None:
|
||||||
cmd.extend(["-t", str(take)])
|
cmd.extend(["-t", str(take)])
|
||||||
@@ -1886,7 +1978,12 @@ def apply_audio_normalize(
|
|||||||
channel_map -> eq_bands -> highpass -> lowpass -> room_eq -> dereverb -> denoise -> gate -> compress -> normalize
|
channel_map -> eq_bands -> highpass -> lowpass -> room_eq -> dereverb -> denoise -> gate -> compress -> normalize
|
||||||
"""
|
"""
|
||||||
cfg = parse_audio_normalize_config(config)
|
cfg = parse_audio_normalize_config(config)
|
||||||
|
if not cfg.enabled:
|
||||||
|
# No audio processing, just copy
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
shutil.copy2(input_path, output_path)
|
||||||
|
return
|
||||||
# Build audio filter chain (order matters!)
|
# Build audio filter chain (order matters!)
|
||||||
audio_filters: list[str] = []
|
audio_filters: list[str] = []
|
||||||
|
|
||||||
@@ -2036,6 +2133,7 @@ def parse_audio_normalize_config(config: dict[str, Any]) -> AudioNormalizeConfig
|
|||||||
)
|
)
|
||||||
|
|
||||||
return AudioNormalizeConfig(
|
return AudioNormalizeConfig(
|
||||||
|
enabled=bool(config.get("enabled", True)),
|
||||||
# Parametric EQ
|
# Parametric EQ
|
||||||
eq_bands=eq_bands,
|
eq_bands=eq_bands,
|
||||||
# Room treatment
|
# Room treatment
|
||||||
@@ -2144,6 +2242,7 @@ def stitch_narration_segments(
|
|||||||
output_path: Path,
|
output_path: Path,
|
||||||
verbose: bool = False,
|
verbose: bool = False,
|
||||||
default_end_trim: float = 0.0,
|
default_end_trim: float = 0.0,
|
||||||
|
loudnorm_config: Optional[dict] = None,
|
||||||
) -> Path:
|
) -> Path:
|
||||||
"""
|
"""
|
||||||
Stitch multiple narration video segments into a single file.
|
Stitch multiple narration video segments into a single file.
|
||||||
@@ -2334,7 +2433,13 @@ def stitch_narration_segments(
|
|||||||
output_path.parent / f"{output_path.stem}_normalized{output_path.suffix}"
|
output_path.parent / f"{output_path.stem}_normalized{output_path.suffix}"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Use EBU R128 loudnorm targeting YouTube's recommended levels
|
# Build loudnorm filter string from project config (or fall back to defaults)
|
||||||
|
_cfg = loudnorm_config or {}
|
||||||
|
_lufs = float(_cfg.get("target_lufs", -14))
|
||||||
|
_lra = float(_cfg.get("target_lra", 11))
|
||||||
|
_tp = float(_cfg.get("target_tp", -1.5))
|
||||||
|
loudnorm_filter = f"loudnorm=I={_lufs:.1f}:LRA={_lra:.1f}:TP={_tp:.1f}"
|
||||||
|
|
||||||
loudnorm_cmd = [
|
loudnorm_cmd = [
|
||||||
"ffmpeg",
|
"ffmpeg",
|
||||||
"-y",
|
"-y",
|
||||||
@@ -2343,7 +2448,7 @@ def stitch_narration_segments(
|
|||||||
"-c:v",
|
"-c:v",
|
||||||
"copy",
|
"copy",
|
||||||
"-af",
|
"-af",
|
||||||
"loudnorm=I=-14:LRA=11:TP=-1.5",
|
loudnorm_filter,
|
||||||
"-c:a",
|
"-c:a",
|
||||||
"aac",
|
"aac",
|
||||||
"-b:a",
|
"-b:a",
|
||||||
|
|||||||
+211
-60
@@ -1,36 +1,19 @@
|
|||||||
"""Push project metadata to gnommoweb server.
|
"""Push project metadata to gnommoeditor (prod) or gnommoweb (local).
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
gnommo push -p video1 # push parent video project
|
gnommo push -p video1 # push parent video project
|
||||||
gnommo push -p short_pixelated_universe # push a short project
|
gnommo push -p short_pixelated_universe # push a short project
|
||||||
gnommo push -p myproject --force # force push, overwrite server
|
gnommo push -p myproject --force # force push, overwrite server
|
||||||
|
|
||||||
Reads project.json and POSTs to POST /api/projects/push.
|
Reads project.json and companion JSON files, then POSTs to:
|
||||||
|
Production: POST /api/ingest (gnommoeditor, uses INGEST_API_KEY)
|
||||||
If project.json contains a "parent_project" field, the project is pushed
|
Local: POST /api/projects/push (gnommoweb, uses GNOMMOWEB_API_KEY)
|
||||||
as a short and registered under that parent. Otherwise it is pushed as a
|
|
||||||
parent video project.
|
|
||||||
|
|
||||||
Parent project.json "shorts" field is a list of slugs (just an index):
|
|
||||||
"shorts": ["short_pixelated_universe", "short_planck_length"]
|
|
||||||
|
|
||||||
Short project.json has its own full config plus a parent_project field:
|
|
||||||
{
|
|
||||||
"id": "short_pixelated_universe",
|
|
||||||
"parent_project": "Video1",
|
|
||||||
"resolution": [1080, 1920],
|
|
||||||
"fps": 30,
|
|
||||||
"duration_seconds": 60,
|
|
||||||
...
|
|
||||||
}
|
|
||||||
|
|
||||||
Conflict detection:
|
|
||||||
- If server.updated_at > our recorded server_updated_at → server has newer changes
|
|
||||||
→ warn and abort unless --force
|
|
||||||
|
|
||||||
Configuration (from .env or environment):
|
Configuration (from .env or environment):
|
||||||
GNOMMOWEB_URL Base URL (e.g. http://localhost:3001)
|
GNOMMOEDITOR_URL Base URL for production (e.g. https://editor.glitch.university)
|
||||||
GNOMMOWEB_API_KEY Bearer token (CONTENT_API_KEY from gnommoweb)
|
INGEST_API_KEY Bearer token for gnommoeditor ingest endpoint
|
||||||
|
GNOMMOWEB_URL Base URL for local dev (e.g. http://localhost:3001)
|
||||||
|
GNOMMOWEB_API_KEY Bearer token for local gnommoweb
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import json
|
import json
|
||||||
@@ -85,43 +68,135 @@ def _write_sync(project_path: Path, data: dict, prod: bool = False):
|
|||||||
json.dump(data, f, indent=2)
|
json.dump(data, f, indent=2)
|
||||||
|
|
||||||
|
|
||||||
def _parse_ts(ts_str) -> datetime | None:
|
def _load_json_file(path: Path, label: str, verbose: bool) -> dict | list | None:
|
||||||
if not ts_str:
|
"""Load a JSON file, returning None if it doesn't exist."""
|
||||||
|
if not path.exists():
|
||||||
|
if verbose:
|
||||||
|
print(f" {label}: not found at {path}")
|
||||||
return None
|
return None
|
||||||
try:
|
try:
|
||||||
return datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
|
with open(path) as f:
|
||||||
except ValueError:
|
return json.load(f)
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
print(f" Warning: could not parse {label} ({path}): {e}", file=sys.stderr)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _load_text_file(path: Path, label: str) -> str | None:
|
||||||
|
"""Load a text file, returning None if it doesn't exist."""
|
||||||
|
if not path.exists():
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return path.read_text(encoding="utf-8")
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
return path.read_text(encoding="latin-1")
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_seconds(value) -> float | None:
|
||||||
|
"""Convert a time value like '30s', '1:30', or 30 into a plain float of seconds."""
|
||||||
|
if value is None:
|
||||||
|
return None
|
||||||
|
if isinstance(value, (int, float)):
|
||||||
|
return float(value)
|
||||||
|
value = str(value).strip()
|
||||||
|
if value.endswith("s"):
|
||||||
|
value = value[:-1]
|
||||||
|
if ":" in value:
|
||||||
|
parts = value.split(":")
|
||||||
|
if len(parts) == 2:
|
||||||
|
return float(parts[0]) * 60 + float(parts[1])
|
||||||
|
elif len(parts) == 3:
|
||||||
|
return float(parts[0]) * 3600 + float(parts[1]) * 60 + float(parts[2])
|
||||||
|
return float(value)
|
||||||
|
|
||||||
|
|
||||||
|
def _sanitize_time_fields(data: dict | None, fields: list[str]) -> dict | None:
|
||||||
|
"""Return a copy of dict with the given fields converted to plain floats."""
|
||||||
|
if not data:
|
||||||
|
return data
|
||||||
|
result = dict(data)
|
||||||
|
for field in fields:
|
||||||
|
if field in result and result[field] is not None:
|
||||||
|
try:
|
||||||
|
result[field] = _parse_seconds(result[field])
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
pass # leave invalid values for the server to reject with a clear error
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def _build_ingest_payload(project: dict, project_path: Path, verbose: bool) -> dict:
|
||||||
|
"""Build the rich ingest payload for gnommoeditor POST /api/ingest."""
|
||||||
|
|
||||||
|
# ── slides ────────────────────────────────────────────────────────────────
|
||||||
|
slides_path_str = project.get("slides", "slides.json")
|
||||||
|
slides_path = project_path / slides_path_str
|
||||||
|
slides = _load_json_file(slides_path, "slides", verbose)
|
||||||
|
if slides and verbose:
|
||||||
|
print(f" slides: {len(slides)} entries")
|
||||||
|
|
||||||
|
# ── manuscript ────────────────────────────────────────────────────────────
|
||||||
|
manuscript_path_str = project.get("manuscript", "manuscript.txt")
|
||||||
|
manuscript_path = project_path / manuscript_path_str
|
||||||
|
manuscript = _load_text_file(manuscript_path, "manuscript")
|
||||||
|
if manuscript:
|
||||||
|
print(f" manuscript: {len(manuscript)} chars")
|
||||||
|
elif verbose:
|
||||||
|
print(f" manuscript: not found at {manuscript_path}")
|
||||||
|
|
||||||
|
# ── narration ─────────────────────────────────────────────────────────────
|
||||||
|
narration_path_str = project.get("narration", "narration.json")
|
||||||
|
narration_path = project_path / narration_path_str
|
||||||
|
narration = _load_json_file(narration_path, "narration", verbose)
|
||||||
|
|
||||||
|
# ── audio ─────────────────────────────────────────────────────────────────
|
||||||
|
audio_path_str = project.get("audio_tracks", "audio.json")
|
||||||
|
audio_path = project_path / audio_path_str
|
||||||
|
audio = _load_json_file(audio_path, "audio", verbose)
|
||||||
|
|
||||||
|
# ── videos ────────────────────────────────────────────────────────────────
|
||||||
|
videos_path_str = project.get("videos", "videos.json")
|
||||||
|
videos_path = project_path / videos_path_str
|
||||||
|
videos = _load_json_file(videos_path, "videos", verbose)
|
||||||
|
|
||||||
|
# ── citations ─────────────────────────────────────────────────────────────
|
||||||
|
citations_path = project_path / "citations.json"
|
||||||
|
citations = _load_json_file(citations_path, "citations", verbose)
|
||||||
|
|
||||||
|
# Sanitize time fields — convert "30s", "1:30" etc. to plain floats
|
||||||
|
_VIDEO_TIME_FIELDS = ["duration", "pause_narration", "skip", "take"]
|
||||||
|
_NARRATION_TIME_FIELDS = ["skip", "take"]
|
||||||
|
_AUDIO_TIME_FIELDS = ["overlap", "duration"]
|
||||||
|
|
||||||
|
if videos:
|
||||||
|
videos = {
|
||||||
|
k: _sanitize_time_fields(v, _VIDEO_TIME_FIELDS) for k, v in videos.items()
|
||||||
|
}
|
||||||
|
if narration:
|
||||||
|
narration = {
|
||||||
|
k: _sanitize_time_fields(v, _NARRATION_TIME_FIELDS)
|
||||||
|
for k, v in narration.items()
|
||||||
|
}
|
||||||
|
if audio:
|
||||||
|
audio = {
|
||||||
|
k: _sanitize_time_fields(v, _AUDIO_TIME_FIELDS) for k, v in audio.items()
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
"project": project,
|
||||||
|
"slides": slides,
|
||||||
|
"manuscript": manuscript,
|
||||||
|
"narration": narration,
|
||||||
|
"audio": audio,
|
||||||
|
"videos": videos,
|
||||||
|
"citations": citations,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def cmd_push(
|
def cmd_push(
|
||||||
project_path: Path, verbose: bool = False, force: bool = False, prod: bool = False
|
project_path: Path, verbose: bool = False, force: bool = False, prod: bool = False
|
||||||
) -> int:
|
) -> int:
|
||||||
_load_env_file()
|
_load_env_file()
|
||||||
|
|
||||||
if prod:
|
|
||||||
api_url = os.environ.get("GNOMMOWEB_PROD_URL", "").rstrip("/")
|
|
||||||
api_key = os.environ.get("GNOMMOWEB_PROD_API_KEY", "")
|
|
||||||
if not api_url:
|
|
||||||
print("Error: GNOMMOWEB_PROD_URL is not set.", file=sys.stderr)
|
|
||||||
return 1
|
|
||||||
if not api_key:
|
|
||||||
print("Error: GNOMMOWEB_PROD_API_KEY is not set.", file=sys.stderr)
|
|
||||||
return 1
|
|
||||||
else:
|
|
||||||
api_url = os.environ.get("GNOMMOWEB_URL", "").rstrip("/")
|
|
||||||
api_key = os.environ.get("GNOMMOWEB_API_KEY", "")
|
|
||||||
if not api_url:
|
|
||||||
print("Error: GNOMMOWEB_URL is not set.", file=sys.stderr)
|
|
||||||
return 1
|
|
||||||
if not api_key:
|
|
||||||
print("Error: GNOMMOWEB_API_KEY is not set.", file=sys.stderr)
|
|
||||||
return 1
|
|
||||||
|
|
||||||
if verbose:
|
|
||||||
target = "production" if prod else "local"
|
|
||||||
print(f" → {target}: {api_url}")
|
|
||||||
|
|
||||||
project_file = project_path / "project.json"
|
project_file = project_path / "project.json"
|
||||||
if not project_file.exists():
|
if not project_file.exists():
|
||||||
print(f"Error: {project_file} not found", file=sys.stderr)
|
print(f"Error: {project_file} not found", file=sys.stderr)
|
||||||
@@ -136,9 +211,90 @@ def cmd_push(
|
|||||||
print("Error: project.json must have 'id' and 'name' fields.", file=sys.stderr)
|
print("Error: project.json must have 'id' and 'name' fields.", file=sys.stderr)
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
|
if prod:
|
||||||
|
return _push_prod(project, project_path, verbose)
|
||||||
|
else:
|
||||||
|
return _push_local(project, project_path, verbose, force)
|
||||||
|
|
||||||
|
|
||||||
|
# ── Production: gnommoeditor POST /api/ingest ─────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def _push_prod(project: dict, project_path: Path, verbose: bool) -> int:
|
||||||
|
api_url = os.environ.get("GNOMMOEDITOR_URL", "").rstrip("/")
|
||||||
|
api_key = os.environ.get("INGEST_API_KEY", "")
|
||||||
|
if not api_url:
|
||||||
|
print("Error: GNOMMOEDITOR_URL is not set.", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
if not api_key:
|
||||||
|
print("Error: INGEST_API_KEY is not set.", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
project_id = project["id"]
|
||||||
|
payload = _build_ingest_payload(project, project_path, verbose)
|
||||||
|
|
||||||
|
# Attach sync state so the server can record it
|
||||||
|
sync = _read_sync(project_path, prod=True)
|
||||||
|
if sync:
|
||||||
|
payload["sync"] = sync
|
||||||
|
|
||||||
|
print(f" → {api_url}/api/ingest")
|
||||||
|
|
||||||
|
try:
|
||||||
|
r = requests.post(
|
||||||
|
f"{api_url}/api/ingest",
|
||||||
|
json=payload,
|
||||||
|
headers={"Authorization": f"Bearer {api_key}"},
|
||||||
|
timeout=30,
|
||||||
|
)
|
||||||
|
except requests.exceptions.ConnectionError:
|
||||||
|
print(f"✗ Could not connect to {api_url}")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if not r.ok:
|
||||||
|
try:
|
||||||
|
body = r.json()
|
||||||
|
except Exception:
|
||||||
|
body = r.text[:500]
|
||||||
|
print(f"✗ Server returned {r.status_code}: {body}")
|
||||||
|
return 1
|
||||||
|
|
||||||
|
result = r.json()
|
||||||
|
video_id = result.get("video_id")
|
||||||
|
slides_upserted = result.get("slides_upserted", 0)
|
||||||
|
|
||||||
|
# Update sync state
|
||||||
|
now_iso = datetime.now(tz=timezone.utc).isoformat(timespec="seconds")
|
||||||
|
existing_sync = _read_sync(project_path, prod=True)
|
||||||
|
_write_sync(
|
||||||
|
project_path,
|
||||||
|
{**existing_sync, "last_pushed_at": now_iso},
|
||||||
|
prod=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
print(f"✓ {project_id} → video #{video_id} ({slides_upserted} slides)")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
# ── Local dev: gnommoweb POST /api/projects/push ──────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def _push_local(project: dict, project_path: Path, verbose: bool, force: bool) -> int:
|
||||||
|
api_url = os.environ.get("GNOMMOWEB_URL", "").rstrip("/")
|
||||||
|
api_key = os.environ.get("GNOMMOWEB_API_KEY", "")
|
||||||
|
if not api_url:
|
||||||
|
print("Error: GNOMMOWEB_URL is not set.", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
if not api_key:
|
||||||
|
print("Error: GNOMMOWEB_API_KEY is not set.", file=sys.stderr)
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if verbose:
|
||||||
|
print(f" → local: {api_url}")
|
||||||
|
|
||||||
|
project_id = project["id"]
|
||||||
parent_project = project.get("parent_project")
|
parent_project = project.get("parent_project")
|
||||||
|
|
||||||
# ── Build payload ─────────────────────────────────────────────────────────
|
|
||||||
if parent_project:
|
if parent_project:
|
||||||
payload = _build_short_payload(project, project_path, verbose)
|
payload = _build_short_payload(project, project_path, verbose)
|
||||||
else:
|
else:
|
||||||
@@ -148,7 +304,6 @@ def cmd_push(
|
|||||||
kind = "short" if parent_project else "parent video"
|
kind = "short" if parent_project else "parent video"
|
||||||
print(f"Pushing {project_id} ({kind}) to {api_url}")
|
print(f"Pushing {project_id} ({kind}) to {api_url}")
|
||||||
|
|
||||||
# ── POST ──────────────────────────────────────────────────────────────────
|
|
||||||
try:
|
try:
|
||||||
r = requests.post(
|
r = requests.post(
|
||||||
f"{api_url}/api/projects/push",
|
f"{api_url}/api/projects/push",
|
||||||
@@ -171,9 +326,8 @@ def cmd_push(
|
|||||||
result = r.json()
|
result = r.json()
|
||||||
server_updated_at = result.get("server_updated_at")
|
server_updated_at = result.get("server_updated_at")
|
||||||
|
|
||||||
# ── Write sync state ──────────────────────────────────────────────────────
|
|
||||||
now_iso = datetime.now(tz=timezone.utc).isoformat(timespec="seconds")
|
now_iso = datetime.now(tz=timezone.utc).isoformat(timespec="seconds")
|
||||||
existing_sync = _read_sync(project_path, prod)
|
existing_sync = _read_sync(project_path, prod=False)
|
||||||
_write_sync(
|
_write_sync(
|
||||||
project_path,
|
project_path,
|
||||||
{
|
{
|
||||||
@@ -181,10 +335,9 @@ def cmd_push(
|
|||||||
"last_pushed_at": now_iso,
|
"last_pushed_at": now_iso,
|
||||||
"server_updated_at": server_updated_at,
|
"server_updated_at": server_updated_at,
|
||||||
},
|
},
|
||||||
prod,
|
prod=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
# ── Print summary ─────────────────────────────────────────────────────────
|
|
||||||
asset = result.get("asset", {})
|
asset = result.get("asset", {})
|
||||||
if result.get("type") == "short":
|
if result.get("type") == "short":
|
||||||
print(f"✓ {project_id} → gn_asset #{asset.get('id')} [{asset.get('status')}]")
|
print(f"✓ {project_id} → gn_asset #{asset.get('id')} [{asset.get('status')}]")
|
||||||
@@ -202,7 +355,6 @@ def cmd_push(
|
|||||||
|
|
||||||
|
|
||||||
def _build_parent_payload(project: dict, project_path: Path, verbose: bool) -> dict:
|
def _build_parent_payload(project: dict, project_path: Path, verbose: bool) -> dict:
|
||||||
# Read the manuscript file if one is specified
|
|
||||||
script_content = None
|
script_content = None
|
||||||
manuscript_str = project.get("manuscript")
|
manuscript_str = project.get("manuscript")
|
||||||
if manuscript_str:
|
if manuscript_str:
|
||||||
@@ -238,7 +390,6 @@ def _build_parent_payload(project: dict, project_path: Path, verbose: bool) -> d
|
|||||||
|
|
||||||
|
|
||||||
def _build_short_payload(project: dict, project_path: Path, verbose: bool) -> dict:
|
def _build_short_payload(project: dict, project_path: Path, verbose: bool) -> dict:
|
||||||
# Read the script file if one is specified
|
|
||||||
script_content = None
|
script_content = None
|
||||||
script_path_str = project.get("script")
|
script_path_str = project.get("script")
|
||||||
if script_path_str:
|
if script_path_str:
|
||||||
|
|||||||
+178
-100
@@ -237,8 +237,27 @@ def _resolve_video_path(
|
|||||||
source_path = base_dir / video_source.source_file
|
source_path = base_dir / video_source.source_file
|
||||||
if project_path:
|
if project_path:
|
||||||
resolved, _ = resolve_with_cache(source_path, project_path)
|
resolved, _ = resolve_with_cache(source_path, project_path)
|
||||||
return resolved
|
else:
|
||||||
return source_path
|
resolved = source_path
|
||||||
|
|
||||||
|
if not resolved.exists():
|
||||||
|
# File not found anywhere — substitute PlaceholderVideo so FFmpeg doesn't crash
|
||||||
|
placeholder = None
|
||||||
|
if shared_assets_dir:
|
||||||
|
p = shared_assets_dir / "PlaceholderVideo.mp4"
|
||||||
|
if project_path:
|
||||||
|
p, _ = resolve_with_cache(p, project_path)
|
||||||
|
if p.exists():
|
||||||
|
placeholder = p
|
||||||
|
if placeholder:
|
||||||
|
import sys
|
||||||
|
print(
|
||||||
|
f" Warning: {video_source.source_file} not found — using PlaceholderVideo",
|
||||||
|
file=sys.stderr,
|
||||||
|
)
|
||||||
|
return placeholder
|
||||||
|
|
||||||
|
return resolved
|
||||||
|
|
||||||
|
|
||||||
def _has_audio_stream(video_path: Path) -> bool:
|
def _has_audio_stream(video_path: Path) -> bool:
|
||||||
@@ -303,6 +322,14 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
|
|||||||
"""Build the complete FFmpeg command as a list of arguments."""
|
"""Build the complete FFmpeg command as a list of arguments."""
|
||||||
cmd = ["ffmpeg", "-y"] # -y to overwrite output
|
cmd = ["ffmpeg", "-y"] # -y to overwrite output
|
||||||
|
|
||||||
|
# Global thread limits before any -i. Without this, each format=rgba conversion
|
||||||
|
# in the filter graph (one per video layer) spawns one swscaler thread per CPU core,
|
||||||
|
# causing OOM on Apple Silicon where av_cpu_count() returns 10-11.
|
||||||
|
from .cache import get_ffmpeg_thread_count
|
||||||
|
|
||||||
|
_tc = str(get_ffmpeg_thread_count())
|
||||||
|
cmd.extend(["-threads", _tc, "-filter_threads", _tc])
|
||||||
|
|
||||||
# Resolve paths to absolute
|
# Resolve paths to absolute
|
||||||
project_path = plan.project_path.resolve()
|
project_path = plan.project_path.resolve()
|
||||||
output_path = output_path.resolve()
|
output_path = output_path.resolve()
|
||||||
@@ -354,6 +381,7 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
|
|||||||
f"Background handle '{bg_handle}' not found in shared_assets/videos.json"
|
f"Background handle '{bg_handle}' not found in shared_assets/videos.json"
|
||||||
)
|
)
|
||||||
bg_path = shared_assets_dir / bg_videos[bg_handle]["source_file"]
|
bg_path = shared_assets_dir / bg_videos[bg_handle]["source_file"]
|
||||||
|
bg_path, _ = resolve_with_cache(bg_path, plan.project_path)
|
||||||
if not bg_path.exists():
|
if not bg_path.exists():
|
||||||
raise RenderError(
|
raise RenderError(
|
||||||
f"Background file not found: {bg_path} (from handle '{bg_handle}')"
|
f"Background file not found: {bg_path} (from handle '{bg_handle}')"
|
||||||
@@ -395,13 +423,30 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
|
|||||||
video_path = _resolve_video_path(
|
video_path = _resolve_video_path(
|
||||||
videos_dir, event.video_source, shared_assets_dir, project_path
|
videos_dir, event.video_source, shared_assets_dir, project_path
|
||||||
)
|
)
|
||||||
skip = event.video_source.skip
|
skip = event.video_source.skip or 0.0
|
||||||
|
|
||||||
|
# How long this clip needs to play in the output
|
||||||
|
clip_duration = event.end_time - event.start_time
|
||||||
|
if event.video_source.take is not None:
|
||||||
|
clip_duration = min(clip_duration, event.video_source.take)
|
||||||
|
|
||||||
|
# Loop the clip if the file is shorter than the display window.
|
||||||
|
# Don't loop pause-narration videos — they intentionally play once and stop.
|
||||||
|
needs_loop = False
|
||||||
|
if event.video_source.duration is not None and not event.video_source.pause_narration:
|
||||||
|
remaining = event.video_source.duration - skip
|
||||||
|
needs_loop = remaining < clip_duration - 0.1 # 0.1 s tolerance
|
||||||
|
|
||||||
|
if needs_loop:
|
||||||
|
cmd.extend(["-stream_loop", "-1"])
|
||||||
if skip > 0:
|
if skip > 0:
|
||||||
cmd.extend(["-ss", f"{skip:.3f}"])
|
cmd.extend(["-ss", f"{skip:.3f}"])
|
||||||
cmd.extend(["-analyzeduration", "0", "-probesize", "1000"])
|
cmd.extend(["-analyzeduration", "0", "-probesize", "1000"])
|
||||||
# Use pre-probed duration to tell FFmpeg exactly how much to read,
|
# Use pre-probed duration (or loop-limited duration) to tell FFmpeg exactly
|
||||||
# preventing scans of ghost audio tracks on empty MP4 audio streams.
|
# how much to read, preventing scans of ghost audio tracks on empty streams.
|
||||||
if event.video_source.duration is not None:
|
if needs_loop:
|
||||||
|
cmd.extend(["-t", f"{clip_duration:.3f}"])
|
||||||
|
elif event.video_source.duration is not None:
|
||||||
remaining = event.video_source.duration - skip
|
remaining = event.video_source.duration - skip
|
||||||
if remaining > 0:
|
if remaining > 0:
|
||||||
cmd.extend(["-t", f"{remaining:.3f}"])
|
cmd.extend(["-t", f"{remaining:.3f}"])
|
||||||
@@ -410,7 +455,9 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
|
|||||||
input_idx += 1
|
input_idx += 1
|
||||||
has_audio = event.video_source.has_audio
|
has_audio = event.video_source.has_audio
|
||||||
if has_audio is None:
|
if has_audio is None:
|
||||||
print(f" Warning: no cached metadata for '{event.video_source.source_file}' — run 'gnommo import' to avoid slow probing")
|
print(
|
||||||
|
f" Warning: no cached metadata for '{event.video_source.source_file}' — run 'gnommo import' to avoid slow probing"
|
||||||
|
)
|
||||||
has_audio = _has_audio_stream(video_path)
|
has_audio = _has_audio_stream(video_path)
|
||||||
if has_audio:
|
if has_audio:
|
||||||
video_events_with_audio.add(i)
|
video_events_with_audio.add(i)
|
||||||
@@ -423,7 +470,7 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
|
|||||||
video_path = _resolve_video_path(
|
video_path = _resolve_video_path(
|
||||||
videos_dir, event.video_source, shared_assets_dir, project_path
|
videos_dir, event.video_source, shared_assets_dir, project_path
|
||||||
)
|
)
|
||||||
skip = event.video_source.skip
|
skip = event.video_source.skip or 0.0
|
||||||
if skip > 0:
|
if skip > 0:
|
||||||
cmd.extend(["-ss", f"{skip:.3f}"])
|
cmd.extend(["-ss", f"{skip:.3f}"])
|
||||||
cmd.extend(["-analyzeduration", "0", "-probesize", "1000"])
|
cmd.extend(["-analyzeduration", "0", "-probesize", "1000"])
|
||||||
@@ -436,7 +483,9 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
|
|||||||
input_idx += 1
|
input_idx += 1
|
||||||
has_audio = event.video_source.has_audio
|
has_audio = event.video_source.has_audio
|
||||||
if has_audio is None:
|
if has_audio is None:
|
||||||
print(f" Warning: no cached metadata for '{event.video_source.source_file}' — run 'gnommo import' to avoid slow probing")
|
print(
|
||||||
|
f" Warning: no cached metadata for '{event.video_source.source_file}' — run 'gnommo import' to avoid slow probing"
|
||||||
|
)
|
||||||
has_audio = _has_audio_stream(video_path)
|
has_audio = _has_audio_stream(video_path)
|
||||||
if has_audio:
|
if has_audio:
|
||||||
outro_events_with_audio.add(i)
|
outro_events_with_audio.add(i)
|
||||||
@@ -451,7 +500,12 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
|
|||||||
|
|
||||||
for event in plan.audio_events:
|
for event in plan.audio_events:
|
||||||
if event.audio_id not in audio_inputs:
|
if event.audio_id not in audio_inputs:
|
||||||
audio_path = audio_dir / event.audio_def.file
|
if event.audio_def.is_shared and plan.shared_assets_dir:
|
||||||
|
audio_path = (
|
||||||
|
plan.shared_assets_dir / "media" / "audio" / event.audio_def.file
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
audio_path = audio_dir / event.audio_def.file
|
||||||
audio_path, _ = resolve_with_cache(audio_path, project_path)
|
audio_path, _ = resolve_with_cache(audio_path, project_path)
|
||||||
# Use pre-probed duration from audio.json if available (set by import).
|
# Use pre-probed duration from audio.json if available (set by import).
|
||||||
# For MP3 without Xing/VBRI headers this is critical — FFmpeg otherwise
|
# For MP3 without Xing/VBRI headers this is critical — FFmpeg otherwise
|
||||||
@@ -468,7 +522,8 @@ def build_ffmpeg_command(plan: RenderPlan, output_path: Path) -> list[str]:
|
|||||||
# Cache duration for crossfade loop filter
|
# Cache duration for crossfade loop filter
|
||||||
if event.audio_def.loop and event.audio_def.overlap:
|
if event.audio_def.loop and event.audio_def.overlap:
|
||||||
audio_durations[event.audio_id] = (
|
audio_durations[event.audio_id] = (
|
||||||
file_duration if file_duration is not None
|
file_duration
|
||||||
|
if file_duration is not None
|
||||||
else _get_audio_duration(audio_path)
|
else _get_audio_duration(audio_path)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -797,13 +852,14 @@ def build_filter_complex(
|
|||||||
"""
|
"""
|
||||||
Build the filter_complex string for FFmpeg.
|
Build the filter_complex string for FFmpeg.
|
||||||
|
|
||||||
Layer structure:
|
Layer structure (bottom to top):
|
||||||
- Layer 1: Background (solid color, image, or video)
|
- Layer 1: Background (solid color, image, or video)
|
||||||
- Layer 2: Always visible videos (like talking head) in cutouts
|
- Layer 2: "below" triggered videos (vfb/vf2b/vsb) — behind slides, use with slide on top to mask
|
||||||
- Layer 3: Slides (with time-based enable)
|
- Layer 3: Slides (transparent in talking-head cutout area)
|
||||||
- Layer 4: Triggered videos in cutouts (with time-based enable)
|
- Layer 4: Always visible videos (talking head) — above slides, visible through cutout
|
||||||
- Layer 5: Camera transform
|
- Layer 5: "above" triggered videos (vft/vf2t/vst) — topmost, covers everything including talking head
|
||||||
- Layer 6: Outro videos (fullscreen, after narration ends)
|
- Layer 6: Camera transform
|
||||||
|
- Layer 7: Outro videos (fullscreen, after narration ends)
|
||||||
- Audio: Main audio mixed with triggered sound effects and outro audio
|
- Audio: Main audio mixed with triggered sound effects and outro audio
|
||||||
"""
|
"""
|
||||||
outro_inputs = outro_inputs or {}
|
outro_inputs = outro_inputs or {}
|
||||||
@@ -830,70 +886,7 @@ def build_filter_complex(
|
|||||||
|
|
||||||
current_label = "bg"
|
current_label = "bg"
|
||||||
|
|
||||||
# Overlay always_visible videos (like talking head)
|
# Layer 2: "below" triggered video overlays (vfb/vsb) — behind slides and talking head
|
||||||
# If there are narration pauses, we need to segment the video
|
|
||||||
for i, (video_id, video_source, cutout) in enumerate(plan.narration_videos):
|
|
||||||
input_idx = always_visible_inputs[i]
|
|
||||||
cut_x, cut_y, cut_width, cut_height = _calculate_cutout_position(
|
|
||||||
cutout, width, height
|
|
||||||
)
|
|
||||||
|
|
||||||
# Apply zoom factor to cutout dimensions
|
|
||||||
zoom = video_source.zoom
|
|
||||||
zoomed_width = int(cut_width * zoom)
|
|
||||||
zoomed_height = int(cut_height * zoom)
|
|
||||||
|
|
||||||
if not plan.narration_pauses:
|
|
||||||
# Simple case: no pauses, continuous overlay
|
|
||||||
# fps+setpts normalise the source to a constant frame rate and reset
|
|
||||||
# the timeline to 0 so the video stays locked to the audio track.
|
|
||||||
video_label = f"av{i}"
|
|
||||||
filters.append(
|
|
||||||
f"[{input_idx}:v]fps={plan.config.fps},setpts=PTS-STARTPTS,"
|
|
||||||
f"format=yuva444p10le,"
|
|
||||||
f"scale={zoomed_width}:{zoomed_height}:force_original_aspect_ratio=increase,"
|
|
||||||
f"crop={cut_width}:{cut_height}:(iw-{cut_width})/2:(ih-{cut_height})/2,"
|
|
||||||
f"format=rgba[{video_label}]"
|
|
||||||
)
|
|
||||||
|
|
||||||
next_label = f"avbase{i}"
|
|
||||||
filters.append(
|
|
||||||
f"[{current_label}][{video_label}]overlay=x={cut_x}:y={cut_y}[{next_label}]"
|
|
||||||
)
|
|
||||||
current_label = next_label
|
|
||||||
else:
|
|
||||||
# Complex case: narration pauses - segment the video
|
|
||||||
# Each segment is trimmed from source and positioned in output timeline
|
|
||||||
segments = _build_narration_segments(
|
|
||||||
plan.narration_pauses, plan.total_duration
|
|
||||||
)
|
|
||||||
|
|
||||||
for seg_idx, (src_start, src_end, out_start, out_end) in enumerate(
|
|
||||||
segments
|
|
||||||
):
|
|
||||||
seg_label = f"av{i}_seg{seg_idx}"
|
|
||||||
# Trim to source range, then shift PTS to output position
|
|
||||||
# setpts=PTS-STARTPTS puts segment at 0, then +offset/TB shifts to output time
|
|
||||||
pts_offset = out_start
|
|
||||||
filters.append(
|
|
||||||
f"[{input_idx}:v]trim={src_start:.3f}:{src_end:.3f},"
|
|
||||||
f"setpts=PTS-STARTPTS+{pts_offset:.3f}/TB,"
|
|
||||||
f"format=yuva444p10le,"
|
|
||||||
f"scale={zoomed_width}:{zoomed_height}:force_original_aspect_ratio=increase,"
|
|
||||||
f"crop={cut_width}:{cut_height}:(iw-{cut_width})/2:(ih-{cut_height})/2,"
|
|
||||||
f"format=rgba[{seg_label}]"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Overlay with enable for this segment's output time range
|
|
||||||
next_label = f"avbase{i}_seg{seg_idx}"
|
|
||||||
enable_expr = f"between(t\\,{out_start:.3f}\\,{out_end:.3f})"
|
|
||||||
filters.append(
|
|
||||||
f"[{current_label}][{seg_label}]overlay=x={cut_x}:y={cut_y}:"
|
|
||||||
f"enable={enable_expr}[{next_label}]"
|
|
||||||
)
|
|
||||||
current_label = next_label
|
|
||||||
|
|
||||||
# Add "below-slides" triggered video overlays (vfb/vsb or layer="below")
|
|
||||||
for i, event in enumerate(plan.video_events):
|
for i, event in enumerate(plan.video_events):
|
||||||
if event.layer != "below":
|
if event.layer != "below":
|
||||||
continue
|
continue
|
||||||
@@ -925,23 +918,112 @@ def build_filter_complex(
|
|||||||
enable_expr = f"between(t\\,{event.start_time:.3f}\\,{effective_end:.3f})"
|
enable_expr = f"between(t\\,{event.start_time:.3f}\\,{effective_end:.3f})"
|
||||||
filters.append(
|
filters.append(
|
||||||
f"[{current_label}][{video_label}]overlay="
|
f"[{current_label}][{video_label}]overlay="
|
||||||
f"x={cut_x}:y={cut_y}:enable={enable_expr}"
|
f"x={cut_x}:y={cut_y}:enable={enable_expr}:eof_action=pass"
|
||||||
f"[{next_label}]"
|
f"[{next_label}]"
|
||||||
)
|
)
|
||||||
current_label = next_label
|
current_label = next_label
|
||||||
|
|
||||||
# Add slide overlays with time-based enable
|
# Layer 3: Talking head — above below-videos, but under slides so fullscreen slides cover it
|
||||||
|
for i, (video_id, video_source, cutout) in enumerate(plan.narration_videos):
|
||||||
|
input_idx = always_visible_inputs[i]
|
||||||
|
cut_x, cut_y, cut_width, cut_height = _calculate_cutout_position(
|
||||||
|
cutout, width, height
|
||||||
|
)
|
||||||
|
|
||||||
|
zoom = video_source.zoom
|
||||||
|
zoomed_width = int(cut_width * zoom)
|
||||||
|
zoomed_height = int(cut_height * zoom)
|
||||||
|
|
||||||
|
if not plan.narration_pauses:
|
||||||
|
video_label = f"av{i}"
|
||||||
|
filters.append(
|
||||||
|
f"[{input_idx}:v]fps={plan.config.fps},setpts=PTS-STARTPTS,"
|
||||||
|
f"format=yuva444p10le,"
|
||||||
|
f"scale={zoomed_width}:{zoomed_height}:force_original_aspect_ratio=increase,"
|
||||||
|
f"crop={cut_width}:{cut_height}:(iw-{cut_width})/2:(ih-{cut_height})/2,"
|
||||||
|
f"format=rgba[{video_label}]"
|
||||||
|
)
|
||||||
|
|
||||||
|
next_label = f"avbase{i}"
|
||||||
|
filters.append(
|
||||||
|
f"[{current_label}][{video_label}]overlay=x={cut_x}:y={cut_y}[{next_label}]"
|
||||||
|
)
|
||||||
|
current_label = next_label
|
||||||
|
else:
|
||||||
|
segments = _build_narration_segments(
|
||||||
|
plan.narration_pauses, plan.total_duration
|
||||||
|
)
|
||||||
|
|
||||||
|
for seg_idx, (src_start, src_end, out_start, out_end) in enumerate(
|
||||||
|
segments
|
||||||
|
):
|
||||||
|
seg_label = f"av{i}_seg{seg_idx}"
|
||||||
|
pts_offset = out_start
|
||||||
|
filters.append(
|
||||||
|
f"[{input_idx}:v]trim={src_start:.3f}:{src_end:.3f},"
|
||||||
|
f"setpts=PTS-STARTPTS+{pts_offset:.3f}/TB,"
|
||||||
|
f"format=yuva444p10le,"
|
||||||
|
f"scale={zoomed_width}:{zoomed_height}:force_original_aspect_ratio=increase,"
|
||||||
|
f"crop={cut_width}:{cut_height}:(iw-{cut_width})/2:(ih-{cut_height})/2,"
|
||||||
|
f"format=rgba[{seg_label}]"
|
||||||
|
)
|
||||||
|
|
||||||
|
next_label = f"avbase{i}_seg{seg_idx}"
|
||||||
|
enable_expr = f"between(t\\,{out_start:.3f}\\,{out_end:.3f})"
|
||||||
|
filters.append(
|
||||||
|
f"[{current_label}][{seg_label}]overlay=x={cut_x}:y={cut_y}:"
|
||||||
|
f"enable={enable_expr}[{next_label}]"
|
||||||
|
)
|
||||||
|
current_label = next_label
|
||||||
|
|
||||||
|
# Layer 4: "mid" triggered videos (vfm/vsm) — above talking head, below slides
|
||||||
|
# Use case: content that should show through a slide's transparent "screen hole"
|
||||||
|
for i, event in enumerate(plan.video_events):
|
||||||
|
if event.layer != "mid":
|
||||||
|
continue
|
||||||
|
video_idx = video_inputs[i]
|
||||||
|
cut_x, cut_y, cut_width, cut_height = _calculate_cutout_position(
|
||||||
|
event.cutout, width, height
|
||||||
|
)
|
||||||
|
|
||||||
|
duration = event.end_time - event.start_time
|
||||||
|
if event.video_source.take is not None:
|
||||||
|
duration = min(duration, event.video_source.take)
|
||||||
|
effective_end = event.start_time + duration
|
||||||
|
|
||||||
|
zoom = event.video_source.zoom
|
||||||
|
zoomed_width = int(cut_width * zoom)
|
||||||
|
zoomed_height = int(cut_height * zoom)
|
||||||
|
|
||||||
|
video_label = f"tvm{i}"
|
||||||
|
start_pts = event.start_time
|
||||||
|
filters.append(
|
||||||
|
f"[{video_idx}:v]format=yuva444p10le,"
|
||||||
|
f"setpts=PTS-STARTPTS+{start_pts:.3f}/TB,"
|
||||||
|
f"scale={zoomed_width}:{zoomed_height}:force_original_aspect_ratio=increase,"
|
||||||
|
f"crop={cut_width}:{cut_height}:(iw-{cut_width})/2:(ih-{cut_height})/2,"
|
||||||
|
f"format=rgba[{video_label}]"
|
||||||
|
)
|
||||||
|
|
||||||
|
next_label = f"tvmbase{i}"
|
||||||
|
enable_expr = f"between(t\\,{event.start_time:.3f}\\,{effective_end:.3f})"
|
||||||
|
filters.append(
|
||||||
|
f"[{current_label}][{video_label}]overlay="
|
||||||
|
f"x={cut_x}:y={cut_y}:enable={enable_expr}:eof_action=pass"
|
||||||
|
f"[{next_label}]"
|
||||||
|
)
|
||||||
|
current_label = next_label
|
||||||
|
|
||||||
|
# Layer 5: Slides — on top of talking head so fullscreen slides cover the narrator
|
||||||
for i, event in enumerate(plan.slide_events):
|
for i, event in enumerate(plan.slide_events):
|
||||||
slide_idx = slide_inputs[event.slide_id]
|
slide_idx = slide_inputs[event.slide_id]
|
||||||
|
|
||||||
# Scale slide to full frame size (transparent areas show through)
|
|
||||||
slide_label = f"s{i}"
|
slide_label = f"s{i}"
|
||||||
filters.append(
|
filters.append(
|
||||||
f"[{slide_idx}:v]scale={width}:{height}:"
|
f"[{slide_idx}:v]scale={width}:{height}:"
|
||||||
f"force_original_aspect_ratio=decrease,pad={width}:{height}:(ow-iw)/2:(oh-ih)/2:color=0x00000000[{slide_label}]"
|
f"force_original_aspect_ratio=decrease,pad={width}:{height}:(ow-iw)/2:(oh-ih)/2:color=0x00000000[{slide_label}]"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Overlay at 0,0 (full frame) with time-based enable
|
|
||||||
next_label = f"sbase{i}"
|
next_label = f"sbase{i}"
|
||||||
enable_expr = f"between(t\\,{event.start_time:.3f}\\,{event.end_time:.3f})"
|
enable_expr = f"between(t\\,{event.start_time:.3f}\\,{event.end_time:.3f})"
|
||||||
filters.append(
|
filters.append(
|
||||||
@@ -949,10 +1031,10 @@ def build_filter_complex(
|
|||||||
f"x=0:y=0:enable={enable_expr}"
|
f"x=0:y=0:enable={enable_expr}"
|
||||||
f"[{next_label}]"
|
f"[{next_label}]"
|
||||||
)
|
)
|
||||||
|
|
||||||
current_label = next_label
|
current_label = next_label
|
||||||
|
|
||||||
# Add "above-slides" triggered video overlays (vft/vst or layer="above")
|
# Layer 6: "above" triggered videos (vft/vf2t/vst) — topmost, covers slides and talking head
|
||||||
|
# Use case: fullscreen video that intentionally masks the narrator
|
||||||
for i, event in enumerate(plan.video_events):
|
for i, event in enumerate(plan.video_events):
|
||||||
if event.layer != "above":
|
if event.layer != "above":
|
||||||
continue
|
continue
|
||||||
@@ -961,22 +1043,15 @@ def build_filter_complex(
|
|||||||
event.cutout, width, height
|
event.cutout, width, height
|
||||||
)
|
)
|
||||||
|
|
||||||
# Calculate effective end time (respecting 'take' parameter)
|
|
||||||
duration = event.end_time - event.start_time
|
duration = event.end_time - event.start_time
|
||||||
if event.video_source.take is not None:
|
if event.video_source.take is not None:
|
||||||
duration = min(duration, event.video_source.take)
|
duration = min(duration, event.video_source.take)
|
||||||
effective_end = event.start_time + duration
|
effective_end = event.start_time + duration
|
||||||
|
|
||||||
# Apply zoom factor to cutout dimensions
|
|
||||||
zoom = event.video_source.zoom
|
zoom = event.video_source.zoom
|
||||||
zoomed_width = int(cut_width * zoom)
|
zoomed_width = int(cut_width * zoom)
|
||||||
zoomed_height = int(cut_height * zoom)
|
zoomed_height = int(cut_height * zoom)
|
||||||
|
|
||||||
# Scale to cover the zoomed area (like CSS object-fit: cover)
|
|
||||||
# Then crop to cutout dimensions (centered)
|
|
||||||
# Use setpts to sync video start with overlay enable time
|
|
||||||
# IMPORTANT: convert to rgba FIRST (before scale/crop) so the alpha channel
|
|
||||||
# is preserved throughout. scale in yuva444p10le can silently strip alpha.
|
|
||||||
video_label = f"tv{i}"
|
video_label = f"tv{i}"
|
||||||
start_pts = event.start_time
|
start_pts = event.start_time
|
||||||
filters.append(
|
filters.append(
|
||||||
@@ -987,16 +1062,13 @@ def build_filter_complex(
|
|||||||
f"[{video_label}]"
|
f"[{video_label}]"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Overlay with time-based enable; format=auto lets FFmpeg pick the right
|
|
||||||
# compositing format so the RGBA alpha channel is respected.
|
|
||||||
next_label = f"tvbase{i}"
|
next_label = f"tvbase{i}"
|
||||||
enable_expr = f"between(t\\,{event.start_time:.3f}\\,{effective_end:.3f})"
|
enable_expr = f"between(t\\,{event.start_time:.3f}\\,{effective_end:.3f})"
|
||||||
filters.append(
|
filters.append(
|
||||||
f"[{current_label}][{video_label}]overlay="
|
f"[{current_label}][{video_label}]overlay="
|
||||||
f"x={cut_x}:y={cut_y}:enable={enable_expr}:format=auto"
|
f"x={cut_x}:y={cut_y}:enable={enable_expr}:format=auto:eof_action=pass"
|
||||||
f"[{next_label}]"
|
f"[{next_label}]"
|
||||||
)
|
)
|
||||||
|
|
||||||
current_label = next_label
|
current_label = next_label
|
||||||
|
|
||||||
# Scene composition complete - now apply camera transform
|
# Scene composition complete - now apply camera transform
|
||||||
@@ -1262,10 +1334,13 @@ def build_filter_complex(
|
|||||||
delay_ms = int(event.start_time * 1000)
|
delay_ms = int(event.start_time * 1000)
|
||||||
label = f"tvaud{i}"
|
label = f"tvaud{i}"
|
||||||
|
|
||||||
|
vol = event.video_source.volume
|
||||||
|
vol_filter = f",volume={vol:.2f}" if vol != 1.0 else ""
|
||||||
filters.append(
|
filters.append(
|
||||||
f"[{video_idx}:a]atrim=0:{duration:.3f},"
|
f"[{video_idx}:a]atrim=0:{duration:.3f},"
|
||||||
f"asetpts=PTS-STARTPTS,"
|
f"asetpts=PTS-STARTPTS,"
|
||||||
f"adelay={delay_ms}|{delay_ms}[{label}]"
|
f"adelay={delay_ms}|{delay_ms}"
|
||||||
|
f"{vol_filter}[{label}]"
|
||||||
)
|
)
|
||||||
audio_labels_to_mix.append(f"[{label}]")
|
audio_labels_to_mix.append(f"[{label}]")
|
||||||
|
|
||||||
@@ -1281,10 +1356,13 @@ def build_filter_complex(
|
|||||||
delay_ms = int(event.start_time * 1000)
|
delay_ms = int(event.start_time * 1000)
|
||||||
label = f"outroaud{i}"
|
label = f"outroaud{i}"
|
||||||
|
|
||||||
|
vol = event.video_source.volume
|
||||||
|
vol_filter = f",volume={vol:.2f}" if vol != 1.0 else ""
|
||||||
filters.append(
|
filters.append(
|
||||||
f"[{video_idx}:a]atrim=0:{duration:.3f},"
|
f"[{video_idx}:a]atrim=0:{duration:.3f},"
|
||||||
f"asetpts=PTS-STARTPTS,"
|
f"asetpts=PTS-STARTPTS,"
|
||||||
f"adelay={delay_ms}|{delay_ms}[{label}]"
|
f"adelay={delay_ms}|{delay_ms}"
|
||||||
|
f"{vol_filter}[{label}]"
|
||||||
)
|
)
|
||||||
audio_labels_to_mix.append(f"[{label}]")
|
audio_labels_to_mix.append(f"[{label}]")
|
||||||
|
|
||||||
|
|||||||
+379
-179
@@ -22,12 +22,38 @@ from .models import (
|
|||||||
VideoEvent,
|
VideoEvent,
|
||||||
VideoSource,
|
VideoSource,
|
||||||
)
|
)
|
||||||
from .parser import get_video_duration
|
from .parser import get_video_duration, resolve_missing_videos
|
||||||
from .transcriber import TranscribedWord
|
from .transcriber import TranscribedWord
|
||||||
|
|
||||||
# Audio trigger offset: play sound this many seconds before the marker
|
# Audio trigger offset: play sound this many seconds before the marker
|
||||||
AUDIO_OFFSET_SECONDS = 1.0
|
AUDIO_OFFSET_SECONDS = 1.0
|
||||||
|
|
||||||
|
# Shorthand marker prefix → (cutout_name, layer).
|
||||||
|
# These are the ETL source-of-truth: when a manuscript contains [vft:X],
|
||||||
|
# that projects cutout="fullscreen" and layer="above" into videos.json for X.
|
||||||
|
# The pause-variant entries (vftp: etc.) map to the same (cutout, layer) pair as
|
||||||
|
# their base prefix; pausing narration is a per-event property, not stored in videos.json.
|
||||||
|
_SHORTHAND_PREFIXES: dict[str, tuple] = {
|
||||||
|
"vft:": ("fullscreen", "above"),
|
||||||
|
"vfb:": ("fullscreen", "below"),
|
||||||
|
"vfm:": ("fullscreen", "mid"),
|
||||||
|
"vf2t:": ("fullscreen2", "above"),
|
||||||
|
"vf2b:": ("fullscreen2", "below"),
|
||||||
|
"vf2m:": ("fullscreen2", "mid"),
|
||||||
|
"vst:": ("square", "above"),
|
||||||
|
"vsb:": ("square", "below"),
|
||||||
|
"vsm:": ("square", "mid"),
|
||||||
|
"vftp:": ("fullscreen", "above"),
|
||||||
|
"vfbp:": ("fullscreen", "below"),
|
||||||
|
"vfmp:": ("fullscreen", "mid"),
|
||||||
|
"vf2tp:": ("fullscreen2", "above"),
|
||||||
|
"vf2bp:": ("fullscreen2", "below"),
|
||||||
|
"vf2mp:": ("fullscreen2", "mid"),
|
||||||
|
"vstp:": ("square", "above"),
|
||||||
|
"vsbp:": ("square", "below"),
|
||||||
|
"vsmp:": ("square", "mid"),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class MarkerTiming:
|
class MarkerTiming:
|
||||||
@@ -134,7 +160,16 @@ def _is_known_marker(
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
# Video/narration triggers (all supported prefixes)
|
# Video/narration triggers (all supported prefixes)
|
||||||
_VIDEO_PREFIXES = ("video:", "narration:", "vft:", "vfb:", "vst:", "vsb:", "vftp:", "vfbp:", "vstp:", "vsbp:")
|
_VIDEO_PREFIXES = (
|
||||||
|
"video:",
|
||||||
|
"narration:",
|
||||||
|
"vft:", "vfb:", "vfm:",
|
||||||
|
"vf2t:", "vf2b:", "vf2m:",
|
||||||
|
"vst:", "vsb:", "vsm:",
|
||||||
|
"vftp:", "vfbp:", "vfmp:",
|
||||||
|
"vf2tp:", "vf2bp:", "vf2mp:",
|
||||||
|
"vstp:", "vsbp:", "vsmp:",
|
||||||
|
)
|
||||||
if any(marker_id.startswith(p) for p in _VIDEO_PREFIXES):
|
if any(marker_id.startswith(p) for p in _VIDEO_PREFIXES):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@@ -142,11 +177,15 @@ def _is_known_marker(
|
|||||||
if marker_id in CAMERA_PRESETS:
|
if marker_id in CAMERA_PRESETS:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# Audio markers (A followed by id)
|
# Audio markers (A followed by id, e.g., Awoosh) or audio: prefix (e.g., audio:woosh)
|
||||||
if marker_id.startswith("A") and len(marker_id) > 1:
|
if marker_id.startswith("A") and len(marker_id) > 1:
|
||||||
audio_id = marker_id[1:]
|
audio_id = marker_id[1:]
|
||||||
if audio_id in audio or audio_id.isdigit():
|
if audio_id in audio or audio_id.isdigit():
|
||||||
return True
|
return True
|
||||||
|
if marker_id.startswith("audio:") and audio is not None:
|
||||||
|
audio_id = marker_id[6:]
|
||||||
|
if audio_id in audio:
|
||||||
|
return True
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@@ -167,78 +206,81 @@ def _extract_marker_contexts(
|
|||||||
slides: dict = None,
|
slides: dict = None,
|
||||||
videos: dict = None,
|
videos: dict = None,
|
||||||
audio: dict = None,
|
audio: dict = None,
|
||||||
) -> list[tuple[str, str]]:
|
) -> list[tuple[str, str, bool, str]]:
|
||||||
"""
|
"""
|
||||||
Extract known markers and the text immediately following them from manuscript.
|
Extract known markers and the text immediately following them from manuscript.
|
||||||
|
|
||||||
Unknown markers are filtered out and stripped from following text.
|
Unknown markers are filtered out and stripped from following text.
|
||||||
Note: [cite:...] markers are already stripped at parse time.
|
Note: [cite:...] markers are already stripped at parse time.
|
||||||
|
|
||||||
Returns list of (marker_id, following_text) tuples for known markers only.
|
Returns list of (marker_id, anchor_text, is_borrowed, anchor_type) tuples.
|
||||||
|
anchor_type is "before" (default — place before the matched phrase) or
|
||||||
|
"after" (place at the end of the matched phrase — used for markers that
|
||||||
|
trail a narration block and have no following text of their own).
|
||||||
"""
|
"""
|
||||||
slides = slides or {}
|
slides = slides or {}
|
||||||
videos = videos or {}
|
videos = videos or {}
|
||||||
audio = audio or {}
|
audio = audio or {}
|
||||||
|
|
||||||
# Split by markers, keeping the markers — broad pattern handles any content
|
|
||||||
# including paths with / and - (e.g. [vfb:pexels/7670835-uhd_3840_2160_30fps])
|
|
||||||
parts = re.split(r"\[([^\]]+)\]", manuscript_text)
|
parts = re.split(r"\[([^\]]+)\]", manuscript_text)
|
||||||
|
|
||||||
# parts: [text_before, marker1, text_after1, marker2, text_after2, ...]
|
|
||||||
raw_contexts = []
|
raw_contexts = []
|
||||||
for i in range(1, len(parts), 2):
|
for i in range(1, len(parts), 2):
|
||||||
marker_id = parts[i]
|
marker_id = parts[i]
|
||||||
|
|
||||||
# Skip unknown markers entirely
|
|
||||||
if not _is_known_marker(marker_id, slides, videos, audio):
|
if not _is_known_marker(marker_id, slides, videos, audio):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Collect all following text, looking past unknown markers until the
|
|
||||||
# next known marker. This handles [S1][segment:1] text... where the
|
|
||||||
# text lives two parts ahead rather than immediately after S1.
|
|
||||||
text_pieces = []
|
text_pieces = []
|
||||||
j = i + 1
|
j = i + 1
|
||||||
while j < len(parts):
|
while j < len(parts):
|
||||||
chunk = parts[j].strip()
|
chunk = parts[j].strip()
|
||||||
if chunk:
|
if chunk:
|
||||||
text_pieces.append(chunk)
|
text_pieces.append(chunk)
|
||||||
j += 1 # advance to the marker after this text chunk
|
j += 1
|
||||||
if j >= len(parts):
|
if j >= len(parts):
|
||||||
break
|
break
|
||||||
if _is_known_marker(parts[j], slides, videos, audio):
|
if _is_known_marker(parts[j], slides, videos, audio):
|
||||||
break # stop at the next known marker
|
break
|
||||||
j += 1 # skip the unknown marker; its following text is next
|
j += 1
|
||||||
|
|
||||||
following_text = " ".join(text_pieces)
|
following_text = " ".join(text_pieces)
|
||||||
following_text = " ".join(following_text.split()) # collapse whitespace
|
following_text = " ".join(following_text.split())
|
||||||
following_text = _strip_unknown_markers(following_text, slides, videos, audio)
|
following_text = _strip_unknown_markers(following_text, slides, videos, audio)
|
||||||
following_text = " ".join(following_text.split())
|
following_text = " ".join(following_text.split())
|
||||||
raw_contexts.append((marker_id, following_text))
|
raw_contexts.append((marker_id, following_text))
|
||||||
|
|
||||||
# For markers with no following text (consecutive markers), look ahead
|
|
||||||
# Return (marker_id, following_text, is_borrowed) - is_borrowed=True means text came from look-ahead
|
|
||||||
contexts = []
|
contexts = []
|
||||||
for i, (marker_id, following_text) in enumerate(raw_contexts):
|
for i, (marker_id, following_text) in enumerate(raw_contexts):
|
||||||
if following_text:
|
if following_text:
|
||||||
# Take first ~10 words for matching
|
|
||||||
words = following_text.split()[:10]
|
words = following_text.split()[:10]
|
||||||
contexts.append((marker_id, " ".join(words), False))
|
contexts.append((marker_id, " ".join(words), False, "before"))
|
||||||
else:
|
else:
|
||||||
# Look ahead for next marker with text, but never borrow from another
|
|
||||||
# slide marker — slides must align independently to avoid two consecutive
|
|
||||||
# slides matching the same transcription position simultaneously.
|
|
||||||
borrowed = False
|
borrowed = False
|
||||||
for j in range(i + 1, len(raw_contexts)):
|
for j in range(i + 1, len(raw_contexts)):
|
||||||
next_marker_id, next_text = raw_contexts[j]
|
next_marker_id, next_text = raw_contexts[j]
|
||||||
if next_text:
|
if next_text:
|
||||||
if next_marker_id in (slides or {}):
|
if next_marker_id in (slides or {}):
|
||||||
break # Slide owns this text; give up borrowing
|
break
|
||||||
words = next_text.split()[:10]
|
words = next_text.split()[:10]
|
||||||
contexts.append((marker_id, " ".join(words), True)) # Borrowed
|
contexts.append((marker_id, " ".join(words), True, "before"))
|
||||||
borrowed = True
|
borrowed = True
|
||||||
break
|
break
|
||||||
if not borrowed:
|
if not borrowed:
|
||||||
contexts.append((marker_id, "", False))
|
# No following text and blocked by a slide boundary — look
|
||||||
|
# backward for the tail of the preceding narration block and
|
||||||
|
# anchor to the END of those words instead of extrapolating.
|
||||||
|
preceding_text = ""
|
||||||
|
for k in range(i - 1, -1, -1):
|
||||||
|
if raw_contexts[k][1]:
|
||||||
|
preceding_text = raw_contexts[k][1]
|
||||||
|
break
|
||||||
|
if preceding_text:
|
||||||
|
words = preceding_text.split()
|
||||||
|
tail = " ".join(words[-6:])
|
||||||
|
contexts.append((marker_id, tail, False, "after"))
|
||||||
|
else:
|
||||||
|
contexts.append((marker_id, "", False, "before"))
|
||||||
|
|
||||||
return contexts
|
return contexts
|
||||||
|
|
||||||
@@ -248,13 +290,18 @@ def _fuzzy_match_ratio(
|
|||||||
transcription: list[TranscribedWord],
|
transcription: list[TranscribedWord],
|
||||||
start_idx: int,
|
start_idx: int,
|
||||||
window_size: int = 10,
|
window_size: int = 10,
|
||||||
|
pre_filler: int = 30,
|
||||||
|
inter_filler: int = 3,
|
||||||
) -> tuple[float, int, int]:
|
) -> tuple[float, int, int]:
|
||||||
"""
|
"""
|
||||||
Calculate how many words from phrase match the transcription at start_idx.
|
Calculate how many words from phrase match the transcription at start_idx.
|
||||||
|
|
||||||
Words are matched sequentially: each phrase word must appear at or after
|
Words are matched sequentially. Two separate filler tolerances:
|
||||||
the position of the previous match. This prevents false matches where
|
- pre_filler: max words before the FIRST phrase word (absorbs ad-libs)
|
||||||
phrase words appear out of order or far into the window.
|
- inter_filler: max words between consecutive phrase words (keeps the
|
||||||
|
match tight so common words don't stretch the window far
|
||||||
|
into later text, which would push last_idx past subsequent
|
||||||
|
markers' positions)
|
||||||
|
|
||||||
Returns (ratio, first_match_offset, last_match_end_offset) where offsets
|
Returns (ratio, first_match_offset, last_match_end_offset) where offsets
|
||||||
are relative to start_idx. last_match_end_offset points past the last
|
are relative to start_idx. last_match_end_offset points past the last
|
||||||
@@ -263,40 +310,44 @@ def _fuzzy_match_ratio(
|
|||||||
if not phrase_words:
|
if not phrase_words:
|
||||||
return 0.0, 0, 0
|
return 0.0, 0, 0
|
||||||
|
|
||||||
words_to_check = min(len(phrase_words), window_size)
|
|
||||||
transcript_end = min(
|
|
||||||
start_idx + words_to_check + 5, len(transcription)
|
|
||||||
) # +5 for flexibility (speaker may add filler words)
|
|
||||||
|
|
||||||
if start_idx >= len(transcription):
|
if start_idx >= len(transcription):
|
||||||
return 0.0, 0, 0
|
return 0.0, 0, 0
|
||||||
|
|
||||||
|
words_to_check = min(len(phrase_words), window_size)
|
||||||
|
# Window only needs to cover pre_filler + phrase words + inter_filler slack
|
||||||
|
transcript_end = min(
|
||||||
|
start_idx + pre_filler + words_to_check + inter_filler, len(transcription)
|
||||||
|
)
|
||||||
|
|
||||||
transcript_words = [
|
transcript_words = [
|
||||||
_normalize_token(transcription[j].word)
|
_normalize_token(transcription[j].word)
|
||||||
for j in range(start_idx, transcript_end)
|
for j in range(start_idx, transcript_end)
|
||||||
]
|
]
|
||||||
|
|
||||||
# Match phrase words sequentially against transcript window
|
|
||||||
matches = 0
|
matches = 0
|
||||||
words_checked = 0
|
words_checked = 0
|
||||||
t_pos = 0 # Current search position in transcript window
|
t_pos = 0
|
||||||
first_match_offset = 0
|
first_match_offset = 0
|
||||||
last_match_end_offset = 0
|
last_match_end_offset = 0
|
||||||
|
|
||||||
for phrase_word in phrase_words[:words_to_check]:
|
for phrase_word in phrase_words[:words_to_check]:
|
||||||
normalized = _normalize_token(phrase_word)
|
normalized = _normalize_token(phrase_word)
|
||||||
if len(normalized) < 2:
|
if len(normalized) < 2:
|
||||||
continue # skip very short words (a, I, etc.) - don't count them
|
continue
|
||||||
words_checked += 1
|
words_checked += 1
|
||||||
|
|
||||||
# Search forward from current position (preserves word order)
|
# First phrase word may be preceded by a long ad-lib; subsequent words
|
||||||
for j in range(t_pos, len(transcript_words)):
|
# should appear within a few positions of each other.
|
||||||
|
if matches == 0:
|
||||||
|
search_end = min(t_pos + pre_filler + 1, len(transcript_words))
|
||||||
|
else:
|
||||||
|
search_end = min(t_pos + inter_filler + 1, len(transcript_words))
|
||||||
|
|
||||||
|
for j in range(t_pos, search_end):
|
||||||
t_word = transcript_words[j]
|
t_word = transcript_words[j]
|
||||||
matched = False
|
matched = False
|
||||||
# Exact match
|
|
||||||
if normalized == t_word:
|
if normalized == t_word:
|
||||||
matched = True
|
matched = True
|
||||||
# Allow substring match for words 4+ chars (handles plurals, tenses)
|
|
||||||
elif len(normalized) >= 4 and len(t_word) >= 4:
|
elif len(normalized) >= 4 and len(t_word) >= 4:
|
||||||
if normalized in t_word or t_word in normalized:
|
if normalized in t_word or t_word in normalized:
|
||||||
matched = True
|
matched = True
|
||||||
@@ -306,7 +357,7 @@ def _fuzzy_match_ratio(
|
|||||||
first_match_offset = j
|
first_match_offset = j
|
||||||
matches += 1
|
matches += 1
|
||||||
last_match_end_offset = j + 1
|
last_match_end_offset = j + 1
|
||||||
t_pos = j + 1 # Next word must appear after this one
|
t_pos = j + 1
|
||||||
break
|
break
|
||||||
|
|
||||||
ratio = matches / words_checked if words_checked > 0 else 0.0
|
ratio = matches / words_checked if words_checked > 0 else 0.0
|
||||||
@@ -326,11 +377,6 @@ def _find_phrase_timestamp(
|
|||||||
(-1, -1.0, 0.0, -1) if not found. word_index points to the first
|
(-1, -1.0, 0.0, -1) if not found. word_index points to the first
|
||||||
matched word. match_end_idx points past the last matched word.
|
matched word. match_end_idx points past the last matched word.
|
||||||
"""
|
"""
|
||||||
# Normalize each word individually — same method as transcript tokens.
|
|
||||||
# This keeps contractions as single tokens ("haven't" stays "haven't") so
|
|
||||||
# phrase and transcript word counts stay in sync. Using _normalize_text on
|
|
||||||
# the whole phrase would expand "haven't" → "have not" (2 words), creating
|
|
||||||
# a phantom "not" that fails to match the transcript and corrupts the window.
|
|
||||||
phrase_words = [tok for tok in (_normalize_token(w) for w in phrase.split()) if tok]
|
phrase_words = [tok for tok in (_normalize_token(w) for w in phrase.split()) if tok]
|
||||||
|
|
||||||
if not phrase_words:
|
if not phrase_words:
|
||||||
@@ -341,7 +387,6 @@ def _find_phrase_timestamp(
|
|||||||
best_first_offset = 0
|
best_first_offset = 0
|
||||||
best_end_offset = 0
|
best_end_offset = 0
|
||||||
|
|
||||||
# Slide through transcription looking for best match
|
|
||||||
for i in range(start_from, len(transcription)):
|
for i in range(start_from, len(transcription)):
|
||||||
ratio, first_offset, end_offset = _fuzzy_match_ratio(
|
ratio, first_offset, end_offset = _fuzzy_match_ratio(
|
||||||
phrase_words, transcription, i
|
phrase_words, transcription, i
|
||||||
@@ -352,13 +397,14 @@ def _find_phrase_timestamp(
|
|||||||
best_first_offset = first_offset
|
best_first_offset = first_offset
|
||||||
best_end_offset = end_offset
|
best_end_offset = end_offset
|
||||||
|
|
||||||
# If we found a very good match, stop early
|
# Sequential alignment: stop at the first position that clears the
|
||||||
if ratio >= 0.95:
|
# threshold. Continuing to scan the full transcript risks jumping
|
||||||
|
# to a higher-ratio match much later and skipping over subsequent
|
||||||
|
# markers' positions entirely.
|
||||||
|
if best_ratio >= fuzzy_threshold:
|
||||||
break
|
break
|
||||||
|
|
||||||
if best_ratio >= fuzzy_threshold and best_idx >= 0:
|
if best_ratio >= fuzzy_threshold and best_idx >= 0:
|
||||||
# Use the actual first matched word position for the timestamp,
|
|
||||||
# not the window start position
|
|
||||||
actual_idx = best_idx + best_first_offset
|
actual_idx = best_idx + best_first_offset
|
||||||
match_end_idx = best_idx + best_end_offset
|
match_end_idx = best_idx + best_end_offset
|
||||||
return actual_idx, transcription[actual_idx].start, best_ratio, match_end_idx
|
return actual_idx, transcription[actual_idx].start, best_ratio, match_end_idx
|
||||||
@@ -375,21 +421,25 @@ def align_markers_to_transcription(
|
|||||||
fuzzy_threshold: float = 0.6,
|
fuzzy_threshold: float = 0.6,
|
||||||
) -> list[MarkerTiming]:
|
) -> list[MarkerTiming]:
|
||||||
"""
|
"""
|
||||||
Align manuscript markers to transcription timestamps using fuzzy matching.
|
Align manuscript markers to transcription timestamps using fuzzy phrase matching.
|
||||||
|
|
||||||
This is the core alignment function that matches markers in manuscript.txt
|
For each known marker, extracts the text immediately following it in the
|
||||||
to their corresponding timecodes in the whisper transcription.
|
manuscript and searches for that phrase in the Whisper transcript. Markers are
|
||||||
|
matched in manuscript order, each starting its search after the previous match.
|
||||||
|
|
||||||
Unknown markers are filtered out - they aren't pronounced and shouldn't
|
The filler-word window is intentionally large (+30 words) so that ad-libbed
|
||||||
be in the render plan. Note: [cite:...] markers are stripped at parse time.
|
words spoken before or between the manuscript cue words do not prevent a match.
|
||||||
|
|
||||||
|
Unknown markers are filtered out — they aren't pronounced and shouldn't be in
|
||||||
|
the render plan. Note: [cite:...] markers are stripped at parse time.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
manuscript_text: Full manuscript with [S1], [video:xxx], etc.
|
manuscript_text: Full manuscript with [S1], [video:xxx], etc.
|
||||||
transcription: Word-level timestamps from whisper
|
transcription: Word-level timestamps from Whisper
|
||||||
slides: Slide definitions (to identify valid slide markers)
|
slides: Slide definitions (to identify valid slide markers)
|
||||||
videos: Video definitions (to identify valid video markers)
|
videos: Video definitions (to identify valid video markers)
|
||||||
audio: Audio definitions (to identify valid audio markers)
|
audio: Audio definitions (to identify valid audio markers)
|
||||||
fuzzy_threshold: Minimum match ratio (default 0.6 = 60% of words)
|
fuzzy_threshold: Minimum match ratio (default 0.6 = 60% of words must match)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of MarkerTiming with timestamps and confidence (known markers only)
|
List of MarkerTiming with timestamps and confidence (known markers only)
|
||||||
@@ -398,13 +448,10 @@ def align_markers_to_transcription(
|
|||||||
timings: list[MarkerTiming] = []
|
timings: list[MarkerTiming] = []
|
||||||
|
|
||||||
last_idx = 0
|
last_idx = 0
|
||||||
last_end_time = 0.0 # Track end time of last matched phrase
|
last_end_time = 0.0
|
||||||
|
|
||||||
for marker_id, following_text, is_borrowed in contexts:
|
for marker_id, anchor_text, is_borrowed, anchor_type in contexts:
|
||||||
# If no text (empty context), place 1 second after the previous marker/phrase
|
if not anchor_text.strip():
|
||||||
# This handles markers like [video:xxx] that appear after text
|
|
||||||
if not following_text.strip():
|
|
||||||
# Use 1 second after the previous end time
|
|
||||||
marker_time = last_end_time + 1.0
|
marker_time = last_end_time + 1.0
|
||||||
timings.append(
|
timings.append(
|
||||||
MarkerTiming(
|
MarkerTiming(
|
||||||
@@ -414,48 +461,169 @@ def align_markers_to_transcription(
|
|||||||
confidence=1.0,
|
confidence=1.0,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
# Update last_end_time so subsequent markers without text continue to offset
|
|
||||||
last_end_time = marker_time
|
last_end_time = marker_time
|
||||||
continue
|
continue
|
||||||
|
|
||||||
idx, timestamp, confidence, match_end_idx = _find_phrase_timestamp(
|
idx, timestamp, confidence, match_end_idx = _find_phrase_timestamp(
|
||||||
following_text,
|
anchor_text,
|
||||||
transcription,
|
transcription,
|
||||||
start_from=last_idx,
|
start_from=last_idx,
|
||||||
fuzzy_threshold=fuzzy_threshold,
|
fuzzy_threshold=fuzzy_threshold,
|
||||||
)
|
)
|
||||||
|
|
||||||
if idx >= 0:
|
if idx >= 0:
|
||||||
# Apply offset: marker should appear slightly before the words
|
if anchor_type == "after":
|
||||||
adjusted_time = max(0.0, timestamp - 0.5)
|
# Marker trails a narration block — place it at the END of the
|
||||||
timings.append(
|
# matched phrase (when those words finish being spoken).
|
||||||
MarkerTiming(
|
end_idx = min(match_end_idx - 1, len(transcription) - 1)
|
||||||
marker_id=marker_id,
|
marker_time = transcription[end_idx].end if transcription else 0.0
|
||||||
timestamp=adjusted_time,
|
timings.append(
|
||||||
context=following_text[:50],
|
MarkerTiming(
|
||||||
confidence=confidence,
|
marker_id=marker_id,
|
||||||
|
timestamp=marker_time,
|
||||||
|
context=f"(end of: {anchor_text[:40]})",
|
||||||
|
confidence=confidence,
|
||||||
|
)
|
||||||
)
|
)
|
||||||
)
|
|
||||||
# Only advance last_idx if this marker owns its text (not borrowed)
|
|
||||||
# If borrowed, the next marker needs to match the same text
|
|
||||||
if not is_borrowed:
|
|
||||||
last_idx = match_end_idx
|
last_idx = match_end_idx
|
||||||
# Calculate end time of this phrase for markers with no text
|
last_end_time = marker_time
|
||||||
if last_idx > 0 and last_idx <= len(transcription):
|
else:
|
||||||
last_end_time = transcription[last_idx - 1].end
|
adjusted_time = max(0.0, timestamp - 0.5)
|
||||||
else:
|
timings.append(
|
||||||
last_end_time = transcription[-1].end if transcription else 0.0
|
MarkerTiming(
|
||||||
|
marker_id=marker_id,
|
||||||
|
timestamp=adjusted_time,
|
||||||
|
context=anchor_text[:50],
|
||||||
|
confidence=confidence,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if not is_borrowed:
|
||||||
|
last_idx = match_end_idx
|
||||||
|
if last_idx > 0 and last_idx <= len(transcription):
|
||||||
|
last_end_time = transcription[last_idx - 1].end
|
||||||
|
else:
|
||||||
|
last_end_time = transcription[-1].end if transcription else 0.0
|
||||||
else:
|
else:
|
||||||
timings.append(
|
timings.append(
|
||||||
MarkerTiming(
|
MarkerTiming(
|
||||||
marker_id=marker_id,
|
marker_id=marker_id,
|
||||||
timestamp=-1.0,
|
timestamp=-1.0,
|
||||||
context=following_text[:50],
|
context=anchor_text[:50],
|
||||||
confidence=0.0,
|
confidence=0.0,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
return timings
|
# Repair pass: retry INTERPOLATED markers that the forward scan missed.
|
||||||
|
# Root cause of cascade failures: one bad match advances last_idx past
|
||||||
|
# the true positions of several subsequent markers. Fix: search in a
|
||||||
|
# bounded window [prev_marker_time - 1s, next_marker_time + 2s] so we
|
||||||
|
# avoid false early matches while still recovering from cascade failures.
|
||||||
|
if any(t.timestamp < 0 for t in timings):
|
||||||
|
for i, timing in enumerate(timings):
|
||||||
|
if timing.timestamp >= 0:
|
||||||
|
continue
|
||||||
|
|
||||||
|
marker_id, anchor_text, is_borrowed, anchor_type = contexts[i]
|
||||||
|
if not anchor_text.strip():
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Lower bound: previous matched marker's timestamp → word index.
|
||||||
|
# Repairs processed in order, so already-repaired markers count too.
|
||||||
|
prev_time = 0.0
|
||||||
|
for j in range(i - 1, -1, -1):
|
||||||
|
if timings[j].timestamp >= 0:
|
||||||
|
prev_time = max(0.0, timings[j].timestamp - 1.0)
|
||||||
|
break
|
||||||
|
win_start = next(
|
||||||
|
(j for j, w in enumerate(transcription) if w.start >= prev_time),
|
||||||
|
0,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Upper bound: next matched marker in the timings list (+2s padding)
|
||||||
|
next_time = float("inf")
|
||||||
|
for j in range(i + 1, len(timings)):
|
||||||
|
if timings[j].timestamp >= 0:
|
||||||
|
next_time = timings[j].timestamp + 2.0
|
||||||
|
break
|
||||||
|
|
||||||
|
win_end = (
|
||||||
|
next(
|
||||||
|
(j for j, w in enumerate(transcription) if w.start > next_time),
|
||||||
|
len(transcription),
|
||||||
|
)
|
||||||
|
if next_time < float("inf")
|
||||||
|
else len(transcription)
|
||||||
|
)
|
||||||
|
|
||||||
|
if win_end <= win_start:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Search in the bounded window with a relaxed threshold
|
||||||
|
sub = transcription[win_start:win_end]
|
||||||
|
idx, timestamp, confidence, match_end_idx = _find_phrase_timestamp(
|
||||||
|
anchor_text,
|
||||||
|
sub,
|
||||||
|
start_from=0,
|
||||||
|
fuzzy_threshold=max(0.4, fuzzy_threshold - 0.1),
|
||||||
|
)
|
||||||
|
|
||||||
|
if idx >= 0:
|
||||||
|
if anchor_type == "after" and match_end_idx > 0:
|
||||||
|
end_word = sub[min(match_end_idx - 1, len(sub) - 1)]
|
||||||
|
marker_time = end_word.end
|
||||||
|
else:
|
||||||
|
marker_time = max(0.0, timestamp - 0.5)
|
||||||
|
timings[i] = MarkerTiming(
|
||||||
|
marker_id=marker_id,
|
||||||
|
timestamp=marker_time,
|
||||||
|
context=f"(repaired: {anchor_text[:40]})",
|
||||||
|
confidence=confidence,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Deduplicate slide markers. The manuscript pattern [SN]\n\n[SN] text... is
|
||||||
|
# common: the first blank occurrence is a visual-transition cue and the second
|
||||||
|
# carries the narration text used for alignment. We keep the first entry in
|
||||||
|
# order (preserving manuscript position) but upgrade its timestamp to the
|
||||||
|
# best-matched value found for that ID, then drop subsequent duplicates.
|
||||||
|
slides_set = set(slides or {})
|
||||||
|
seen: dict[str, int] = {} # marker_id → index in deduped list
|
||||||
|
deduped: list[MarkerTiming] = []
|
||||||
|
for timing in timings:
|
||||||
|
if timing.marker_id not in slides_set:
|
||||||
|
deduped.append(timing)
|
||||||
|
continue
|
||||||
|
if timing.marker_id not in seen:
|
||||||
|
seen[timing.marker_id] = len(deduped)
|
||||||
|
deduped.append(timing)
|
||||||
|
else:
|
||||||
|
prev_idx = seen[timing.marker_id]
|
||||||
|
prev = deduped[prev_idx]
|
||||||
|
# Upgrade if: previous was a placeholder/interpolated and the new one is better.
|
||||||
|
# Also upgrade if previous used the backward-looking "after" anchor —
|
||||||
|
# that heuristic gives end-of-preceding-section timing, but a direct
|
||||||
|
# "before" match on the second occurrence (start-of-new-section − 0.5s)
|
||||||
|
# is more accurate for when the slide should appear.
|
||||||
|
should_upgrade = (
|
||||||
|
prev.context == "(after previous)"
|
||||||
|
and timing.context != "(after previous)"
|
||||||
|
) or (
|
||||||
|
prev.timestamp < 0
|
||||||
|
and timing.timestamp >= 0
|
||||||
|
) or (
|
||||||
|
prev.context.startswith("(end of:")
|
||||||
|
and timing.timestamp >= 0
|
||||||
|
and timing.context != "(after previous)"
|
||||||
|
and not timing.context.startswith("(end of:")
|
||||||
|
)
|
||||||
|
if should_upgrade:
|
||||||
|
deduped[prev_idx] = MarkerTiming(
|
||||||
|
marker_id=prev.marker_id,
|
||||||
|
timestamp=timing.timestamp,
|
||||||
|
context=timing.context,
|
||||||
|
confidence=timing.confidence,
|
||||||
|
)
|
||||||
|
|
||||||
|
return deduped
|
||||||
|
|
||||||
|
|
||||||
def build_render_plan(
|
def build_render_plan(
|
||||||
@@ -566,7 +734,31 @@ def build_render_plan(
|
|||||||
time_range=(time_offset, render_end_time) if slide_range else None,
|
time_range=(time_offset, render_end_time) if slide_range else None,
|
||||||
)
|
)
|
||||||
|
|
||||||
video_events = _extract_video_events(
|
# Before extracting video events, resolve any referenced videos that are missing
|
||||||
|
# from the project's videos.json by looking them up in shared_assets/videos.json.
|
||||||
|
_VIDEO_MARKER_PREFIXES = (
|
||||||
|
"video:",
|
||||||
|
"narration:",
|
||||||
|
"vft:", "vfb:", "vfm:",
|
||||||
|
"vf2t:", "vf2b:", "vf2m:",
|
||||||
|
"vst:", "vsb:", "vsm:",
|
||||||
|
"vftp:", "vfbp:", "vfmp:",
|
||||||
|
"vf2tp:", "vf2bp:", "vf2mp:",
|
||||||
|
"vstp:", "vsbp:", "vsmp:",
|
||||||
|
)
|
||||||
|
missing_video_ids = [
|
||||||
|
timing.marker_id[len(prefix) :]
|
||||||
|
for timing in marker_timings
|
||||||
|
if timing.timestamp >= 0
|
||||||
|
for prefix in _VIDEO_MARKER_PREFIXES
|
||||||
|
if timing.marker_id.startswith(prefix)
|
||||||
|
and timing.marker_id[len(prefix) :] not in videos
|
||||||
|
]
|
||||||
|
if missing_video_ids:
|
||||||
|
found = resolve_missing_videos(missing_video_ids, project_path, config)
|
||||||
|
videos.update(found)
|
||||||
|
|
||||||
|
video_events, video_warnings = _extract_video_events(
|
||||||
marker_timings,
|
marker_timings,
|
||||||
videos,
|
videos,
|
||||||
config.cutouts,
|
config.cutouts,
|
||||||
@@ -574,6 +766,13 @@ def build_render_plan(
|
|||||||
effective_duration,
|
effective_duration,
|
||||||
time_range=(time_offset, render_end_time) if slide_range else None,
|
time_range=(time_offset, render_end_time) if slide_range else None,
|
||||||
)
|
)
|
||||||
|
if video_warnings:
|
||||||
|
import sys
|
||||||
|
|
||||||
|
print("\nWarnings:", file=sys.stderr)
|
||||||
|
for w in video_warnings:
|
||||||
|
print(f" ⚠ {w}", file=sys.stderr)
|
||||||
|
print("", file=sys.stderr)
|
||||||
|
|
||||||
# Track cached files for triggered videos
|
# Track cached files for triggered videos
|
||||||
for event in video_events:
|
for event in video_events:
|
||||||
@@ -640,7 +839,10 @@ def build_render_plan(
|
|||||||
slide_event.end_time += pause_duration
|
slide_event.end_time += pause_duration
|
||||||
|
|
||||||
for vid_event in video_events:
|
for vid_event in video_events:
|
||||||
if vid_event.start_time > narration_time:
|
if vid_event is event:
|
||||||
|
# Don't shift the pause event by its own pause
|
||||||
|
continue
|
||||||
|
if vid_event.start_time >= narration_time:
|
||||||
vid_event.start_time += pause_duration
|
vid_event.start_time += pause_duration
|
||||||
if vid_event.end_time > narration_time:
|
if vid_event.end_time > narration_time:
|
||||||
vid_event.end_time += pause_duration
|
vid_event.end_time += pause_duration
|
||||||
@@ -661,9 +863,27 @@ def build_render_plan(
|
|||||||
# Save narration end time (before outro)
|
# Save narration end time (before outro)
|
||||||
narration_end_time = total_duration
|
narration_end_time = total_duration
|
||||||
|
|
||||||
|
# Include outro only when rendering to the end of the video.
|
||||||
|
# A slide_range with an explicit end slide (e.g. S1:S10) is a middle chunk —
|
||||||
|
# skip the outro so it doesn't appear on every chunk, only the last one.
|
||||||
|
is_last_chunk = not slide_range or slide_range[1] is None
|
||||||
|
|
||||||
|
# Resolve any outro videos missing from videos.json via shared_assets.
|
||||||
|
if config.outro and is_last_chunk:
|
||||||
|
missing_outro_ids = [vid_id for vid_id in config.outro if vid_id not in videos]
|
||||||
|
if missing_outro_ids:
|
||||||
|
found = resolve_missing_videos(missing_outro_ids, project_path, config)
|
||||||
|
videos.update(found)
|
||||||
|
still_missing = [vid_id for vid_id in config.outro if vid_id not in videos]
|
||||||
|
for vid_id in still_missing:
|
||||||
|
print(
|
||||||
|
f" WARNING: outro video '{vid_id}' not found in videos.json or shared_assets — skipped",
|
||||||
|
flush=True,
|
||||||
|
)
|
||||||
|
|
||||||
# Build outro events (plays after narration ends)
|
# Build outro events (plays after narration ends)
|
||||||
outro_events = _extract_outro_events(
|
outro_events = _extract_outro_events(
|
||||||
config.outro,
|
config.outro if is_last_chunk else [],
|
||||||
videos,
|
videos,
|
||||||
config.cutouts,
|
config.cutouts,
|
||||||
total_duration,
|
total_duration,
|
||||||
@@ -677,8 +897,8 @@ def build_render_plan(
|
|||||||
if outro_events:
|
if outro_events:
|
||||||
total_duration = outro_events[-1].end_time
|
total_duration = outro_events[-1].end_time
|
||||||
|
|
||||||
# Derive slides directory
|
# Derive slides directory — lowercase path for case-sensitive filesystems (WSL/Linux).
|
||||||
slides_json_path = project_path / config.slides_path
|
slides_json_path = project_path / config.slides_path.lower()
|
||||||
slides_dir = slides_json_path.parent
|
slides_dir = slides_json_path.parent
|
||||||
|
|
||||||
plan = RenderPlan(
|
plan = RenderPlan(
|
||||||
@@ -805,8 +1025,8 @@ def _extract_slide_events(
|
|||||||
|
|
||||||
events: list[SlideEvent] = []
|
events: list[SlideEvent] = []
|
||||||
for i, (marker_time, marker_id) in enumerate(resolved):
|
for i, (marker_time, marker_id) in enumerate(resolved):
|
||||||
# Each slide starts at its own marker time
|
# First slide always starts at 0 — it's the opening state of the presentation.
|
||||||
start_time = marker_time
|
start_time = 0.0 if i == 0 else marker_time
|
||||||
|
|
||||||
# End time is when the NEXT slide's marker appears, or end of video
|
# End time is when the NEXT slide's marker appears, or end of video
|
||||||
if i + 1 < len(resolved):
|
if i + 1 < len(resolved):
|
||||||
@@ -839,13 +1059,16 @@ def _extract_video_events(
|
|||||||
slides: dict[str, SlideDefinition],
|
slides: dict[str, SlideDefinition],
|
||||||
total_duration: float,
|
total_duration: float,
|
||||||
time_range: Optional[tuple[float, float]] = None,
|
time_range: Optional[tuple[float, float]] = None,
|
||||||
) -> list[VideoEvent]:
|
) -> tuple[list[VideoEvent], list[str]]:
|
||||||
"""
|
"""
|
||||||
Extract video events from aligned marker timings.
|
Extract video events from aligned marker timings.
|
||||||
|
|
||||||
- [video:xxx] events end at the next SLIDE marker
|
- [video:xxx] events end at the next SLIDE marker
|
||||||
- [narration:xxx] events run until end
|
- [narration:xxx] events run until end
|
||||||
|
|
||||||
|
Returns (events, warnings). Invalid markers are skipped and reported in warnings.
|
||||||
"""
|
"""
|
||||||
|
warnings: list[str] = []
|
||||||
range_start, range_end = time_range if time_range else (0.0, float("inf"))
|
range_start, range_end = time_range if time_range else (0.0, float("inf"))
|
||||||
|
|
||||||
# Collect slide times for video: end time calculation
|
# Collect slide times for video: end time calculation
|
||||||
@@ -857,22 +1080,14 @@ def _extract_video_events(
|
|||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
# Mapping from shorthand marker prefix → (implied_cutout_name, implied_layer)
|
# Pause-variant prefixes — the only thing the render pass still needs from
|
||||||
# These are the defaults; videos.json values act as a base but the marker wins.
|
# shorthand markers at event-build time (pause_narration is per-event, not stored in videos.json).
|
||||||
_SHORTHAND: dict[str, tuple[str, str]] = {
|
_PAUSE_PREFIXES = {"vftp:", "vfbp:", "vfmp:", "vf2tp:", "vf2bp:", "vf2mp:", "vstp:", "vsbp:", "vsmp:"}
|
||||||
"vft:": ("fullscreen", "above"),
|
|
||||||
"vfb:": ("fullscreen", "below"),
|
|
||||||
"vst:": ("square", "above"),
|
|
||||||
"vsb:": ("square", "below"),
|
|
||||||
"vftp:": ("fullscreen", "above", "pause_narration"),
|
|
||||||
"vfbp:": ("fullscreen", "below", "pause_narration"),
|
|
||||||
"vstp:": ("square", "above", "pause_narration"),
|
|
||||||
"vsbp:": ("square", "below", "pause_narration"),
|
|
||||||
}
|
|
||||||
|
|
||||||
# Collect video markers: (time, video_id, event_type, cutout_name_override, layer_override)
|
# Collect video markers: (time, video_id, event_type, pause_narration)
|
||||||
# event_type is "video" (ends at next slide) or "narration" (runs to end)
|
# video_markers: (timestamp, video_id, marker_type, pause_narration)
|
||||||
video_markers: list[tuple[float, str, str, str | None, str | None]] = []
|
# cutout and layer are read from videos.json (projected there by _project_markers_to_videos)
|
||||||
|
video_markers: list[tuple[float, str, str, bool]] = []
|
||||||
|
|
||||||
for timing in marker_timings:
|
for timing in marker_timings:
|
||||||
if timing.timestamp < 0:
|
if timing.timestamp < 0:
|
||||||
@@ -880,97 +1095,79 @@ def _extract_video_events(
|
|||||||
|
|
||||||
mid = timing.marker_id
|
mid = timing.marker_id
|
||||||
|
|
||||||
# --- shorthand markers: vft/vfb/vst/vsb ---
|
# --- shorthand markers (vft:/vfb:/vst:/vsb: and pause variants) ---
|
||||||
shorthand_match = next((p for p in _SHORTHAND if mid.startswith(p)), None)
|
shorthand_match = next(
|
||||||
|
(p for p in _SHORTHAND_PREFIXES if mid.startswith(p)), None
|
||||||
|
)
|
||||||
if shorthand_match:
|
if shorthand_match:
|
||||||
video_id = mid[len(shorthand_match) :]
|
video_id = mid[len(shorthand_match) :]
|
||||||
if video_id not in videos:
|
if video_id not in videos:
|
||||||
raise ValueError(
|
warnings.append(
|
||||||
f"Marker [{mid}] references unknown video '{video_id}'. "
|
f"[{mid}] references unknown video '{video_id}' — skipped. "
|
||||||
f"Add it to videos.json or remove the marker."
|
f"Add it to videos.json or remove the marker."
|
||||||
)
|
)
|
||||||
implied_cutout, implied_layer = _SHORTHAND[shorthand_match]
|
continue
|
||||||
if implied_cutout not in cutouts:
|
# Validate that videos.json has the correct cutout (written by ETL)
|
||||||
raise ValueError(
|
video_source = videos[video_id]
|
||||||
f"Marker [{mid}] uses shorthand '{shorthand_match}' which requires "
|
if not video_source.cutout or video_source.cutout not in cutouts:
|
||||||
f"cutout '{implied_cutout}' but it is not defined in project config. "
|
warnings.append(
|
||||||
f"Available cutouts: {list(cutouts.keys())}"
|
f"[{mid}] video '{video_id}' has no valid cutout in videos.json — "
|
||||||
|
f"run render once to project values, or set cutout manually."
|
||||||
)
|
)
|
||||||
video_markers.append(
|
continue
|
||||||
(timing.timestamp, video_id, "video", implied_cutout, implied_layer)
|
pause_narration = shorthand_match in _PAUSE_PREFIXES
|
||||||
)
|
video_markers.append((timing.timestamp, video_id, "video", pause_narration))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# --- legacy [video:xxx] ---
|
# --- legacy [video:xxx] ---
|
||||||
if mid.startswith("video:"):
|
if mid.startswith("video:"):
|
||||||
video_id = mid[6:]
|
video_id = mid[6:]
|
||||||
if video_id not in videos:
|
if video_id not in videos:
|
||||||
raise ValueError(
|
warnings.append(
|
||||||
f"Marker [video:{video_id}] references unknown video '{video_id}'. "
|
f"[video:{video_id}] references unknown video '{video_id}' — skipped."
|
||||||
f"Add it to videos.json or remove the marker."
|
|
||||||
)
|
)
|
||||||
|
continue
|
||||||
video_source = videos[video_id]
|
video_source = videos[video_id]
|
||||||
if not video_source.cutout:
|
if not video_source.cutout or video_source.cutout not in cutouts:
|
||||||
raise ValueError(
|
warnings.append(
|
||||||
f"Marker [video:{video_id}] — video '{video_id}' has no 'cutout' set in videos.json."
|
f"[video:{video_id}] has no valid cutout in videos.json — skipped."
|
||||||
)
|
)
|
||||||
if video_source.cutout not in cutouts:
|
continue
|
||||||
raise ValueError(
|
video_markers.append((timing.timestamp, video_id, "video", False))
|
||||||
f"Marker [video:{video_id}] — cutout '{video_source.cutout}' is not defined in project config. "
|
|
||||||
f"Available: {list(cutouts.keys())}"
|
|
||||||
)
|
|
||||||
video_markers.append(
|
|
||||||
(timing.timestamp, video_id, "video", None, None)
|
|
||||||
)
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# --- [narration:xxx] ---
|
# --- [narration:xxx] ---
|
||||||
if mid.startswith("narration:"):
|
if mid.startswith("narration:"):
|
||||||
video_id = mid[10:]
|
video_id = mid[10:]
|
||||||
if video_id not in videos:
|
if video_id not in videos:
|
||||||
raise ValueError(
|
warnings.append(
|
||||||
f"Marker [narration:{video_id}] references unknown video '{video_id}'. "
|
f"[narration:{video_id}] references unknown video '{video_id}' — skipped."
|
||||||
f"Add it to videos.json or remove the marker."
|
|
||||||
)
|
)
|
||||||
|
continue
|
||||||
video_source = videos[video_id]
|
video_source = videos[video_id]
|
||||||
if not video_source.cutout:
|
if not video_source.cutout or video_source.cutout not in cutouts:
|
||||||
raise ValueError(
|
warnings.append(
|
||||||
f"Marker [narration:{video_id}] — video '{video_id}' has no 'cutout' set in videos.json."
|
f"[narration:{video_id}] has no valid cutout in videos.json — skipped."
|
||||||
)
|
)
|
||||||
if video_source.cutout not in cutouts:
|
continue
|
||||||
raise ValueError(
|
video_markers.append((timing.timestamp, video_id, "narration", False))
|
||||||
f"Marker [narration:{video_id}] — cutout '{video_source.cutout}' is not defined in project config. "
|
|
||||||
f"Available: {list(cutouts.keys())}"
|
|
||||||
)
|
|
||||||
video_markers.append(
|
|
||||||
(timing.timestamp, video_id, "narration", None, None)
|
|
||||||
)
|
|
||||||
|
|
||||||
events: list[VideoEvent] = []
|
events: list[VideoEvent] = []
|
||||||
for (
|
for start_time, video_id, marker_type, pause_narration in video_markers:
|
||||||
start_time,
|
|
||||||
video_id,
|
|
||||||
marker_type,
|
|
||||||
cutout_override,
|
|
||||||
layer_override,
|
|
||||||
) in video_markers:
|
|
||||||
video_source = videos[video_id]
|
video_source = videos[video_id]
|
||||||
|
|
||||||
# Resolve cutout: marker override > videos.json cutout
|
# Read cutout and layer directly from videos.json (projected by ETL)
|
||||||
# (validation already ensured cutout exists — this is a safety assertion)
|
cutout_name = video_source.cutout
|
||||||
cutout_name = cutout_override or video_source.cutout
|
|
||||||
cutout = cutouts[cutout_name]
|
cutout = cutouts[cutout_name]
|
||||||
|
layer = video_source.layer
|
||||||
# Resolve layer: marker override > videos.json layer
|
|
||||||
layer = layer_override if layer_override is not None else video_source.layer
|
|
||||||
|
|
||||||
end_on = video_source.end_on
|
end_on = video_source.end_on
|
||||||
if end_on == "take" and video_source.take is not None:
|
if end_on == "take" and video_source.take is not None:
|
||||||
end_time = start_time + video_source.take
|
end_time = start_time + video_source.take
|
||||||
elif end_on == "end":
|
elif end_on == "end":
|
||||||
end_time = total_duration
|
end_time = total_duration
|
||||||
elif end_on == "next_slide" or (end_on is None and marker_type == "video"):
|
elif end_on in ("next_slide", "slide") or (end_on is None and marker_type == "video"):
|
||||||
# End at next slide marker
|
# End at next slide marker ("slide" is a recognised alias for "next_slide")
|
||||||
end_time = total_duration
|
end_time = total_duration
|
||||||
for slide_time in slide_times:
|
for slide_time in slide_times:
|
||||||
if slide_time > start_time:
|
if slide_time > start_time:
|
||||||
@@ -997,7 +1194,7 @@ def _extract_video_events(
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
return events
|
return events, warnings
|
||||||
|
|
||||||
|
|
||||||
def _extract_audio_events(
|
def _extract_audio_events(
|
||||||
@@ -1014,19 +1211,22 @@ def _extract_audio_events(
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
marker_id = timing.marker_id
|
marker_id = timing.marker_id
|
||||||
|
audio_id = None
|
||||||
if marker_id.startswith("A") and len(marker_id) > 1:
|
if marker_id.startswith("A") and len(marker_id) > 1:
|
||||||
audio_id = marker_id[1:]
|
audio_id = marker_id[1:]
|
||||||
if audio_id in audio:
|
elif marker_id.startswith("audio:"):
|
||||||
if timing.timestamp < range_start or timing.timestamp >= range_end:
|
audio_id = marker_id[6:]
|
||||||
continue
|
if audio_id is not None and audio_id in audio:
|
||||||
start_time = max(0, timing.timestamp - AUDIO_OFFSET_SECONDS)
|
if timing.timestamp < range_start or timing.timestamp >= range_end:
|
||||||
events.append(
|
continue
|
||||||
AudioEvent(
|
start_time = max(0, timing.timestamp - AUDIO_OFFSET_SECONDS)
|
||||||
audio_id=audio_id,
|
events.append(
|
||||||
start_time=start_time,
|
AudioEvent(
|
||||||
audio_def=audio[audio_id],
|
audio_id=audio_id,
|
||||||
)
|
start_time=start_time,
|
||||||
|
audio_def=audio[audio_id],
|
||||||
)
|
)
|
||||||
|
)
|
||||||
|
|
||||||
return events
|
return events
|
||||||
|
|
||||||
|
|||||||
+72
-11
@@ -4,7 +4,7 @@ from pathlib import Path
|
|||||||
|
|
||||||
from .cache import resolve_with_cache
|
from .cache import resolve_with_cache
|
||||||
from .errors import ValidationError, ValidationIssue
|
from .errors import ValidationError, ValidationIssue
|
||||||
from .parser import _read_json
|
from .parser import _read_json, resolve_missing_videos
|
||||||
from .models import (
|
from .models import (
|
||||||
ProjectConfig,
|
ProjectConfig,
|
||||||
SlideDefinition,
|
SlideDefinition,
|
||||||
@@ -38,6 +38,24 @@ def validate_project(
|
|||||||
issues: list[ValidationIssue] = []
|
issues: list[ValidationIssue] = []
|
||||||
warnings: list[ValidationIssue] = []
|
warnings: list[ValidationIssue] = []
|
||||||
|
|
||||||
|
# Collect video IDs actually referenced in the manuscript (for file-existence checks)
|
||||||
|
_VIDEO_PREFIXES = {
|
||||||
|
"video:": 6,
|
||||||
|
"vft:": 4, "vfb:": 4, "vfm:": 4,
|
||||||
|
"vf2t:": 5, "vf2b:": 5, "vf2m:": 5,
|
||||||
|
"vst:": 4, "vsb:": 4, "vsm:": 4,
|
||||||
|
"vftp:": 5, "vfbp:": 5, "vfmp:": 5,
|
||||||
|
"vf2tp:": 6, "vf2bp:": 6, "vf2mp:": 6,
|
||||||
|
"vstp:": 5, "vsbp:": 5, "vsmp:": 5,
|
||||||
|
}
|
||||||
|
referenced_video_ids: set[str] = set()
|
||||||
|
for marker in manuscript_markers:
|
||||||
|
prefix = next((p for p in _VIDEO_PREFIXES if marker.startswith(p)), None)
|
||||||
|
if prefix is not None:
|
||||||
|
referenced_video_ids.add(marker[_VIDEO_PREFIXES[prefix]:])
|
||||||
|
elif marker.startswith("narration:"):
|
||||||
|
referenced_video_ids.add(marker[10:])
|
||||||
|
|
||||||
# Check for malformed markers first (these are likely typos)
|
# Check for malformed markers first (these are likely typos)
|
||||||
if malformed_markers:
|
if malformed_markers:
|
||||||
for line_num, marker_text in malformed_markers:
|
for line_num, marker_text in malformed_markers:
|
||||||
@@ -57,15 +75,11 @@ def validate_project(
|
|||||||
# Skip audio markers (start with 'A' followed by audio id, e.g., Awoosh)
|
# Skip audio markers (start with 'A' followed by audio id, e.g., Awoosh)
|
||||||
if marker.startswith("A") and len(marker) > 1 and marker[1:].isalnum():
|
if marker.startswith("A") and len(marker) > 1 and marker[1:].isalnum():
|
||||||
continue
|
continue
|
||||||
|
# Skip audio: prefix markers (e.g., audio:woosh)
|
||||||
|
if marker.startswith("audio:"):
|
||||||
|
continue
|
||||||
# Validate video trigger markers — both legacy [video:xxx] and
|
# Validate video trigger markers — both legacy [video:xxx] and
|
||||||
# shorthand [vft:xxx] / [vfb:xxx] / [vst:xxx] / [vsb:xxx].
|
# shorthand [vft:xxx] / [vfb:xxx] / [vst:xxx] / [vsb:xxx].
|
||||||
_VIDEO_PREFIXES = {
|
|
||||||
"video:": 6,
|
|
||||||
"vft:": 4,
|
|
||||||
"vfb:": 4,
|
|
||||||
"vst:": 4,
|
|
||||||
"vsb:": 4,
|
|
||||||
}
|
|
||||||
matched_prefix = next(
|
matched_prefix = next(
|
||||||
(p for p in _VIDEO_PREFIXES if marker.startswith(p)), None
|
(p for p in _VIDEO_PREFIXES if marker.startswith(p)), None
|
||||||
)
|
)
|
||||||
@@ -83,6 +97,16 @@ def validate_project(
|
|||||||
project_path / "manuscript.txt",
|
project_path / "manuscript.txt",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
vs = videos[video_id]
|
||||||
|
if not vs.cutout or vs.cutout not in config.cutouts:
|
||||||
|
warnings.append(
|
||||||
|
ValidationIssue(
|
||||||
|
f"[{marker}] video '{video_id}' has no valid cutout in videos.json — "
|
||||||
|
f"run 'gnommo import' to project values, or set cutout manually.",
|
||||||
|
project_path / "manuscript.txt",
|
||||||
|
)
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Validate narration trigger markers (narration:xxx) - continuous videos
|
# Validate narration trigger markers (narration:xxx) - continuous videos
|
||||||
@@ -95,6 +119,16 @@ def validate_project(
|
|||||||
project_path / "manuscript.txt",
|
project_path / "manuscript.txt",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
vs = videos[video_id]
|
||||||
|
if not vs.cutout or vs.cutout not in config.cutouts:
|
||||||
|
warnings.append(
|
||||||
|
ValidationIssue(
|
||||||
|
f"[{marker}] video '{video_id}' has no valid cutout in videos.json — "
|
||||||
|
f"run 'gnommo import' to project values, or set cutout manually.",
|
||||||
|
project_path / "manuscript.txt",
|
||||||
|
)
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Segment markers are structural annotations, not slide references
|
# Segment markers are structural annotations, not slide references
|
||||||
@@ -120,8 +154,10 @@ def validate_project(
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Check all slide images exist
|
# Check all slide images exist
|
||||||
# Slides are in the same directory as the slides.json file
|
# Slides are in the same directory as the slides.json file.
|
||||||
slides_json_path = project_path / config.slides_path
|
# Lowercase the configured path so capital-cased project names (e.g.
|
||||||
|
# "media/slides/Video2/slides.json") resolve on case-sensitive filesystems.
|
||||||
|
slides_json_path = project_path / config.slides_path.lower()
|
||||||
slides_dir = slides_json_path.parent
|
slides_dir = slides_json_path.parent
|
||||||
|
|
||||||
for slide_id, slide_def in slides.items():
|
for slide_id, slide_def in slides.items():
|
||||||
@@ -155,6 +191,10 @@ def validate_project(
|
|||||||
shared_assets_dir = project_path.parent / "shared_assets"
|
shared_assets_dir = project_path.parent / "shared_assets"
|
||||||
|
|
||||||
for video_id, video_source in videos.items():
|
for video_id, video_source in videos.items():
|
||||||
|
# Only check files for videos actually used in this manuscript
|
||||||
|
if video_id not in referenced_video_ids:
|
||||||
|
continue
|
||||||
|
|
||||||
# Determine base directory based on is_shared flag
|
# Determine base directory based on is_shared flag
|
||||||
if video_source.is_shared:
|
if video_source.is_shared:
|
||||||
if shared_assets_dir:
|
if shared_assets_dir:
|
||||||
@@ -173,9 +213,15 @@ def validate_project(
|
|||||||
video_path = base_dir / video_source.source_file
|
video_path = base_dir / video_source.source_file
|
||||||
video_path, _ = resolve_with_cache(video_path, project_path)
|
video_path, _ = resolve_with_cache(video_path, project_path)
|
||||||
if not video_path.exists():
|
if not video_path.exists():
|
||||||
|
sf = video_source.source_file
|
||||||
|
hint = (
|
||||||
|
" — run 'gnommo pexels' to download"
|
||||||
|
if sf.startswith("pexels/")
|
||||||
|
else " — falling back to PlaceholderVideo"
|
||||||
|
)
|
||||||
warnings.append(
|
warnings.append(
|
||||||
ValidationIssue(
|
ValidationIssue(
|
||||||
f"Video file not found: {video_source.source_file} — falling back to PlaceholderVideo",
|
f"Video file not found: {sf}{hint}",
|
||||||
videos_json_path,
|
videos_json_path,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@@ -216,6 +262,7 @@ def validate_project(
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
bg_path = shared_assets_dir / bg_videos[bg_handle]["source_file"]
|
bg_path = shared_assets_dir / bg_videos[bg_handle]["source_file"]
|
||||||
|
bg_path, _ = resolve_with_cache(bg_path, project_path)
|
||||||
if not bg_path.exists():
|
if not bg_path.exists():
|
||||||
issues.append(
|
issues.append(
|
||||||
ValidationIssue(
|
ValidationIssue(
|
||||||
@@ -259,6 +306,20 @@ def validate_project(
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Check outro videos exist in videos.json or shared_assets
|
||||||
|
if config.outro:
|
||||||
|
missing_outro = [vid_id for vid_id in config.outro if vid_id not in videos]
|
||||||
|
if missing_outro:
|
||||||
|
found = resolve_missing_videos(missing_outro, project_path, config)
|
||||||
|
still_missing = [vid_id for vid_id in missing_outro if vid_id not in found]
|
||||||
|
for vid_id in still_missing:
|
||||||
|
warnings.append(
|
||||||
|
ValidationIssue(
|
||||||
|
f"Outro video '{vid_id}' not found in videos.json or shared_assets — will be skipped at render",
|
||||||
|
project_path / "project.json",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
# If any issues, raise ValidationError
|
# If any issues, raise ValidationError
|
||||||
if issues:
|
if issues:
|
||||||
raise ValidationError(issues)
|
raise ValidationError(issues)
|
||||||
|
|||||||
Executable
+10
@@ -0,0 +1,10 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
|
||||||
|
./gnommo.sh -p video1 all
|
||||||
|
./gnommo.sh -p video2 all
|
||||||
|
./gnommo.sh -p video3 all
|
||||||
|
./gnommo.sh -p video4 all
|
||||||
|
./gnommo.sh -p video5 all
|
||||||
|
./gnommo.sh -p video6 all
|
||||||
|
|
||||||
Reference in New Issue
Block a user