magicciv/tools/audio-fetch-batch.sh

160 lines
5.3 KiB
Bash
Raw Permalink Normal View History

#!/usr/bin/env bash
# Audio asset acquisition batch driver. Reads a tab-separated `mapping` file
# with rows of:
# <output_path><TAB><source_url><TAB><licence><TAB><attribution><TAB><edits_note>
# Each row triggers: curl → ffmpeg loudnorm + Ogg Vorbis encode →
# write to public/resources/<output_path> →
# append to public/resources/audio/sources.csv. Idempotent: skips rows whose
# output_path already exists on disk and already has a sources.csv row.
#
# After all rows, if at least one row was newly processed: re-renders
# LICENSES.md and runs audio-validate.py.
#
# Usage:
# bash tools/audio-fetch-batch.sh tools/audio-batch-01.tsv
#
# Mapping file format (tab-separated):
# audio/sfx/city/city_grew.ogg<TAB>https://...wav<TAB>CC0-1.0<TAB>Kenney (Calinou repackage)<TAB>loudnorm I=-16/TP=-3+wav→ogg 128kbps
# Shell options: unset variables are errors, and a pipeline fails when any
# of its stages fails. `-e` is intentionally not enabled here.
set -u
set -o pipefail

# Resolve this script's directory to an absolute path; the repository root
# is taken to be its parent directory.
script_parent="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$script_parent" && pwd)"
REPO_ROOT="$(dirname "$SCRIPT_DIR")"

# Audio assets live in the shared cross-theme pool, not under any one theme.
ASSETS_ROOT="${REPO_ROOT}/public/resources"
SOURCES_CSV="${ASSETS_ROOT}/audio/sources.csv"

# Scratch area for downloads and extracted archives.
STAGING="${REPO_ROOT}/.local/audio-staging"

# UTC date stamp (YYYY-MM-DD) recorded with each new sources.csv row.
TODAY="$(date -u +%Y-%m-%d)"

# Exactly one thing is required on the command line: the mapping file.
if (( $# == 0 )); then
  printf 'Usage: %s <mapping.tsv>\n' "$0" >&2
  exit 1
fi

MAPPING="$1"
[[ -f "$MAPPING" ]] || {
  printf 'Mapping file not found: %s\n' "$MAPPING" >&2
  exit 1
}

mkdir -p "$STAGING"

# Per-run tallies reported in the batch summary.
ok=0 skip=0 fail=0
while IFS=$'\t' read -r output_path source_url licence attribution edits; do
# Skip blank lines + comments
[ -z "$output_path" ] && continue
case "$output_path" in \#*) continue ;; esac
full_path="$ASSETS_ROOT/$output_path"
if [ -f "$full_path" ] && grep -qF "$output_path," "$SOURCES_CSV" 2>/dev/null; then
skip=$((skip + 1))
continue
fi
echo "$output_path"
mkdir -p "$(dirname "$full_path")"
stem="$(basename "$output_path" .ogg)"
# Source URL may be one of:
# 1. direct file URL ending in .wav/.ogg/.mp3/.flac
# 2. github.com blob URL (auto-converted to raw)
# 3. ZIP archive with an inner path: "<zip_url>#<inner/path/inside.wav>"
fetch_url="$source_url"
inner_path=""
if [[ "$source_url" == *"#"* ]]; then
fetch_url="${source_url%%#*}"
inner_path="${source_url#*#}"
fi
case "$fetch_url" in
https://github.com/*/blob/*)
fetch_url="$(echo "$fetch_url" | sed -e 's|github.com|raw.githubusercontent.com|' -e 's|/blob/|/|')"
;;
esac
if [ -n "$inner_path" ]; then
# ZIP path: cache the archive once per URL, extract a single
# member into staging.
zip_hash="$(printf '%s' "$fetch_url" | shasum | cut -c1-8)"
zip_cache="$STAGING/_zip_${zip_hash}.zip"
zip_extract_dir="$STAGING/_zip_${zip_hash}"
if [ ! -f "$zip_cache" ]; then
if ! curl -sfL -o "$zip_cache" "$fetch_url"; then
echo " ✗ ZIP download failed: $fetch_url" >&2
fail=$((fail + 1))
continue
fi
fi
if [ ! -d "$zip_extract_dir" ]; then
mkdir -p "$zip_extract_dir"
if ! unzip -q -o "$zip_cache" -d "$zip_extract_dir"; then
echo " ✗ ZIP extract failed: $zip_cache" >&2
fail=$((fail + 1))
continue
fi
fi
staged="$zip_extract_dir/$inner_path"
if [ ! -f "$staged" ]; then
echo " ✗ ZIP missing inner file: $inner_path" >&2
fail=$((fail + 1))
continue
fi
else
src_ext="${fetch_url##*.}"
case "$src_ext" in
wav|ogg|mp3|flac) ;;
*) src_ext="bin" ;;
esac
staged="$STAGING/${stem}.${src_ext}"
if ! curl -sfL -o "$staged" "$fetch_url"; then
echo " ✗ download failed: $fetch_url" >&2
fail=$((fail + 1))
continue
fi
fi
# loudnorm two-pass would be more accurate, but for SFX one-pass is fine
# at this scale. Music tracks should be normalised manually with two-pass.
# `-nostdin` is critical: without it ffmpeg consumes characters from the
# mapping file (the script's stdin), corrupting subsequent iterations'
# output_path values. This caused dirs like `c/`, `sic/`, `music/`,
# `udio/` to appear instead of the intended `audio/...` paths.
if ! ffmpeg -y -nostdin -hide_banner -loglevel error \
-i "$staged" \
-af "loudnorm=I=-16:TP=-3:LRA=11,aresample=44100" \
-c:a libvorbis -b:a 128k \
"$full_path"; then
echo " ✗ encode failed" >&2
fail=$((fail + 1))
continue
fi
# Append to sources.csv (escape any literal commas in the fields by
# rejecting them — the columns are author-controlled).
case "$output_path,$source_url,$licence,$attribution,$edits" in
*','*','*','*','*) : ;;
esac
printf '%s,%s,%s,%s,%s,%s\n' \
"$output_path" "$source_url" "$licence" "$attribution" "$edits" "$TODAY" \
>> "$SOURCES_CSV"
ok=$((ok + 1))
done < "$MAPPING"
# Report the per-run tallies, then — only when at least one row was newly
# processed — re-render LICENSES.md and run the validator.
# The script exits non-zero when any row failed or when either follow-up
# step fails, so callers (CI, `&&` chains) can detect a partial run.
exit_status=0

echo ""
echo "── batch summary ───────────"
echo " ok: $ok"
echo " skip: $skip (already shipped)"
echo " fail: $fail"

if [ "$fail" -gt 0 ]; then
  exit_status=1
fi

if [ "$ok" -gt 0 ]; then
  echo ""
  echo "── rendering LICENSES.md ───"
  python3 "$SCRIPT_DIR/audio-licenses-render.py" || exit_status=1
  echo ""
  echo "── validating ──────────────"
  # Validation still runs after a render failure so both problems surface
  # in a single run.
  python3 "$SCRIPT_DIR/audio-validate.py" || exit_status=1
fi

exit "$exit_status"