#!/bin/bash
# getPlaylist.sh — Fetch all video IDs from a YouTube playlist via Data API
#
# Reads the playlistId and optional max from data/playlists.json, pages through
# the YouTube Data API, sorts entries by date parsed from the video title, and
# writes videos/{cat}/list.txt in date\tvideoId\ttitle format (oldest first).
# When the title carries no recognizable date, the item's publishedAt date is
# used instead. If "max" is set, only the most recent <max> entries are kept.
#
# Usage: getPlaylist.sh <playlist-id>
#   playlist-id — the id field in playlists.json (maps to videos/{id}/ folder)
#
# Files (relative to the parent directory of this script's directory):
#   data/yt_key.txt      — YouTube Data API key (whitespace is stripped)
#   data/playlists.json  — {"playlists": [{"id": ..., "source": {"playlistId": ..., "max": ...}}]}
#
# Exit status: 0 on success, non-zero on any lookup, fetch, or write failure.

set -uo pipefail

cat=${1:-}
if [ -z "$cat" ]; then
    echo "Usage: $(basename "$0") <playlist-id>" >&2
    exit 1
fi

SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
BASE_DIR="$(dirname "$SCRIPT_DIR")"
KEY_FILE="$BASE_DIR/data/yt_key.txt"

if [ ! -f "$KEY_FILE" ]; then
    echo "ERROR: Key file not found: $KEY_FILE" >&2
    exit 1
fi

API_KEY=$(tr -d '[:space:]' < "$KEY_FILE")
if [ -z "$API_KEY" ]; then
    echo "ERROR: yt_key.txt is empty" >&2
    exit 1
fi

PLAYLISTS_JSON="$BASE_DIR/data/playlists.json"
fl="$BASE_DIR/videos/$cat"

# Look up the YouTube playlistId and optional max from playlists.json.
# Values are handed to Python through the environment (never spliced into the
# Python source), so quotes in the id or in the path cannot alter the program.
# The Python text is single-quoted for the shell, hence double quotes inside.
if ! lookup=$(CAT="$cat" PLAYLISTS_JSON="$PLAYLISTS_JSON" python3 -c '
import json, os, sys

cat = os.environ["CAT"]
with open(os.environ["PLAYLISTS_JSON"], encoding="utf-8") as f:
    data = json.load(f)
entry = next((p for p in data["playlists"] if p["id"] == cat), None)
if not entry:
    print(f"ERROR: no entry for id={cat}", file=sys.stderr)
    sys.exit(1)
pl = entry.get("source", {}).get("playlistId", "")
if not pl:
    print(f"ERROR: no youtube playlistId for id={cat}", file=sys.stderr)
    sys.exit(1)
mx = entry.get("source", {}).get("max", "")
print(pl, mx)
'); then
    echo "Cannot find playlistId for $cat in $PLAYLISTS_JSON" >&2
    exit 1
fi

# Output is "<playlistId> <max>"; max may be empty.
read -r pl max_count <<< "$lookup"
if [ -z "$pl" ]; then
    echo "Cannot find playlistId for $cat in $PLAYLISTS_JSON" >&2
    exit 1
fi

if ! mkdir -p "$fl"; then
    echo "ERROR: cannot create folder $fl" >&2
    exit 1
fi

echo "getPlaylist.sh for cat $cat folder $fl PlayListId $pl"

# Fetch playlist video IDs, titles, and dates from YouTube Data API.
# The heredoc delimiter is quoted so the shell expands nothing inside it.
FL="$fl" PL="$pl" API_KEY="$API_KEY" MAX="$max_count" python3 << 'PYEOF'
import json, os, re, sys
import urllib.error, urllib.parse, urllib.request
from datetime import date

API_URL = 'https://www.googleapis.com/youtube/v3/playlistItems'

fl = os.environ['FL']
pl = os.environ['PL']
apikey = os.environ['API_KEY']
max_count = int(os.environ['MAX']) if os.environ.get('MAX', '').strip() else 0

# Matches e.g. "January 5, 2024", "Jan 5 2024", "sep 12, 2023".
DATE_RE = re.compile(
    r'\b(Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?'
    r'|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)'
    r'\s+(\d{1,2}),?\s+(\d{4})\b',
    re.IGNORECASE
)

# Month number keyed by the first three letters (lower case) of its name.
MONTHS = {name: num for num, name in enumerate(
    ('jan', 'feb', 'mar', 'apr', 'may', 'jun',
     'jul', 'aug', 'sep', 'oct', 'nov', 'dec'), start=1)}


def date_from_title(title):
    """Return the date embedded in title, or None if absent or invalid."""
    m = DATE_RE.search(title)
    if not m:
        return None
    try:
        return date(int(m.group(3)), MONTHS[m.group(1)[:3].lower()], int(m.group(2)))
    except ValueError:
        # e.g. "Feb 31, 2024" — matches the pattern but is not a real date
        return None


def fetch_page(page_token):
    """Fetch one page of playlist items; exit with status 1 on any API error."""
    params = {'key': apikey, 'playlistId': pl, 'part': 'snippet', 'maxResults': 50}
    if page_token:
        params['pageToken'] = page_token
    # Log the request without exposing the API key.
    print(f" {API_URL}?{urllib.parse.urlencode(dict(params, key='REDACTED'))}")
    try:
        with urllib.request.urlopen(f'{API_URL}?{urllib.parse.urlencode(params)}',
                                    timeout=30) as r:
            data = json.load(r)
    except urllib.error.HTTPError as e:
        # urlopen raises on 4xx/5xx; the API's explanation is in the body.
        try:
            msg = json.load(e)['error']['message']
        except (ValueError, KeyError, TypeError):
            msg = f'HTTP {e.code} {e.reason}'
        print(f"API ERROR: {msg}", file=sys.stderr)
        raise SystemExit(1)
    except OSError as e:
        # URLError, timeouts and other network failures
        print(f"API ERROR: {e}", file=sys.stderr)
        raise SystemExit(1)
    if 'error' in data:
        print(f"API ERROR: {data['error']['message']}", file=sys.stderr)
        raise SystemExit(1)
    return data


entries = []
page_token = ''
while True:
    data = fetch_page(page_token)
    for item in data.get('items', []):
        snip = item['snippet']
        vid = snip['resourceId']['videoId']
        # Tabs/newlines in a title would corrupt the tab-separated output.
        title = re.sub(r'[\t\r\n]+', ' ', snip.get('title') or '')
        when = date_from_title(title) or date.fromisoformat(
            (snip.get('publishedAt') or '1970-01-01')[:10])
        entries.append((when, vid, title))
    page_token = data.get('nextPageToken', '')
    if not page_token:
        break

# Stable sort: entries sharing a date keep their playlist order.
entries.sort(key=lambda e: e[0])

if max_count > 0 and len(entries) > max_count:
    entries = entries[-max_count:]
    print(f"Limiting to {max_count} most recent videos")

# Write to a temp file and rename, so list.txt is never left half-written.
tmp = f'{fl}/list.txt.tmp'
with open(tmp, 'w', encoding='utf-8') as f:
    for when, vid, title in entries:
        f.write(f'{when}\t{vid}\t{title}\n')
os.replace(tmp, f'{fl}/list.txt')

print(f"Fetched {len(entries)} videos from playlist (sorted by date in title)")
for when, vid, title in entries:
    print(f" {when} {vid} {title}")
PYEOF
status=$?
if [ "$status" -ne 0 ]; then
    echo "ERROR: fetching playlist $pl failed (exit $status)" >&2
    exit "$status"
fi

echo "getPlaylist.sh end at $(date) output $fl/list.txt"