#!/bin/bash
# getPlaylist.sh — Fetch all video IDs from a YouTube playlist via Data API
#
# Reads the playlistId and optional max from data/playlists.json, pages through
# the YouTube Data API, sorts entries by the date parsed from the video title
# (falling back to the snippet's publishedAt date when the title has none), and
# writes videos/{id}/list.txt in date\tvideoId\ttitle format (oldest first).
#
# Usage: getPlaylist.sh <playlist-id>
#   playlist-id — the id field in playlists.json (maps to videos/{id}/ folder)

# Playlist id as given on the command line; it selects the playlists.json
# entry and names the output folder videos/<id>/.
cat=$1

# Without an id, $fl below would point at the videos/ root itself, so stop
# here with a usage hint instead of failing later with a confusing message.
if [ -z "$cat" ]; then
    echo "Usage: $(basename "$0") <playlist-id>" >&2
    exit 1
fi

# readlink -f resolves symlinks, so BASE_DIR is the parent of the directory
# holding the real script file even when it is invoked through a link.
SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
BASE_DIR="$(dirname "$SCRIPT_DIR")"

KEY_FILE="$BASE_DIR/data/yt_key.txt"

if [ ! -f "$KEY_FILE" ]; then
    echo "ERROR: Key file not found: $KEY_FILE" >&2
    exit 1
fi

# Drop every whitespace character (including the trailing newline) so the key
# can be placed in a URL as-is.
API_KEY=$(tr -d '[:space:]' < "$KEY_FILE")

if [ -z "$API_KEY" ]; then
    echo "ERROR: yt_key.txt is empty" >&2
    exit 1
fi

PLAYLISTS_JSON="$BASE_DIR/data/playlists.json"
# Output folder for this playlist.
fl="$BASE_DIR/videos/$cat"

# Look up the YouTube playlistId and optional max from playlists.json.
# The JSON path and the id are passed to Python as argv instead of being
# spliced into the program text, so quotes or backslashes in either value
# cannot break (or inject code into) the Python source.
# Prints "<playlistId> <max>" on stdout; <max> is empty when not configured.
lookup=$(python3 -c "
import json, sys
path, wanted = sys.argv[1], sys.argv[2]
with open(path, encoding='utf-8') as fh:
    data = json.load(fh)
entry = next((p for p in data['playlists'] if p['id'] == wanted), None)
if not entry:
    print(f'ERROR: no entry for id={wanted}', file=sys.stderr)
    sys.exit(1)
pl = entry.get('source', {}).get('playlistId', '')
if not pl:
    print(f'ERROR: no youtube playlistId for id={wanted}', file=sys.stderr)
    sys.exit(1)
mx = entry.get('source', {}).get('max', '')
print(pl, mx)
" "$PLAYLISTS_JSON" "$cat")
# A plain assignment from a command substitution returns the substituted
# command's exit status, so this tests python3 itself rather than `read`.
lookup_status=$?
if [ "$lookup_status" -ne 0 ]; then
    echo "Cannot find playlistId for $cat in $PLAYLISTS_JSON" >&2
    exit 1
fi

# First word is the playlistId; the remainder (possibly empty) is max.
read -r pl max_count <<< "$lookup"

# Create the output folder before any network work, so an unwritable location
# is reported right away instead of after every API page has been fetched.
if ! mkdir -p "$fl"; then
    echo "ERROR: cannot create output folder: $fl" >&2
    exit 1
fi
# Quoted so paths containing spaces or glob characters are printed verbatim.
echo "getPlaylist.sh for cat $cat folder $fl PlayListId $pl"

# Fetch playlist video IDs, titles, and dates from YouTube Data API.
# Inputs reach Python through environment variables; the heredoc delimiter is
# quoted, so the shell performs no expansion inside the Python program.
FL="$fl" PL="$pl" API_KEY="$API_KEY" MAX="$max_count" python3 << 'PYEOF'
import json, os, re, sys
import urllib.error, urllib.parse, urllib.request
from datetime import datetime

API_URL = 'https://www.googleapis.com/youtube/v3/playlistItems'

fl     = os.environ['FL']        # output folder
pl     = os.environ['PL']        # YouTube playlistId
apikey = os.environ['API_KEY']
# An empty or missing MAX means "no limit" (0).
max_count = int(os.environ['MAX']) if os.environ.get('MAX', '').strip() else 0

# Matches "<month name or 3-letter abbreviation> <day>[,] <4-digit year>".
DATE_RE = re.compile(
    r'\b(Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?'
    r'|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)'
    r'\s+(\d{1,2}),?\s+(\d{4})\b',
    re.IGNORECASE
)

def date_from_title(title):
    """Return the date written in the title, or None if there is no valid one."""
    m = DATE_RE.search(title)
    if not m:
        return None
    text = f'{m.group(1)} {m.group(2)} {m.group(3)}'
    # %B parses a full month name, %b the abbreviation; try both.
    for fmt in ('%B %d %Y', '%b %d %Y'):
        try:
            return datetime.strptime(text, fmt).date()
        except ValueError:
            continue
    # Matched the pattern but is not a real calendar date (e.g. Feb 30).
    return None

def fetch_page(page_token):
    """Request one page of playlist items and return the decoded JSON dict.

    Exits with status 1 (message on stderr) on an HTTP, network, or API error.
    """
    params = {'key': apikey, 'playlistId': pl, 'part': 'snippet', 'maxResults': 50}
    if page_token:
        params['pageToken'] = page_token
    # Log the request with the API key masked so the secret never reaches logs.
    print(f"  {API_URL}?{urllib.parse.urlencode({**params, 'key': 'REDACTED'})}")
    url = f'{API_URL}?{urllib.parse.urlencode(params)}'
    try:
        with urllib.request.urlopen(url) as r:
            data = json.load(r)
    except urllib.error.HTTPError as e:
        # urlopen raises on 4xx/5xx; try to read the API's JSON error from
        # the response body and fall back to the HTTP status line.
        try:
            msg = json.load(e)['error']['message']
        except (ValueError, KeyError, TypeError):
            msg = f'HTTP {e.code} {e.reason}'
        print(f"API ERROR: {msg}", file=sys.stderr)
        raise SystemExit(1)
    except urllib.error.URLError as e:
        print(f"API ERROR: {e.reason}", file=sys.stderr)
        raise SystemExit(1)
    if 'error' in data:
        print(f"API ERROR: {data['error']['message']}", file=sys.stderr)
        raise SystemExit(1)
    return data

entries = []
page_token = ''
while True:
    data = fetch_page(page_token)
    for item in data.get('items', []):
        snip  = item['snippet']
        vid   = snip['resourceId']['videoId']
        title = snip.get('title', '')
        # Prefer the date in the title; otherwise use the date part of publishedAt.
        date  = date_from_title(title) or datetime.fromisoformat(snip.get('publishedAt', '1970-01-01')[:10]).date()
        entries.append((date, vid, title))
    page_token = data.get('nextPageToken', '')
    if not page_token:
        break

# Oldest first; the sort is stable, so same-day items keep API order.
entries.sort(key=lambda e: e[0])

if max_count > 0 and len(entries) > max_count:
    entries = entries[-max_count:]
    print(f"Limiting to {max_count} most recent videos")

# Write to a temp file and rename, so list.txt is never left half-written.
tmp = f'{fl}/list.txt.tmp'
with open(tmp, 'w', encoding='utf-8') as f:
    for date, vid, title in entries:
        f.write(f'{date}\t{vid}\t{title}\n')
os.replace(tmp, f'{fl}/list.txt')

print(f"Fetched {len(entries)} videos from playlist (sorted by date in title)")
for date, vid, title in entries:
    print(f"  {date}  {vid}  {title}")
PYEOF
# Propagate a failed fetch instead of falling through and exiting 0.
fetch_status=$?
if [ "$fetch_status" -ne 0 ]; then
    echo "ERROR: playlist fetch failed for $cat (exit $fetch_status)" >&2
    exit "$fetch_status"
fi

# Closing banner; quoted so the date's spacing and the path are printed verbatim.
echo "getPlaylist.sh end at $(date) output $fl/list.txt"





