first commit
This commit is contained in:
146
fetch_card.py
Normal file
146
fetch_card.py
Normal file
@@ -0,0 +1,146 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Download Pokémon TCG card images with embedded JSON metadata.
|
||||
|
||||
Uses the TCGdex SDK to:
|
||||
1. List all sets (with configurable limit)
|
||||
2. For each set, list all cards (with configurable limit)
|
||||
3. Download each card image (PNG) and embed full card data as PNG metadata
|
||||
"""
|
||||
|
||||
import json
|
||||
import io
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from dataclasses import asdict, is_dataclass
|
||||
from pathlib import Path
|
||||
|
||||
from PIL import Image, PngImagePlugin
|
||||
from tcgdexsdk import TCGdex, Language
|
||||
from tcgdexsdk.enums import Quality, Extension
|
||||
|
||||
# ── Configuration ──────────────────────────────────────────────
# Effectively "all": limits are applied via slicing, so a huge cap
# processes every set/card; set to None to disable slicing entirely.
MAX_SETS = 10000  # Number of sets to process (None = all)
MAX_CARDS_PER_SET = 10000  # Number of cards per set (None = all)
# Images land in cards/<set_id>/<localId>.png next to this script.
OUTPUT_DIR = Path(__file__).resolve().parent / "cards"
IMAGE_QUALITY = Quality.HIGH  # TCGdex image quality variant to download
MAX_WORKERS = 8  # Parallel download threads
# ───────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def card_to_dict(card) -> dict:
    """Convert a card object to a JSON-serialisable dict, skipping SDK internals."""
    internal = {"sdk", "get_image", "get_image_url"}
    # Public, non-helper attribute names exposed by the card object.
    public_names = (
        name for name in dir(card)
        if not name.startswith("_") and name not in internal
    )
    return {
        name: _serialise(value)
        for name, value in ((n, getattr(card, n, None)) for n in public_names)
        if not callable(value)  # drop bound methods / callables
    }
|
||||
|
||||
|
||||
def _serialise(obj):
|
||||
"""Recursively convert dataclass / nested objects to plain dicts."""
|
||||
if obj is None or isinstance(obj, (str, int, float, bool)):
|
||||
return obj
|
||||
if is_dataclass(obj) and not isinstance(obj, type):
|
||||
return {
|
||||
k: _serialise(v)
|
||||
for k, v in asdict(obj).items()
|
||||
if k != "sdk"
|
||||
}
|
||||
if isinstance(obj, list):
|
||||
return [_serialise(i) for i in obj]
|
||||
if isinstance(obj, dict):
|
||||
return {k: _serialise(v) for k, v in obj.items()}
|
||||
# Fallback: try dataclass-style attribute extraction
|
||||
if hasattr(obj, "__dict__"):
|
||||
return {
|
||||
k: _serialise(v)
|
||||
for k, v in obj.__dict__.items()
|
||||
if k != "sdk"
|
||||
}
|
||||
return str(obj)
|
||||
|
||||
|
||||
def save_image_with_metadata(image_bytes: bytes, metadata: dict, path: Path):
    """Save a PNG image with JSON metadata embedded in a tEXt chunk."""
    # Serialise the full card record into a single tEXt entry.
    info = PngImagePlugin.PngInfo()
    info.add_text("pokemon_metadata", json.dumps(metadata, ensure_ascii=False))

    decoded = Image.open(io.BytesIO(image_bytes))
    path.parent.mkdir(parents=True, exist_ok=True)
    decoded.save(str(path), "PNG", pnginfo=info)
|
||||
|
||||
|
||||
def process_card(card_id: str, set_dir: Path) -> str | None:
    """Fetch card data + image and save. Returns card description on success."""
    # Each worker builds its own SDK client instead of sharing one across threads.
    client = TCGdex(Language.EN)
    card = client.card.getSync(card_id)
    if not card:
        return None

    png_bytes = card.get_image(IMAGE_QUALITY, Extension.PNG).read()
    save_image_with_metadata(
        png_bytes,
        card_to_dict(card),
        set_dir / f"{card.localId}.png",
    )
    return f"{card.name} ({card.id})"
|
||||
|
||||
|
||||
def main():
    """List sets via the TCGdex SDK and download every selected card image."""
    client = TCGdex(Language.EN)

    # 1. Get sets
    all_sets = client.set.listSync()
    if not all_sets:
        print("No sets returned.")
        return

    selected = all_sets[:MAX_SETS] if MAX_SETS else all_sets
    print(f"Processing {len(selected)} / {len(all_sets)} sets\n")

    saved_count = 0

    for idx, brief in enumerate(selected, 1):
        # The list endpoint returns summaries; fetch the full set for its cards.
        detail = client.set.getSync(brief.id)
        if not detail or not detail.cards:
            print(f"[{idx}] {brief.name}: no cards, skipping")
            continue

        cards = detail.cards[:MAX_CARDS_PER_SET] if MAX_CARDS_PER_SET else detail.cards
        card_total = detail.cardCount.total if detail.cardCount else len(detail.cards)
        print(f"[{idx}/{len(selected)}] {brief.name} — {len(cards)}/{card_total} cards")

        target_dir = OUTPUT_DIR / brief.id

        # Fan out the downloads for this set across worker threads.
        with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
            pending = {
                executor.submit(process_card, c.id, target_dir): c.id
                for c in cards
            }
            for done in as_completed(pending):
                cid = pending[done]
                try:
                    summary = done.result()
                except Exception as e:
                    print(f" {cid}: failed ({e})")
                else:
                    if summary:
                        saved_count += 1
                        print(f" {summary} ✓")
                    else:
                        print(f" {cid}: skipped")

        print()

    print(f"Done — {saved_count} cards saved to {OUTPUT_DIR}")
|
||||
|
||||
|
||||
# Run the downloader only when executed as a script (not on import).
if __name__ == "__main__":
    main()
|
||||
|
||||
Reference in New Issue
Block a user