"""Download trademark logo images into a local directory."""

import re
from pathlib import Path

import httpx
from rich.markup import escape
from rich.progress import BarColumn, MofNCompleteColumn, Progress, TextColumn

# Headers sent with every image request.
_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
        "Chrome/121.0 Safari/537.36"
    ),
    "Referer": "https://www.tmdn.org/tmview/",
}

# Response media type -> file extension used for the saved file.
_CONTENT_TYPE_EXT = {
    "image/jpeg": ".jpg",
    "image/png": ".png",
    "image/gif": ".gif",
    "image/svg+xml": ".svg",
    "image/webp": ".webp",
}

# Matches every character that is NOT a word character, "-" or ".".
_SAFE = re.compile(r"[^\w\-.]")


def _safe_name(s: str) -> str:
    """Replace each character other than word characters, "-" and "." with "_"."""
    return _SAFE.sub("_", s)


def _ext_from_content_type(ct: str) -> str:
    """Return the file extension for a Content-Type header value.

    Parameters after ";" (e.g. a charset) are ignored and the media type is
    compared case-insensitively. Unknown or empty types map to ".bin".
    """
    base = ct.split(";")[0].strip().lower()
    return _CONTENT_TYPE_EXT.get(base, ".bin")


def download_logos(trademarks: list, output_dir: str, console) -> tuple[int, int]:
    """Download the image of every trademark that has an ``image_url``.

    Each image is written to ``<output_dir>/<office>-<application_number><ext>``,
    where the extension is derived from the response's Content-Type header.
    An existing file with the same name is overwritten.

    Args:
        trademarks: Mapping-like records; ``image_url``, ``office`` and
            ``application_number`` are read with ``.get``. Records whose
            ``image_url`` is missing or falsy are ignored and counted in
            neither total.
        output_dir: Destination directory; created (with parents) if missing.
        console: Console used for the progress bar and for skip messages
            (it is passed to ``rich.progress.Progress`` and its ``print``
            method is called).

    Returns:
        ``(ok, skipped)``: the number of images saved and the number of
        downloads that failed with an HTTP, URL or filesystem error.
    """
    dest = Path(output_dir)
    dest.mkdir(parents=True, exist_ok=True)

    downloadable = [tm for tm in trademarks if tm.get("image_url")]
    ok = skipped = 0

    with Progress(
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        MofNCompleteColumn(),
        console=console,
        transient=True,
    ) as progress:
        task = progress.add_task("Downloading logos", total=len(downloadable))
        with httpx.Client(
            timeout=30, headers=_HEADERS, follow_redirects=True
        ) as client:
            for tm in downloadable:
                # Sanitise the whole label, not only the application number:
                # a path separator in `office` must not escape `dest`. The
                # f-string also tolerates None / non-string field values.
                label = _safe_name(
                    f"{tm.get('office', '')}-{tm.get('application_number', '')}"
                )
                progress.update(task, description=f"[dim]{label}[/dim]")
                try:
                    resp = client.get(tm["image_url"])
                    resp.raise_for_status()
                    ext = _ext_from_content_type(
                        resp.headers.get("content-type", "")
                    )
                    path = dest / f"{label}{ext}"
                    path.write_bytes(resp.content)
                except (httpx.HTTPError, httpx.InvalidURL, OSError) as exc:
                    # httpx.InvalidURL is not an HTTPError subclass; without it
                    # one malformed URL would abort the whole batch.
                    # The message is escaped so bracketed text inside it is
                    # not interpreted as Rich markup.
                    console.print(
                        f"[yellow] skip {label}: {escape(str(exc))}[/yellow]"
                    )
                    skipped += 1
                else:
                    ok += 1
                finally:
                    progress.advance(task)

    return ok, skipped