#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Quick smoke test for content image URL token extraction.

Usage:
  python test_content_token_extract_samples.py
"""

from __future__ import annotations

from typing import Callable


def _run(name: str, html: str, extract: Callable[[str], list[str]]) -> None:
    """Print the tokens that *extract* returns for one HTML sample.

    Args:
        name: Label shown in the ``=== ... ===`` header line.
        html: HTML snippet handed to *extract* unchanged.
        extract: Callable that maps an HTML string to a list of tokens.

    The output is a blank line, the header, a ``tokens(N):`` count line and
    then one `` - token`` line per returned token, all written to stdout.
    """
    found = extract(html)

    banner = f"\n=== {name} ==="
    print(banner)

    count_line = "tokens(" + str(len(found)) + "):"
    print(count_line)

    for token in found:
        # Same text as print(" -", token): a single space separates the dash
        # from the str() form of the token.
        print(f" - {token!s}")


def main() -> None:
    """Run each sample HTML snippet through the batch tokenizer and print the tokens.

    The extractor is ``_extract_img_url_tokens_from_html`` from the
    project-local ``Z_Batch_Create_Content_Token`` module. Every
    ``(name, html)`` sample below is passed to ``_run``, which prints the
    extracted tokens. Nothing is asserted; the output is meant to be
    inspected by eye.

    Raises:
        ImportError: If ``Z_Batch_Create_Content_Token`` cannot be imported.
        AttributeError: If that module does not expose
            ``_extract_img_url_tokens_from_html``.
    """
    # Prefer the batch tokenizer (it is the canonical policy for DB tokens)
    import Z_Batch_Create_Content_Token as z  # noqa: N812

    # Bind the function directly rather than assigning a forwarding lambda to
    # a name (PEP 8 / E731); the lambda only passed its one argument through.
    extract = z._extract_img_url_tokens_from_html  # type: ignore[attr-defined]

    # Each entry is (label, html). The labels describe the HTML quirk the
    # sample exercises; the expected tokens are not encoded here.
    samples: tuple[tuple[str, str], ...] = (
        (
            "escaped_quotes_and_no_ext",
            r'<img src=\"https://daewon29.diskn.com/U821Kqi7zD\"><br>'
            r'<img src=\"https://daewon29.diskn.com/o821KqittZ\">',
        ),
        (
            "unquoted_src",
            r"<center><img src=https://iracom.shopon.biz/data/goods_img/goods_img/GT/GTS52447/GTS52447.jpg></center>",
        ),
        (
            "comma_broken_img_tag",
            r'<TABLE,cellSpacing=0><TR><TD><IMG,src="https://example.com/p/ABC123.jpg"></TD></TR></TABLE>',
        ),
        (
            "srcset",
            r'<img srcset="https://example.com/a/photo_2.jpg 2x, https://example.com/a/photo_1.jpg 1x">',
        ),
        (
            "relative_path",
            r'<img src="/shopimages/perzoom/20251125175249phpIbS3x6.jpg">',
        ),
        (
            "broken_scheme",
            r'<img src="http:////akcdn-cskdisplay1.cafe24img.com/%EA%B8%B0%ED%83%80/%EB%A7%88%EA%B7%B8.jpg">',
        ),
        (
            "scheme_less",
            r'<img src="//cdn.example.com/a/b/c.jpg">',
        ),
        (
            # NOTE(review): the label suggests this sample should yield no
            # tokens; confirm against the tokenizer's normalize step.
            "data_uri_should_be_dropped_by_normalize",
            r'<img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA">',
        ),
    )

    for name, html in samples:
        _run(name, html, extract)


# Run the smoke test only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()

