diff --git a/examples/idna/README.md b/examples/idna/README.md new file mode 100644 index 0000000..b8d37f4 --- /dev/null +++ b/examples/idna/README.md @@ -0,0 +1,18 @@ +# idna Examples + +Each sub-directory contains a self-contained example. The order in +which the examples are to appear is specified in `order.json` (an +array of directory names in the expected order). + +In each example directory you'll find: + +* `config.toml` - must conform to the specification outlined here: + https://docs.pyscript.net/latest/user-guide/configuration/ This is + parsed and ultimately turned into a JSON representation as part of + the package's API object. +* `setup.py` - Python code for contextual and environmental setup, + NOT SEEN BY THE END USER, but is run before the `code.py` code is + evaluated. Allows us to create useful (IPython) shims, avoid + repeating boilerplate and whatnot. +* `code.py` - the actual code added to the editor which forms the + practical example of using the package. diff --git a/examples/idna/encode_and_decode/code.py b/examples/idna/encode_and_decode/code.py new file mode 100644 index 0000000..9fe3747 --- /dev/null +++ b/examples/idna/encode_and_decode/code.py @@ -0,0 +1,45 @@ +""" +A first look at the `idna` package. + +Internationalized domain names use characters outside ASCII (think +Japanese, Cyrillic, Arabic). DNS itself only speaks ASCII, so each +Unicode label gets translated into an ASCII-compatible form starting +with `xn--`. The `idna` package handles that translation in both +directions, following the modern IDNA 2008 specification. + +Docs: https://github.com/kjd/idna +""" +from IPython.core.display import display, HTML + +import idna + +# A small address book of domains in their human-readable Unicode form. 
+unicode_domains = [ + "ドメイン.テスト", # Japanese: "domain.test" + "пример.рф", # Russian: "example.rf" + "παράδειγμα.δοκιμή", # Greek: "example.test" + "例え.テスト", # Japanese mixed scripts +] + +heading("Encoding Unicode domains to ASCII (A-labels)") +note( + "DNS resolvers only understand ASCII, so each Unicode domain " + "is encoded into a Punycode form prefixed with xn--." +) + +rows = ["" + ""] +for domain in unicode_domains: + ascii_form = idna.encode(domain).decode("ascii") + rows.append(f"") +rows.append("
Unicode (U-label)ASCII (A-label)
{domain}{ascii_form}
") +display(HTML("".join(rows)), append=True) + +heading("Decoding ASCII domains back to Unicode") +note("Round-tripping through idna.decode recovers the original.") + +ascii_input = "xn--eckwd4c7c.xn--zckzah" +recovered = idna.decode(ascii_input) +display(HTML( + f"

{ascii_input}{recovered}

" +), append=True) diff --git a/examples/idna/encode_and_decode/config.toml b/examples/idna/encode_and_decode/config.toml new file mode 100644 index 0000000..43418a0 --- /dev/null +++ b/examples/idna/encode_and_decode/config.toml @@ -0,0 +1 @@ +packages = ["idna"] diff --git a/examples/idna/encode_and_decode/setup.py b/examples/idna/encode_and_decode/setup.py new file mode 100644 index 0000000..17b1db0 --- /dev/null +++ b/examples/idna/encode_and_decode/setup.py @@ -0,0 +1,37 @@ +"""Shim setup for the first example. Includes the full IPython shim.""" +import sys +import types +import js +from pyscript import window, HTML, display as _display + +js.alert = window.alert + + +def display(*args, **kwargs): + return _display( + *args, **kwargs, target=__pyscript_display_target__, + ) + + +ipython = types.ModuleType("IPython") +core = types.ModuleType("IPython.core") +core_display = types.ModuleType("IPython.core.display") +core_display.display = display +core_display.HTML = HTML +ipython.core = core +core.display = core_display +ipython.get_ipython = lambda: None +ipython.display = core_display +sys.modules["IPython"] = ipython +sys.modules["IPython.core"] = core +sys.modules["IPython.core.display"] = core_display +sys.modules["IPython.display"] = core_display + + +def heading(text, level=2): + display(HTML(f"{text}"), append=True) + + +def note(text): + display(HTML(f"

{text}

"), append=True) + diff --git a/examples/idna/labels_and_validation/code.py b/examples/idna/labels_and_validation/code.py new file mode 100644 index 0000000..b497248 --- /dev/null +++ b/examples/idna/labels_and_validation/code.py @@ -0,0 +1,68 @@ +# --------------------------------------------------------------------- +# A domain is a sequence of labels separated by dots. The `alabel` and +# `ulabel` helpers operate on a single label at a time, which is handy +# when you're building or inspecting domain names piece by piece. The +# specific exception subclasses tell you *why* a label was rejected. +# --------------------------------------------------------------------- + +heading("Converting individual labels") + +# Build a domain label-by-label. Each call validates one piece. +parts = ["शॉप", "मुंबई", "भारत"] # Hindi: "shop.mumbai.bharat" +encoded_parts = [idna.alabel(p).decode("ascii") for p in parts] +domain_ascii = ".".join(encoded_parts) + +note("Hindi labels encoded individually with idna.alabel:") +rows = ["" + ""] +for unicode_part, ascii_part in zip(parts, encoded_parts): + rows.append( + f"" + ) +rows.append("
Unicode labelA-label
{unicode_part}{ascii_part}
") +display(HTML("".join(rows)), append=True) + +display(HTML(f"

Joined domain: {domain_ascii}

"), append=True) + +# And the round-trip back via ulabel: +recovered = ".".join(idna.ulabel(p) for p in encoded_parts) +display(HTML(f"

Decoded back: {recovered}

"), append=True) + +# --------------------------------------------------------------------- +# Things that should fail -- and the exception types that tell you why. +# --------------------------------------------------------------------- + +heading("How invalid input is rejected") + +bad_inputs = [ + ("hello world", "spaces are not valid in domain labels"), + ("Café.com", "capital letters are not allowed under strict IDNA 2008"), # lowercase "café.com" is valid and would be accepted + ("a" * 64, "labels longer than 63 octets are rejected"), + ("☃.example", "symbols (here, a snowman) are forbidden"), +] + +rows = ["" + "" + ""] +for value, why in bad_inputs: + try: + idna.encode(value) + outcome = "(unexpectedly accepted)" + except idna.IDNAError as err: + # Every idna error inherits from IDNAError; the specific + # subclass narrows down the cause. + outcome = f"{type(err).__name__}: {err}" + shown = value if len(value) < 30 else value[:27] + "..." + rows.append( + f"" + f"" + ) +rows.append("
InputWhy it might failException
{shown}{why}{outcome}
") +display(HTML("".join(rows)), append=True) + +note( + "Catch idna.IDNAError to handle any conversion " + "failure, or catch a specific subclass like " + "InvalidCodepoint or IDNABidiError " + "for finer control." +) diff --git a/examples/idna/labels_and_validation/config.toml b/examples/idna/labels_and_validation/config.toml new file mode 100644 index 0000000..43418a0 --- /dev/null +++ b/examples/idna/labels_and_validation/config.toml @@ -0,0 +1 @@ +packages = ["idna"] diff --git a/examples/idna/labels_and_validation/setup.py b/examples/idna/labels_and_validation/setup.py new file mode 100644 index 0000000..fe19918 --- /dev/null +++ b/examples/idna/labels_and_validation/setup.py @@ -0,0 +1,20 @@ +"""Lighter setup for example 3. No IPython shim; cell 1 already ran.""" +import js +from pyscript import window, HTML, display as _display + +js.alert = window.alert + + +def display(*args, **kwargs): + return _display(*args, **kwargs, target=__pyscript_display_target__) + + +def heading(text, level=2): + display(HTML(f"{text}"), append=True) + + +def note(text): + display(HTML(f"

{text}

"), append=True) + + +import idna diff --git a/examples/idna/order.json b/examples/idna/order.json new file mode 100644 index 0000000..9ffc47f --- /dev/null +++ b/examples/idna/order.json @@ -0,0 +1,5 @@ +[ + "encode_and_decode", + "uts46_mapping", + "labels_and_validation" +] diff --git a/examples/idna/uts46_mapping/code.py b/examples/idna/uts46_mapping/code.py new file mode 100644 index 0000000..ea0a947 --- /dev/null +++ b/examples/idna/uts46_mapping/code.py @@ -0,0 +1,43 @@ +# --------------------------------------------------------------------- +# Real-world input is messy: users type capital letters, paste from +# documents with full-width characters, and so on. Strict IDNA 2008 +# rejects most of this. UTS #46 ("Unicode IDNA Compatibility +# Processing") normalizes input first -- lowercasing, mapping +# compatibility characters -- and then applies IDNA encoding. +# --------------------------------------------------------------------- + +heading("Strict IDNA vs. UTS #46 compatibility mapping") + +messy_inputs = [ + "Königsgäßchen", # Mixed case German + "ΠΑΡΆΔΕΙΓΜΑ.ΕΛ", # Upper-case Greek + "Bücher.DE", # Mixed case with umlaut +] + +rows = ["" + "" + ""] + +for raw in messy_inputs: + # Try the strict path first; record the exception type if it fails. + try: + strict = idna.encode(raw).decode("ascii") + strict_cell = f"{strict}" + except idna.IDNAError as err: + strict_cell = f"{type(err).__name__}" + + # The UTS #46 path lowercases and maps compatibility characters + # before encoding, so it handles user-friendly input gracefully. + lenient = idna.encode(raw, uts46=True).decode("ascii") + rows.append( + f"" + f"" + ) +rows.append("
InputStrict IDNA 2008With uts46=True
{raw}{strict_cell}{lenient}
") +display(HTML("".join(rows)), append=True) + +note( + "Strict mode follows IDNA 2008 verbatim and rejects capital " + "letters and certain compatibility characters. UTS #46 mapping " + "is what most browsers and resolvers actually do." +) diff --git a/examples/idna/uts46_mapping/config.toml b/examples/idna/uts46_mapping/config.toml new file mode 100644 index 0000000..43418a0 --- /dev/null +++ b/examples/idna/uts46_mapping/config.toml @@ -0,0 +1 @@ +packages = ["idna"] diff --git a/examples/idna/uts46_mapping/setup.py b/examples/idna/uts46_mapping/setup.py new file mode 100644 index 0000000..c44e2e5 --- /dev/null +++ b/examples/idna/uts46_mapping/setup.py @@ -0,0 +1,20 @@ +"""Lighter setup for example 2. No IPython shim; cell 1 already ran.""" +import js +from pyscript import window, HTML, display as _display + +js.alert = window.alert + + +def display(*args, **kwargs): + return _display(*args, **kwargs, target=__pyscript_display_target__) + + +def heading(text, level=2): + display(HTML(f"{text}"), append=True) + + +def note(text): + display(HTML(f"

{text}

"), append=True) + + +import idna