diff --git a/examples/idna/README.md b/examples/idna/README.md
new file mode 100644
index 0000000..b8d37f4
--- /dev/null
+++ b/examples/idna/README.md
@@ -0,0 +1,18 @@
+# idna Examples
+
+Each sub-directory contains a self-contained example. The order in
+which the examples are to appear is specified in `order.json` (an
+array of directory names in the expected order).
+
+In each example directory you'll find:
+
+* `config.toml` - must conform to the PyScript configuration
+ specification (https://docs.pyscript.net/latest/user-guide/configuration/).
+ It is parsed and ultimately turned into a JSON representation as
+ part of the package's API object.
+* `setup.py` - Python code for contextual and environmental setup.
+ It is NOT SEEN BY THE END USER, but it runs before the `code.py`
+ code is evaluated. It lets us create useful shims (e.g. for
+ IPython) and avoid repeating boilerplate.
+* `code.py` - the actual code added to the editor which forms the
+ practical example of using the package.
diff --git a/examples/idna/encode_and_decode/code.py b/examples/idna/encode_and_decode/code.py
new file mode 100644
index 0000000..9fe3747
--- /dev/null
+++ b/examples/idna/encode_and_decode/code.py
@@ -0,0 +1,45 @@
+"""
+A first look at the `idna` package.
+
+Internationalized domain names use characters outside ASCII (think
+Japanese, Cyrillic, Arabic). DNS itself only speaks ASCII, so each
+Unicode label gets translated into an ASCII-compatible form starting
+with `xn--`. The `idna` package handles that translation in both
+directions, following the modern IDNA 2008 specification.
+
+Docs: https://github.com/kjd/idna
+"""
+from IPython.core.display import display, HTML
+
+import idna
+
+# A small address book of domains in their human-readable Unicode form.
+unicode_domains = [
+ "ドメイン.テスト", # Japanese: "domain.test"
+ "пример.рф", # Russian: "example.rf"
+ "παράδειγμα.δοκιμή", # Greek: "example.test"
+ "例え.テスト", # Japanese mixed scripts
+]
+
+heading("Encoding Unicode domains to ASCII (A-labels)")
+note(
+ "DNS resolvers only understand ASCII, so each Unicode domain "
+ "is encoded into a Punycode form prefixed with xn--."
+)
+
+rows = ["
| Unicode (U-label) | ASCII (A-label) |
|---|---|
| {domain} | {ascii_form} |
idna.decode recovers the original.")
+
+ascii_input = "xn--eckwd4c7c.xn--zckzah"
+recovered = idna.decode(ascii_input)
+display(HTML(
+ f"{ascii_input} → {recovered}
{text}
"), append=True) + diff --git a/examples/idna/labels_and_validation/code.py b/examples/idna/labels_and_validation/code.py new file mode 100644 index 0000000..b497248 --- /dev/null +++ b/examples/idna/labels_and_validation/code.py @@ -0,0 +1,68 @@ +# --------------------------------------------------------------------- +# A domain is a sequence of labels separated by dots. The `alabel` and +# `ulabel` helpers operate on a single label at a time, which is handy +# when you're building or inspecting domain names piece by piece. The +# specific exception subclasses tell you *why* a label was rejected. +# --------------------------------------------------------------------- + +heading("Converting individual labels") + +# Build a domain label-by-label. Each call validates one piece. +parts = ["शॉप", "मुंबई", "भारत"] # Hindi: "shop.mumbai.bharat" +encoded_parts = [idna.alabel(p).decode("ascii") for p in parts] +domain_ascii = ".".join(encoded_parts) + +note("Hindi labels encoded individually withidna.alabel:")
+rows = ["| Unicode label | A-label |
|---|---|
| {unicode_part} | {ascii_part} |
Joined domain: {domain_ascii}
Decoded back: {recovered}
"), append=True) + +# --------------------------------------------------------------------- +# Things that should fail -- and the exception types that tell you why. +# --------------------------------------------------------------------- + +heading("How invalid input is rejected") + +bad_inputs = [ + ("hello world", "spaces are not valid in domain labels"), + ("café.com", "U+00E9 is not allowed under strict IDNA 2008"), + ("a" * 64, "labels longer than 63 octets are rejected"), + ("☃.example", "symbols (here, a snowman) are forbidden"), +] + +rows = ["| Input | Why it might fail | " + "Exception |
|---|---|---|
{shown} | {why} | " + f"{outcome} |
idna.IDNAError to handle any conversion "
+ "failure, or catch a specific subclass like "
+ "InvalidCodepoint or IDNABidiError "
+ "for finer control."
+)
diff --git a/examples/idna/labels_and_validation/config.toml b/examples/idna/labels_and_validation/config.toml
new file mode 100644
index 0000000..43418a0
--- /dev/null
+++ b/examples/idna/labels_and_validation/config.toml
@@ -0,0 +1 @@
+packages = ["idna"]
diff --git a/examples/idna/labels_and_validation/setup.py b/examples/idna/labels_and_validation/setup.py
new file mode 100644
index 0000000..fe19918
--- /dev/null
+++ b/examples/idna/labels_and_validation/setup.py
@@ -0,0 +1,20 @@
+"""Lighter setup for example 3. No IPython shim; cell 1 already ran."""
+import js
+from pyscript import window, HTML, display as _display
+
+# Rebind `js.alert` to the `alert` exposed by pyscript's `window` object.
+# NOTE(review): presumably so example code calling `js.alert` reaches the
+# main window's dialog -- confirm against the runtime this setup targets.
+js.alert = window.alert
+
+
+def display(*args, **kwargs):
+    """Call pyscript's `display`, always passing this example's target.
+
+    Positional and keyword arguments are forwarded unchanged; `target` is
+    set to `__pyscript_display_target__` (not defined in this file --
+    presumably injected by the runtime; confirm).
+    """
+    output_target = __pyscript_display_target__
+    return _display(*args, target=output_target, **kwargs)
+
+
+def heading(text, level=2):
+ display(HTML(f"{text}
"), append=True) + + +import idna diff --git a/examples/idna/order.json b/examples/idna/order.json new file mode 100644 index 0000000..9ffc47f --- /dev/null +++ b/examples/idna/order.json @@ -0,0 +1,5 @@ +[ + "encode_and_decode", + "uts46_mapping", + "labels_and_validation" +] diff --git a/examples/idna/uts46_mapping/code.py b/examples/idna/uts46_mapping/code.py new file mode 100644 index 0000000..ea0a947 --- /dev/null +++ b/examples/idna/uts46_mapping/code.py @@ -0,0 +1,43 @@ +# --------------------------------------------------------------------- +# Real-world input is messy: users type capital letters, paste from +# documents with full-width characters, and so on. Strict IDNA 2008 +# rejects most of this. UTS #46 ("Unicode IDNA Compatibility +# Processing") normalizes input first -- lowercasing, mapping +# compatibility characters -- and then applies IDNA encoding. +# --------------------------------------------------------------------- + +heading("Strict IDNA vs. UTS #46 compatibility mapping") + +messy_inputs = [ + "Königsgäßchen", # Mixed case German + "ΠΑΡΆΔΕΙΓΜΑ.ΕΛ", # Upper-case Greek + "Bücher.DE", # Mixed case with umlaut +] + +rows = ["| Input | Strict IDNA 2008 | " + "With uts46=True |
|---|---|---|
| {raw} | {strict_cell} | " + f"{lenient} |
{text}
"), append=True) + + +import idna