Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions examples/idna/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# idna Examples

Each sub-directory contains a self-contained example. The order in
which the examples are to appear is specified in `order.json` (an
array of directory names in the expected order).

In each example directory you'll find:

* `config.toml` - must conform to the specification outlined here:
<https://docs.pyscript.net/latest/user-guide/configuration/>. This file is
parsed and ultimately turned into a JSON representation as part of
the package's API object.
* `setup.py` - Python code for contextual and environmental setup,
NOT SEEN BY THE END USER, but is run before the `code.py` code is
evaluated. Allows us to create useful (IPython) shims, avoid
repeating boilerplate and whatnot.
* `code.py` - the actual code added to the editor which forms the
practical example of using the package.
45 changes: 45 additions & 0 deletions examples/idna/encode_and_decode/code.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""
A first look at the `idna` package.

Internationalized domain names use characters outside ASCII (think
Japanese, Cyrillic, Arabic). DNS itself only speaks ASCII, so each
Unicode label gets translated into an ASCII-compatible form starting
with `xn--`. The `idna` package handles that translation in both
directions, following the modern IDNA 2008 specification.

Docs: https://github.com/kjd/idna
"""
from IPython.core.display import display, HTML

import idna

# Human-readable (Unicode) domain names used as input for this example.
unicode_domains = [
    "ドメイン.テスト",  # Japanese: "domain.test"
    "пример.рф",  # Russian: "example.rf"
    "παράδειγμα.δοκιμή",  # Greek: "example.test"
    "例え.テスト",  # Japanese mixed scripts
]

heading("Encoding Unicode domains to ASCII (A-labels)")
note(
    "DNS resolvers only understand ASCII, so each Unicode domain "
    "is encoded into a Punycode form prefixed with <code>xn--</code>."
)

# Assemble the table as a list of HTML fragments: header, one row per
# domain, closing tag. `idna.encode` returns bytes, so each result is
# decoded to text before being placed in its cell.
rows = [
    "<table border='1' cellpadding='6'><tr>"
    "<th>Unicode (U-label)</th><th>ASCII (A-label)</th></tr>",
    *(
        f"<tr><td>{domain}</td>"
        f"<td><code>{idna.encode(domain).decode('ascii')}</code></td></tr>"
        for domain in unicode_domains
    ),
    "</table>",
]
display(HTML("".join(rows)), append=True)

heading("Decoding ASCII domains back to Unicode")
note("Round-tripping through <code>idna.decode</code> recovers the original.")

# Start from an all-ASCII (A-label) domain and convert it back to its
# Unicode form, then show both side by side.
ascii_input = "xn--eckwd4c7c.xn--zckzah"
recovered = idna.decode(ascii_input)
round_trip_html = (
    f"<p><code>{ascii_input}</code> &rarr; <strong>{recovered}</strong></p>"
)
display(HTML(round_trip_html), append=True)
1 change: 1 addition & 0 deletions examples/idna/encode_and_decode/config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
packages = ["idna"]
37 changes: 37 additions & 0 deletions examples/idna/encode_and_decode/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""Shim setup for the first example. Includes the full IPython shim."""
import sys
import types
import js
from pyscript import window, HTML, display as _display

# Alias the page's `window.alert` as `js.alert`.
# NOTE(review): presumably so example code calling `js.alert(...)` keeps
# working in this environment -- confirm against the PyScript runtime.
js.alert = window.alert


def display(*args, **kwargs):
    """Forward to PyScript's ``display`` with the output target filled in.

    All positional and keyword arguments are passed through unchanged;
    ``target`` is always set to ``__pyscript_display_target__``, so a
    caller-supplied ``target`` keyword raises ``TypeError``.

    NOTE(review): ``__pyscript_display_target__`` is not defined in this
    file -- presumably injected by the host page before this runs.
    """
    return _display(*args, target=__pyscript_display_target__, **kwargs)


# Build stand-in modules so that `import IPython`,
# `from IPython.display import ...` and
# `from IPython.core.display import ...` resolve to the shims above.
ipython = types.ModuleType("IPython")
core = types.ModuleType("IPython.core")
core_display = types.ModuleType("IPython.core.display")

# The display module exposes the two names the examples import.
core_display.display, core_display.HTML = display, HTML

# Wire the package hierarchy together; `IPython.display` and
# `IPython.core.display` are the same module object.
core.display = core_display
ipython.core, ipython.display = core, core_display
ipython.get_ipython = lambda: None

# Register every dotted path so the import system finds the shims.
sys.modules.update({
    "IPython": ipython,
    "IPython.core": core,
    "IPython.core.display": core_display,
    "IPython.display": core_display,
})


def heading(text, level=2):
    """Append an ``<hN>`` heading to the output, where N is ``level``.

    ``text`` is inserted into the markup as-is (no escaping), so it may
    itself contain HTML.
    """
    tag = f"h{level}"
    markup = f"<{tag}>{text}</{tag}>"
    display(HTML(markup), append=True)


def note(text):
    """Append ``text`` to the output wrapped in a ``<p>`` element.

    ``text`` is inserted as-is (no escaping), so inline HTML is allowed.
    """
    paragraph = HTML(f"<p>{text}</p>")
    display(paragraph, append=True)

68 changes: 68 additions & 0 deletions examples/idna/labels_and_validation/code.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# ---------------------------------------------------------------------
# A domain name is made of dot-separated labels. `idna.alabel` and
# `idna.ulabel` convert one label at a time, which is useful when a
# domain is assembled or inspected piece by piece. The specific
# exception subclasses tell you *why* a label was rejected.
# ---------------------------------------------------------------------

heading("Converting individual labels")

# Encode each label separately, then join the results with dots.
parts = ["शॉप", "मुंबई", "भारत"]  # Hindi: "shop.mumbai.bharat"
encoded_parts = [idna.alabel(label).decode("ascii") for label in parts]
domain_ascii = ".".join(encoded_parts)

note("Hindi labels encoded individually with <code>idna.alabel</code>:")

# One table row per (Unicode label, A-label) pair.
rows = [
    "<table border='1' cellpadding='6'><tr>"
    "<th>Unicode label</th><th>A-label</th></tr>",
    *(
        f"<tr><td>{unicode_part}</td><td><code>{ascii_part}</code></td></tr>"
        for unicode_part, ascii_part in zip(parts, encoded_parts)
    ),
    "</table>",
]
display(HTML("".join(rows)), append=True)

joined_html = f"<p>Joined domain: <code>{domain_ascii}</code></p>"
display(HTML(joined_html), append=True)

# Round-trip: convert every A-label back with ulabel and re-join.
recovered = ".".join(map(idna.ulabel, encoded_parts))
decoded_html = f"<p>Decoded back: <strong>{recovered}</strong></p>"
display(HTML(decoded_html), append=True)

# ---------------------------------------------------------------------
# Things that should fail -- and the exception types that tell you why.
# ---------------------------------------------------------------------

heading("How invalid input is rejected")

# Each entry pairs an input that strict `idna.encode` should reject with
# the rule it breaks. A lower-case "café.com" does NOT belong here:
# U+00E9 is a valid (PVALID) code point under IDNA 2008, so it encodes
# without error. Capital letters, by contrast, are disallowed in
# non-ASCII labels unless UTS #46 mapping is applied first.
bad_inputs = [
    ("hello world", "spaces are not valid in domain labels"),
    ("Café.com", "capital letters are not allowed under strict IDNA 2008"),
    ("a" * 64, "labels longer than 63 octets are rejected"),
    ("☃.example", "symbols (here, a snowman) are forbidden"),
]

rows = ["<table border='1' cellpadding='6'><tr>"
        "<th>Input</th><th>Why it might fail</th>"
        "<th>Exception</th></tr>"]
for value, why in bad_inputs:
    try:
        idna.encode(value)
    except idna.IDNAError as err:
        # Every idna error inherits from IDNAError; the specific
        # subclass narrows down the cause.
        outcome = f"<code>{type(err).__name__}</code>: {err}"
    else:
        # Reached only if the library accepts an input listed as bad.
        outcome = "<em>(unexpectedly accepted)</em>"
    # Truncate long inputs (e.g. the 64-character label) so the table
    # stays readable.
    shown = value if len(value) < 30 else value[:27] + "..."
    rows.append(
        f"<tr><td><code>{shown}</code></td><td>{why}</td>"
        f"<td>{outcome}</td></tr>"
    )
rows.append("</table>")
display(HTML("".join(rows)), append=True)

note(
    "Catch <code>idna.IDNAError</code> to handle any conversion "
    "failure, or catch a specific subclass like "
    "<code>InvalidCodepoint</code> or <code>IDNABidiError</code> "
    "for finer control."
)
1 change: 1 addition & 0 deletions examples/idna/labels_and_validation/config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
packages = ["idna"]
20 changes: 20 additions & 0 deletions examples/idna/labels_and_validation/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""Lighter setup for example 3. No IPython shim; cell 1 already ran."""
import js
from pyscript import window, HTML, display as _display

# Alias the page's `window.alert` as `js.alert`.
# NOTE(review): presumably so example code calling `js.alert(...)` keeps
# working in this environment -- confirm against the PyScript runtime.
js.alert = window.alert


def display(*args, **kwargs):
    """Call PyScript's ``display`` with this example's output target.

    Arguments are forwarded unchanged and ``target`` is always set to
    ``__pyscript_display_target__``; passing ``target`` explicitly
    raises ``TypeError``.

    NOTE(review): ``__pyscript_display_target__`` is not defined in this
    file -- presumably injected by the host page before this runs.
    """
    return _display(*args, target=__pyscript_display_target__, **kwargs)


def heading(text, level=2):
    """Append an ``<hN>`` heading to the output, where N is ``level``.

    ``text`` is placed in the markup unescaped, so it may contain HTML.
    """
    element = f"h{level}"
    display(HTML(f"<{element}>{text}</{element}>"), append=True)


def note(text):
    """Append ``text`` to the output as a ``<p>`` paragraph (unescaped)."""
    paragraph_html = f"<p>{text}</p>"
    display(HTML(paragraph_html), append=True)


import idna
5 changes: 5 additions & 0 deletions examples/idna/order.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[
"encode_and_decode",
"uts46_mapping",
"labels_and_validation"
]
43 changes: 43 additions & 0 deletions examples/idna/uts46_mapping/code.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# ---------------------------------------------------------------------
# User-supplied domains are rarely tidy: capital letters, full-width
# characters pasted from documents, and so on. Strict IDNA 2008 rejects
# most of that. UTS #46 ("Unicode IDNA Compatibility Processing")
# normalizes the input first -- lowercasing, mapping compatibility
# characters -- and only then applies IDNA encoding.
# ---------------------------------------------------------------------

heading("Strict IDNA vs. UTS #46 compatibility mapping")

messy_inputs = [
    "Königsgäßchen",  # Mixed case German
    "ΠΑΡΆΔΕΙΓΜΑ.ΕΛ",  # Upper-case Greek
    "Bücher.DE",  # Mixed case with umlaut
]

rows = [
    "<table border='1' cellpadding='6'><tr>"
    "<th>Input</th><th>Strict IDNA 2008</th>"
    "<th>With <code>uts46=True</code></th></tr>"
]

for raw in messy_inputs:
    # Strict path: show the encoded form, or the exception class name
    # when the input is rejected.
    try:
        strict = idna.encode(raw).decode("ascii")
    except idna.IDNAError as err:
        strict_cell = f"<em style='color:#b00'>{type(err).__name__}</em>"
    else:
        strict_cell = f"<code>{strict}</code>"

    # UTS #46 path: the input is mapped (lowercased, compatibility
    # characters replaced) before encoding.
    lenient = idna.encode(raw, uts46=True).decode("ascii")

    table_row = (
        f"<tr><td>{raw}</td><td>{strict_cell}</td>"
        f"<td><code>{lenient}</code></td></tr>"
    )
    rows.append(table_row)

rows.append("</table>")
display(HTML("".join(rows)), append=True)

note(
    "Strict mode follows IDNA 2008 verbatim and rejects capital "
    "letters and certain compatibility characters. UTS #46 mapping "
    "is what most browsers and resolvers actually do."
)
1 change: 1 addition & 0 deletions examples/idna/uts46_mapping/config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
packages = ["idna"]
20 changes: 20 additions & 0 deletions examples/idna/uts46_mapping/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""Lighter setup for example 2. No IPython shim; cell 1 already ran."""
import js
from pyscript import window, HTML, display as _display

# Alias the page's `window.alert` as `js.alert`.
# NOTE(review): presumably so example code calling `js.alert(...)` keeps
# working in this environment -- confirm against the PyScript runtime.
js.alert = window.alert


def display(*args, **kwargs):
    """Call PyScript's ``display`` with this example's output target.

    Arguments are forwarded unchanged and ``target`` is always set to
    ``__pyscript_display_target__``; passing ``target`` explicitly
    raises ``TypeError``.

    NOTE(review): ``__pyscript_display_target__`` is not defined in this
    file -- presumably injected by the host page before this runs.
    """
    return _display(*args, target=__pyscript_display_target__, **kwargs)


def heading(text, level=2):
    """Append an ``<hN>`` heading to the output, where N is ``level``.

    ``text`` is placed in the markup unescaped, so it may contain HTML.
    """
    element = f"h{level}"
    display(HTML(f"<{element}>{text}</{element}>"), append=True)


def note(text):
    """Append ``text`` to the output as a ``<p>`` paragraph (unescaped)."""
    paragraph_html = f"<p>{text}</p>"
    display(HTML(paragraph_html), append=True)


import idna