pyscript · ntoll · May 29, 2026 · Jun 4, 2026
diff --git a/examples/joblib/README.md b/examples/joblib/README.md
@@ -0,0 +1,18 @@
+# joblib Examples
+
+Each sub-directory contains a self-contained example. The order in
+which the examples are to appear is specified in `order.json` (an
+array of directory names in the expected order).
+
+In each example directory you'll find:
+
+* `config.toml` - must conform to the PyScript configuration
+  specification (https://docs.pyscript.net/latest/user-guide/configuration/).
+  It is parsed and ultimately turned into a JSON representation as
+  part of the package's API object.
+* `setup.py` - Python code for contextual and environmental setup.
+  It is NOT SEEN BY THE END USER, but is run before the `code.py`
+  code is evaluated. It lets us create useful shims (e.g. for
+  IPython), avoid repeating boilerplate, and so on.
+* `code.py` - the actual code added to the editor which forms the
+  practical example of using the package.
diff --git a/examples/joblib/dump_and_load/code.py b/examples/joblib/dump_and_load/code.py
@@ -0,0 +1,66 @@
+# ---------------------------------------------------------------------
+# joblib.dump / joblib.load: serialize Python objects (especially
+# ones containing large NumPy arrays) to a single file, e.g. to save
+# trained models or precomputed datasets. `heading` and `note` are
+# output helpers provided by this example's hidden setup.py.
+# See https://joblib.readthedocs.io/en/stable/persistence.html
+# ---------------------------------------------------------------------
+
+import os
+import numpy as np
+import joblib
+
+rng = np.random.default_rng(7)  # fixed seed, so the data is repeatable
+
+
+heading("Serializing objects with joblib.dump and joblib.load")
+note(
+    "We'll build a small dictionary holding metadata and a couple "
+    "of NumPy arrays, save it to a file, then load it back and "
+    "compare. <code>joblib.dump</code> handles large arrays "
+    "efficiently and supports compression."
+)
+
+# A made-up "model artifact": some metadata plus learned parameters.
+artifact = {
+    "name": "linear-regressor",
+    "version": 3,
+    "feature_names": ["temperature", "humidity", "wind_speed"],
+    "weights": rng.normal(size=(3,)),
+    "training_samples": rng.normal(size=(1000, 3)),
+}
+
+# Save twice, uncompressed and gzip-compressed, to compare file sizes.
+joblib.dump(artifact, "artifact.joblib")
+joblib.dump(artifact, "artifact.joblib.gz", compress=("gzip", 3))
+
+uncompressed_size = os.path.getsize("artifact.joblib")
+compressed_size = os.path.getsize("artifact.joblib.gz")
+note(
+    f"Uncompressed file: <strong>{uncompressed_size:,}</strong> bytes. "
+    f"Gzip-compressed (level 3): <strong>{compressed_size:,}</strong> bytes."
+)
+
+# Load it back (compression is auto-detected). Only load trusted files.
+restored = joblib.load("artifact.joblib.gz")
+
+note(
+    f"Restored name: <code>{restored['name']}</code>, "
+    f"version <code>{restored['version']}</code>, "
+    f"features: <code>{restored['feature_names']}</code>."
+)
+
+# Compare the restored arrays element-for-element with the originals.
+weights_match = np.array_equal(restored["weights"], artifact["weights"])
+samples_match = np.array_equal(
+    restored["training_samples"], artifact["training_samples"],
+)
+note(
+    f"Weights round-tripped exactly: <strong>{weights_match}</strong>. "
+    f"Training samples round-tripped exactly: "
+    f"<strong>{samples_match}</strong>."
+)
+
+# joblib.hash gives a stable fingerprint for arbitrary Python objects,
+# which is handy for cache keys and equality checks across processes.
+fingerprint = joblib.hash(artifact)
+note(f"Stable fingerprint of the artifact: <code>{fingerprint}</code>.")
diff --git a/examples/joblib/dump_and_load/config.toml b/examples/joblib/dump_and_load/config.toml
@@ -0,0 +1 @@
+packages = ["joblib", "numpy"]
diff --git a/examples/joblib/dump_and_load/setup.py b/examples/joblib/dump_and_load/setup.py
@@ -0,0 +1,29 @@
+"""Setup for the dump/load example. No IPython shim here."""
+import js
+from pyscript import window, HTML, display as _display
+
+# Point `js.alert` at the `window.alert` exposed by `pyscript`.
+js.alert = window.alert
+
+
+def display(*args, **kwargs):
+    """Display values at the example's display target.
+
+    Wraps PyScript's `display`, defaulting `target` to the
+    `__pyscript_display_target__` global (not defined in this file;
+    presumably injected by the host before this code runs). An explicit
+    `target=` from the caller is honoured instead of raising a
+    duplicate-keyword `TypeError`.
+    """
+    kwargs.setdefault("target", __pyscript_display_target__)
+    return _display(*args, **kwargs)
+
+
+def heading(text, level=2):
+    """Append `text` (raw, unescaped HTML) as an `<h{level}>` heading."""
+    display(HTML(f"<h{level}>{text}</h{level}>"), append=True)
+
+
+def note(text):
+    """Append `text` (raw, unescaped HTML) as a `<p>` paragraph."""
+    display(HTML(f"<p>{text}</p>"), append=True)
diff --git a/examples/joblib/memory_caching/code.py b/examples/joblib/memory_caching/code.py
@@ -0,0 +1,69 @@
+"""
+First look at joblib: cache expensive function results to disk
+(in Pyodide, an in-memory virtual file system) so repeated calls
+return instantly.
+
+See https://joblib.readthedocs.io/en/stable/memory.html
+"""
+from IPython.core.display import display, HTML  # shimmed by setup.py
+
+import time
+import numpy as np
+from joblib import Memory
+
+
+# A Memory object is the entry point for caching. The `location`
+# is a directory where results are persisted; in Pyodide this is
+# the in-browser virtual file system, so caches survive within a
+# session.
+memory = Memory(location="./joblib_cache", verbose=0)
+
+
+@memory.cache
+def slow_square_sum(n):
+    """Pretend-expensive computation: sum of i*i for i in 0..n-1."""
+    # Simulate a costly step so the cache benefit is obvious.
+    time.sleep(0.5)
+    arr = np.arange(n, dtype=np.int64)
+    return int((arr * arr).sum())
+
+
+heading("Caching with joblib.Memory")
+note(
+    "We decorate <code>slow_square_sum</code> with "
+    "<code>@memory.cache</code>. The first call computes and "
+    "stores the result; later calls with the same argument are "
+    "served from the cache."
+)
+
+# First call: actually computes (and writes to the cache).
+start = time.perf_counter()
+result_first = slow_square_sum(200_000)
+first_elapsed = time.perf_counter() - start
+
+# Second call with the same input: hits the cache.
+start = time.perf_counter()
+result_cached = slow_square_sum(200_000)
+cached_elapsed = time.perf_counter() - start
+
+# Different input: computes again, populating a new cache entry.
+start = time.perf_counter()
+result_other = slow_square_sum(50_000)
+other_elapsed = time.perf_counter() - start
+
+note(
+    f"First call (n=200,000): result={result_first:,}, "
+    f"took <strong>{first_elapsed:.3f}s</strong>."
+)
+note(
+    f"Repeat call (n=200,000): result={result_cached:,}, "
+    f"took <strong>{cached_elapsed:.3f}s</strong> (cache hit)."
+)
+note(
+    f"New input (n=50,000): result={result_other:,}, "
+    f"took <strong>{other_elapsed:.3f}s</strong>."
+)
+
+# You can wipe the cache when you want to force recomputation.
+memory.clear(warn=False)
+note("Called <code>memory.clear()</code> to remove all cached entries.")
diff --git a/examples/joblib/memory_caching/config.toml b/examples/joblib/memory_caching/config.toml
@@ -0,0 +1 @@
+packages = ["joblib", "numpy"]
diff --git a/examples/joblib/memory_caching/setup.py b/examples/joblib/memory_caching/setup.py
@@ -0,0 +1,56 @@
+"""
+Shim IPython's display API onto PyScript so example code written in a
+Jupyter/IPython idiom runs unmodified in the browser.
+"""
+
+import sys
+import types
+import js
+from pyscript import window, HTML, display as _display
+
+# Point `js.alert` at the `window.alert` exposed by `pyscript`.
+js.alert = window.alert
+
+
+def display(*args, **kwargs):
+    """Display values at the example's display target.
+
+    Wraps PyScript's `display`, defaulting `target` to the
+    `__pyscript_display_target__` global (not defined in this file;
+    presumably injected by the host before this code runs). An explicit
+    `target=` from the caller is honoured instead of raising a
+    duplicate-keyword `TypeError`.
+    """
+    kwargs.setdefault("target", __pyscript_display_target__)
+    return _display(*args, **kwargs)
+
+
+# Build stand-in `IPython`, `IPython.core` and `IPython.core.display`
+# modules exposing just `display`, `HTML` and a `get_ipython()` that
+# returns None, then register them in `sys.modules` so imports such as
+# `from IPython.core.display import display, HTML` resolve to the shim.
+ipython = types.ModuleType("IPython")
+core = types.ModuleType("IPython.core")
+core_display = types.ModuleType("IPython.core.display")
+core_display.display = display
+core_display.HTML = HTML
+ipython.core = core
+core.display = core_display
+ipython.get_ipython = lambda: None
+# `IPython.display` is served by the same module object as
+# `IPython.core.display`.
+ipython.display = core_display
+sys.modules["IPython"] = ipython
+sys.modules["IPython.core"] = core
+sys.modules["IPython.core.display"] = core_display
+sys.modules["IPython.display"] = core_display
+
+
+def heading(text, level=2):
+    """Append `text` (raw, unescaped HTML) as an `<h{level}>` heading."""
+    display(HTML(f"<h{level}>{text}</h{level}>"), append=True)
+
+
+def note(text):
+    """Append `text` (raw, unescaped HTML) as a `<p>` paragraph."""
+    display(HTML(f"<p>{text}</p>"), append=True)
diff --git a/examples/joblib/order.json b/examples/joblib/order.json
@@ -0,0 +1,5 @@
+[
+    "memory_caching",
+    "parallel_loops",
+    "dump_and_load"
+]
diff --git a/examples/joblib/parallel_loops/code.py b/examples/joblib/parallel_loops/code.py
@@ -0,0 +1,83 @@
+# ---------------------------------------------------------------------
+# joblib.Parallel: write a parallel loop with the same shape as a
+# sequential one. The recipe is:
+#
+#     Parallel(n_jobs=...)(delayed(func)(arg) for arg in args)
+#
+# `delayed` captures the call without invoking it, and `Parallel`
+# dispatches the captured calls. See:
+# https://joblib.readthedocs.io/en/stable/parallel.html
+# ---------------------------------------------------------------------
+import time
+import math
+import numpy as np
+import matplotlib.pyplot as plt
+from joblib import Parallel, delayed
+
+
+heading("Parallel loops with joblib")
+note(
+    "We'll estimate &pi; with a Monte Carlo simulation, splitting "
+    "the work across several batches. Each batch throws random "
+    "darts at the unit square and counts those landing inside the "
+    "quarter circle of radius 1."
+)
+
+
+def estimate_pi_batch(n_samples, seed):
+    """Estimate pi from one batch of random points.
+
+    Draws `n_samples` points uniformly from the unit square using a
+    generator seeded with `seed`, and returns 4 * (fraction of points
+    with x*x + y*y <= 1), i.e. inside the quarter circle of radius 1.
+    """
+    local_rng = np.random.default_rng(seed)
+    xs = local_rng.random(n_samples)
+    ys = local_rng.random(n_samples)
+    inside = int(((xs * xs + ys * ys) <= 1.0).sum())
+    return 4.0 * inside / n_samples
+
+
+# Eight batches of 50,000 samples, each with its own seed.
+batch_size = 50_000
+seeds = list(range(8))
+
+# Sequential baseline: a plain list comprehension.
+start = time.perf_counter()
+sequential_estimates = [estimate_pi_batch(batch_size, s) for s in seeds]
+sequential_elapsed = time.perf_counter() - start
+
+# Parallel version: same shape, wrapped in Parallel/delayed.
+# n_jobs=2 keeps the demo lightweight; -1 would use all CPUs.
+# NOTE: in the browser (Pyodide) joblib may not be able to start
+# workers and can fall back to running the batches sequentially,
+# so don't expect a speed-up here.
+start = time.perf_counter()
+parallel_estimates = Parallel(n_jobs=2)(
+    delayed(estimate_pi_batch)(batch_size, s) for s in seeds
+)
+parallel_elapsed = time.perf_counter() - start
+
+combined_pi = float(np.mean(parallel_estimates))
+note(
+    f"Combined estimate of &pi; from {len(seeds)} batches: "
+    f"<strong>{combined_pi:.5f}</strong> "
+    f"(error vs math.pi: {abs(combined_pi - math.pi):.5f})."
+)
+note(
+    f"Sequential loop: {sequential_elapsed:.3f}s. "
+    f"Parallel loop: {parallel_elapsed:.3f}s."
+)
+
+# Plot the per-batch estimates against the true value.
+fig, ax = plt.subplots(figsize=(8, 4))
+ax.plot(seeds, parallel_estimates, "o-", color="steelblue",
+        label="Per-batch estimate")
+ax.axhline(math.pi, color="crimson", linestyle="--",
+           label="math.pi")
+ax.set_xlabel("Batch (seed)")
+ax.set_ylabel("Estimate of \u03c0")
+ax.set_title("Monte Carlo estimates of \u03c0 across parallel batches")
+ax.legend()
+fig.tight_layout()
+display(fig, append=True)
diff --git a/examples/joblib/parallel_loops/config.toml b/examples/joblib/parallel_loops/config.toml
@@ -0,0 +1 @@
+packages = ["joblib", "numpy", "matplotlib"]
diff --git a/examples/joblib/parallel_loops/setup.py b/examples/joblib/parallel_loops/setup.py
@@ -0,0 +1,30 @@
+"""Setup for the Parallel example. No IPython shim here."""
+import js
+from pyscript import window, HTML, display as _display
+
+# Point `js.alert` at the `window.alert` exposed by `pyscript`.
+js.alert = window.alert
+
+
+def display(*args, **kwargs):
+    """Display values at the example's display target.
+
+    Wraps PyScript's `display`, defaulting `target` to the
+    `__pyscript_display_target__` global (not defined in this file;
+    presumably injected by the host before this code runs). An explicit
+    `target=` from the caller is honoured instead of raising a
+    duplicate-keyword `TypeError`.
+    """
+    kwargs.setdefault("target", __pyscript_display_target__)
+    return _display(*args, **kwargs)
+
+
+def heading(text, level=2):
+    """Append `text` (raw, unescaped HTML) as an `<h{level}>` heading."""
+    display(HTML(f"<h{level}>{text}</h{level}>"), append=True)
+
+
+def note(text):
+    """Append `text` (raw, unescaped HTML) as a `<p>` paragraph."""
+    display(HTML(f"<p>{text}</p>"), append=True)
+