From d03864c1090ecd5eb2b697ffaeb78bf70c13e283 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Wed, 24 Jun 2026 21:31:09 +0000
Subject: [PATCH 1/7] chore: add test of all supported BigQuery data types

This should help test the new compiler more thoroughly.

Towards b/527509188
---
 .../bigframes/tests/data/nested_structs.jsonl |   8 +-
 .../tests/data/nested_structs_schema.json     |  73 ++++++
 packages/bigframes/tests/system/conftest.py   | 219 +++++++++++++++++-
 .../tests/system/small/test_pandas.py         |  78 +++++++
 4 files changed, 370 insertions(+), 8 deletions(-)

diff --git a/packages/bigframes/tests/data/nested_structs.jsonl b/packages/bigframes/tests/data/nested_structs.jsonl
index f57214b0b3c6..97e230c91974 100644
--- a/packages/bigframes/tests/data/nested_structs.jsonl
+++ b/packages/bigframes/tests/data/nested_structs.jsonl
@@ -1,2 +1,6 @@
-{"id": 1,   "person": {"name": "Alice", "age":30,   "address": {"city": "New York", "country": "USA"}}}
-{"id": 2,   "person": {"name": "Bob",   "age":25,   "address": {"city": "London",   "country": "UK"}}}
\ No newline at end of file
+{"id": 1, "person": {"name": "Alice", "age": 30, "address": {"city": "New York", "country": "USA"}}, "bool_col": true, "int64_col": "123456789", "float64_col": 1.25, "string_col": "Hello World", "json_col": {"a": 1, "b": [1, 2]}, "date_col": "2026-06-24", "time_col": "12:34:56.789012", "datetime_col": "2026-06-24 12:34:56.789012", "timestamp_col": "2026-06-24T12:34:56.789012Z", "bytes_col": "SGVsbG8=", "numeric_col": "123456.789", "bignumeric_col": "123456.7890123456789", "geography_col": "POINT(30 10)", "duration_col": "1000"}
+{"id": 2, "person": {"name": "", "age": -1, "address": {"city": "", "country": ""}}, "bool_col": false, "int64_col": "-9223372036854775808", "float64_col": "-Infinity", "string_col": "", "json_col": {}, "date_col": "0001-01-01", "time_col": "00:00:00", "datetime_col": "0001-01-02 00:00:00", "timestamp_col": "0001-01-02T00:00:00Z", "bytes_col": "", "numeric_col": "-99999999999999999999999999999.999999999", "bignumeric_col": "-99999999999999999999999999999999999999.99999999999999999999999999999999999999", "geography_col": "POINT(0 0)", "duration_col": "-9223372036854775"}
+{"id": 3, "person": {"name": "Very Long Name...", "age": 150, "address": {"city": "City", "country": "Country"}}, "bool_col": true, "int64_col": "9223372036854775807", "float64_col": "Infinity", "string_col": "Unicode: 🚀 Spark ✨", "json_col": {"max": true, "nested": {"val": 999}}, "date_col": "9999-12-31", "time_col": "23:59:59.999999", "datetime_col": "9999-12-31 23:59:59.999999", "timestamp_col": "9999-12-31T23:59:59.999999Z", "bytes_col": "dmVyeSBsb25nIGJ5dGVzIHZhbHVl", "numeric_col": "99999999999999999999999999999.999999999", "bignumeric_col": "99999999999999999999999999999999999999.99999999999999999999999999999999999999", "geography_col": "POLYGON((0 0, 10 0, 10 10, 0 10, 0 0))", "duration_col": "9223372036854775"}
+{"id": 4, "person": null, "bool_col": null, "int64_col": null, "float64_col": null, "string_col": null, "date_col": null, "time_col": null, "datetime_col": null, "timestamp_col": null, "bytes_col": null, "numeric_col": null, "bignumeric_col": null, "geography_col": null, "duration_col": null}
+{"id": 5, "person": {"name": "Bob", "age": 0, "address": null}, "bool_col": false, "int64_col": "0", "float64_col": "NaN", "string_col": "Line 1\nLine 2\n\"Quotes\"", "json_col": [1, "two", null], "date_col": "1970-01-01", "time_col": "12:00:00", "datetime_col": "1970-01-01 12:00:00", "timestamp_col": "1970-01-01T12:00:00Z", "bytes_col": "AA==", "numeric_col": "0", "bignumeric_col": "0", "geography_col": "LINESTRING(0 0, 1 1, 2 2)", "duration_col": "0"}
+{"id": 6, "person": null, "bool_col": null, "int64_col": null, "float64_col": null, "string_col": null, "json_col": null, "date_col": null, "time_col": null, "datetime_col": null, "timestamp_col": null, "bytes_col": null, "numeric_col": null, "bignumeric_col": null, "geography_col": null, "duration_col": null}
diff --git a/packages/bigframes/tests/data/nested_structs_schema.json b/packages/bigframes/tests/data/nested_structs_schema.json
index 6692615ceffa..06e4a3e5275c 100644
--- a/packages/bigframes/tests/data/nested_structs_schema.json
+++ b/packages/bigframes/tests/data/nested_structs_schema.json
@@ -7,6 +7,7 @@
     {
         "name": "person",
         "type": "RECORD",
+        "mode": "NULLABLE",
         "fields": [
             {
                 "name": "name",
@@ -21,6 +22,7 @@
             {
                 "name": "address",
                 "type": "RECORD",
+                "mode": "NULLABLE",
                 "fields": [
                     {
                         "name": "city",
@@ -35,5 +37,76 @@
                 ]
             }
         ]
+    },
+    {
+        "name": "bool_col",
+        "type": "BOOLEAN",
+        "mode": "NULLABLE"
+    },
+    {
+        "name": "int64_col",
+        "type": "INTEGER",
+        "mode": "NULLABLE"
+    },
+    {
+        "name": "float64_col",
+        "type": "FLOAT",
+        "mode": "NULLABLE"
+    },
+    {
+        "name": "string_col",
+        "type": "STRING",
+        "mode": "NULLABLE"
+    },
+    {
+        "name": "json_col",
+        "type": "JSON",
+        "mode": "NULLABLE"
+    },
+    {
+        "name": "date_col",
+        "type": "DATE",
+        "mode": "NULLABLE"
+    },
+    {
+        "name": "time_col",
+        "type": "TIME",
+        "mode": "NULLABLE"
+    },
+    {
+        "name": "datetime_col",
+        "type": "DATETIME",
+        "mode": "NULLABLE"
+    },
+    {
+        "name": "timestamp_col",
+        "type": "TIMESTAMP",
+        "mode": "NULLABLE"
+    },
+    {
+        "name": "bytes_col",
+        "type": "BYTES",
+        "mode": "NULLABLE"
+    },
+    {
+        "name": "numeric_col",
+        "type": "NUMERIC",
+        "mode": "NULLABLE"
+    },
+    {
+        "name": "bignumeric_col",
+        "type": "BIGNUMERIC",
+        "mode": "NULLABLE"
+    },
+    {
+        "name": "geography_col",
+        "type": "GEOGRAPHY",
+        "mode": "NULLABLE"
+    },
+    {
+        "name": "duration_col",
+        "type": "INTEGER",
+        "mode": "NULLABLE",
+        "description": "#microseconds"
     }
 ]
diff --git a/packages/bigframes/tests/system/conftest.py b/packages/bigframes/tests/system/conftest.py
index f6fbdd0c510d..230a39a29f87 100644
--- a/packages/bigframes/tests/system/conftest.py
+++ b/packages/bigframes/tests/system/conftest.py
@@ -496,14 +496,221 @@ def nested_structs_df(
 
 @pytest.fixture(scope="session")
 def nested_structs_pandas_df(nested_structs_pandas_type: pd.ArrowDtype) -> pd.DataFrame:
-    """pd.DataFrame pointing at test data."""
+    """pd.DataFrame pointing at test data.
+
+    Manually parses using json.loads to preserve data types.
+    """
+    import base64
+    import datetime
+    import decimal
+    import json
+
+    import db_dtypes
+    import geopandas as gpd
+
+    with open(DATA_DIR / "nested_structs.jsonl") as f:
+        raw_rows = [json.loads(line) for line in f]
+
+    ids = [row["id"] for row in raw_rows]
+
+    def get_val(row, col_name):
+        return row.get(col_name)
+
+    # person
+    person_struct_schema = nested_structs_pandas_type.pyarrow_dtype
+    processed_person = []
+    for row in raw_rows:
+        x = get_val(row, "person")
+        if x is None:
+            processed_person.append(None)
+        else:
+            d = dict(x)
+            if "age" in d and d["age"] is not None:
+                d["age"] = int(d["age"])
+            processed_person.append(d)
+    person_arr = pa.array(processed_person, type=person_struct_schema)
+    person_ser = pd.Series(person_arr, index=ids, dtype=nested_structs_pandas_type)
+
+    # bool_col
+    bool_vals = [
+        bool(get_val(row, "bool_col")) if get_val(row, "bool_col") is not None else None
+        for row in raw_rows
+    ]
+    bool_ser = pd.Series(bool_vals, index=ids, dtype=pd.BooleanDtype())
+
+    # int64_col
+    int64_vals = [
+        int(get_val(row, "int64_col"))
+        if get_val(row, "int64_col") is not None
+        else None
+        for row in raw_rows
+    ]
+    int64_ser = pd.Series(int64_vals, index=ids, dtype=pd.Int64Dtype())
+
+    # float64_col
+    float64_vals = [
+        float(get_val(row, "float64_col"))
+        if get_val(row, "float64_col") is not None
+        else None
+        for row in raw_rows
+    ]
+    arr = pa.array(float64_vals, type=pa.float64())
+    mask = pa.compute.is_null(arr)
+    nonnull = pa.compute.fill_null(arr, float("nan"))
+    pd_array = pd.arrays.FloatingArray(
+        nonnull.to_numpy(zero_copy_only=False),
+        mask.to_numpy(zero_copy_only=False),
+    )
+    float64_ser = pd.Series(pd_array, index=ids, dtype=pd.Float64Dtype())
+
+    # string_col
+    string_vals = [
+        str(get_val(row, "string_col"))
+        if get_val(row, "string_col") is not None
+        else None
+        for row in raw_rows
+    ]
+    string_ser = pd.Series(
+        string_vals, index=ids, dtype=pd.StringDtype(storage="pyarrow")
+    )
 
-    df = pd.read_json(
-        DATA_DIR / "nested_structs.jsonl",
-        lines=True,
+    # json_col
+    json_strs = []
+    for row in raw_rows:
+        if "json_col" not in row:
+            json_strs.append(None)
+        elif row["json_col"] is None:
+            json_strs.append("null")
+        else:
+            json_strs.append(
+                json.dumps(row["json_col"], sort_keys=True, separators=(",", ":"))
+            )
+    json_arr = pa.array(json_strs, type=db_dtypes.JSONArrowType())
+    json_ser = pd.Series(
+        json_arr, index=ids, dtype=pd.ArrowDtype(db_dtypes.JSONArrowType())
+    )
+
+    # date_col
+    date_vals = [
+        datetime.date.fromisoformat(get_val(row, "date_col"))
+        if get_val(row, "date_col") is not None
+        else None
+        for row in raw_rows
+    ]
+    date_arr = pa.array(date_vals, type=pa.date32())
+    date_ser = pd.Series(date_arr, index=ids, dtype=pd.ArrowDtype(pa.date32()))
+
+    # time_col
+    time_vals = [
+        datetime.time.fromisoformat(get_val(row, "time_col"))
+        if get_val(row, "time_col") is not None
+        else None
+        for row in raw_rows
+    ]
+    time_arr = pa.array(time_vals, type=pa.time64("us"))
+    time_ser = pd.Series(time_arr, index=ids, dtype=pd.ArrowDtype(pa.time64("us")))
+
+    # datetime_col
+    datetime_vals = []
+    for row in raw_rows:
+        val = get_val(row, "datetime_col")
+        if val is None:
+            datetime_vals.append(None)
+        else:
+            datetime_vals.append(datetime.datetime.fromisoformat(val.replace(" ", "T")))
+    datetime_arr = pa.array(datetime_vals, type=pa.timestamp("us"))
+    datetime_ser = pd.Series(
+        datetime_arr, index=ids, dtype=pd.ArrowDtype(pa.timestamp("us"))
+    )
+
+    # timestamp_col
+    timestamp_vals = [
+        datetime.datetime.fromisoformat(get_val(row, "timestamp_col"))
+        if get_val(row, "timestamp_col") is not None
+        else None
+        for row in raw_rows
+    ]
+    timestamp_arr = pa.array(timestamp_vals, type=pa.timestamp("us", tz="UTC"))
+    timestamp_ser = pd.Series(
+        timestamp_arr, index=ids, dtype=pd.ArrowDtype(pa.timestamp("us", tz="UTC"))
+    )
+
+    # bytes_col
+    bytes_vals = []
+    for row in raw_rows:
+        val = get_val(row, "bytes_col")
+        if val is None:
+            bytes_vals.append(None)
+        elif val == "":
+            bytes_vals.append(b"")
+        else:
+            bytes_vals.append(base64.b64decode(val))
+    bytes_arr = pa.array(bytes_vals, type=pa.binary())
+    bytes_ser = pd.Series(bytes_arr, index=ids, dtype=pd.ArrowDtype(pa.binary()))
+
+    # numeric_col
+    numeric_vals = [
+        decimal.Decimal(str(get_val(row, "numeric_col")))
+        if get_val(row, "numeric_col") is not None
+        else None
+        for row in raw_rows
+    ]
+    numeric_arr = pa.array(numeric_vals, type=pa.decimal128(38, 9))
+    numeric_ser = pd.Series(
+        numeric_arr, index=ids, dtype=pd.ArrowDtype(pa.decimal128(38, 9))
+    )
+
+    # bignumeric_col
+    bignumeric_vals = [
+        decimal.Decimal(str(get_val(row, "bignumeric_col")))
+        if get_val(row, "bignumeric_col") is not None
+        else None
+        for row in raw_rows
+    ]
+    bignumeric_arr = pa.array(bignumeric_vals, type=pa.decimal256(76, 38))
+    bignumeric_ser = pd.Series(
+        bignumeric_arr, index=ids, dtype=pd.ArrowDtype(pa.decimal256(76, 38))
+    )
+
+    # geography_col
+    geo_vals = [get_val(row, "geography_col") for row in raw_rows]
+    geo_ser = gpd.GeoSeries.from_wkt(geo_vals)
+    geo_ser.index = ids
+
+    # duration_col
+    duration_vals = [
+        int(get_val(row, "duration_col"))
+        if get_val(row, "duration_col") is not None
+        else None
+        for row in raw_rows
+    ]
+    duration_arr = pa.array(duration_vals, type=pa.duration("us"))
+    duration_ser = pd.Series(
+        duration_arr, index=ids, dtype=pd.ArrowDtype(pa.duration("us"))
+    )
+
+    df = pd.DataFrame(
+        {
+            "person": person_ser,
+            "bool_col": bool_ser,
+            "int64_col": int64_ser,
+            "float64_col": float64_ser,
+            "string_col": string_ser,
+            "json_col": json_ser,
+            "date_col": date_ser,
+            "time_col": time_ser,
+            "datetime_col": datetime_ser,
+            "timestamp_col": timestamp_ser,
+            "bytes_col": bytes_ser,
+            "numeric_col": numeric_ser,
+            "bignumeric_col": bignumeric_ser,
+            "geography_col": geo_ser,
+            "duration_col": duration_ser,
+        },
+        index=ids,
     )
-    df = df.set_index("id")
-    df["person"] = df["person"].astype(nested_structs_pandas_type)
+    df.index.name = "id"
+
     return df
 
 
diff --git a/packages/bigframes/tests/system/small/test_pandas.py b/packages/bigframes/tests/system/small/test_pandas.py
index 7581557b0b33..356e498021b9 100644
--- a/packages/bigframes/tests/system/small/test_pandas.py
+++ b/packages/bigframes/tests/system/small/test_pandas.py
@@ -51,6 +51,84 @@ def test_concat_dataframe_w_struct_cols(nested_structs_df, nested_structs_pandas
     pd.testing.assert_frame_equal(bf_result, pd_result)
 
 
+def test_nested_structs_dtypes_and_edge_cases(nested_structs_df):
+    """Explicitly verify dtypes and edge case values for all supported types."""
+    import datetime as dt
+    import decimal
+
+    import numpy as np
+    import pandas as pd
+
+    import bigframes.dtypes as bfd
+
+    # 1. Verify BigFrames dtypes
+    expected_bf_dtypes = {
+        "person": nested_structs_df["person"].dtype,
+        "bool_col": bfd.BOOL_DTYPE,
+        "int64_col": bfd.INT_DTYPE,
+        "float64_col": bfd.FLOAT_DTYPE,
+        "string_col": bfd.STRING_DTYPE,
+        "json_col": bfd.JSON_DTYPE,
+        "date_col": bfd.DATE_DTYPE,
+        "time_col": bfd.TIME_DTYPE,
+        "datetime_col": bfd.DATETIME_DTYPE,
+        "timestamp_col": bfd.TIMESTAMP_DTYPE,
+        "bytes_col": bfd.BYTES_DTYPE,
+        "numeric_col": bfd.NUMERIC_DTYPE,
+        "bignumeric_col": bfd.BIGNUMERIC_DTYPE,
+        "geography_col": bfd.GEO_DTYPE,
+        "duration_col": bfd.TIMEDELTA_DTYPE,
+    }
+
+    for col_name, expected_dtype in expected_bf_dtypes.items():
+        assert nested_structs_df[col_name].dtype == expected_dtype, (
+            f"Dtype mismatch for {col_name}"
+        )
+
+    # 2. Convert to pandas for value assertions
+    pd_df = nested_structs_df.to_pandas()
+
+    # Verify we have 6 rows
+    assert len(pd_df) == 6
+
+    # Row 1: Normal typical values
+    assert pd_df.loc[1, "bool_col"] == True
+    assert pd_df.loc[1, "int64_col"] == 123456789
+    assert pd_df.loc[1, "float64_col"] == 1.25
+    assert pd_df.loc[1, "string_col"] == "Hello World"
+    assert pd_df.loc[1, "json_col"] == '{"a":1,"b":[1,2]}'
+    assert pd_df.loc[1, "date_col"] == dt.date(2026, 6, 24)
+
+    # Row 2: Min bounds / negative infinity
+    assert pd_df.loc[2, "int64_col"] == -9223372036854775808
+    assert pd_df.loc[2, "float64_col"] == float("-inf")
+    assert pd_df.loc[2, "numeric_col"] == decimal.Decimal(
+        "-99999999999999999999999999999.999999999"
+    )
+
+    # Row 3: Max bounds / infinity
+    assert pd_df.loc[3, "int64_col"] == 9223372036854775807
+    assert pd_df.loc[3, "float64_col"] == float("inf")
+
+    # Row 4: SQL NULLs (explicit nulls; the json_col key is omitted)
+    assert pd.isna(pd_df.loc[4, "bool_col"])
+    assert pd.isna(pd_df.loc[4, "int64_col"])
+    assert pd.isna(pd_df.loc[4, "float64_col"])
+    assert pd.isna(pd_df.loc[4, "json_col"])
+    assert pd.isna(pd_df.loc[4, "geography_col"])
+
+    # Row 5: Special edge cases (NaN, empty, multiline)
+    assert np.isnan(pd_df.loc[5, "float64_col"])
+    assert pd_df.loc[5, "float64_col"] is not pd.NA
+    assert not pd_df["float64_col"].isna().loc[5]
+    assert pd_df.loc[5, "string_col"] == 'Line 1\nLine 2\n"Quotes"'
+    assert pd_df.loc[5, "bytes_col"] == b"\x00"
+
+    # Row 6: JSON null literal
+    assert pd_df.loc[6, "json_col"] == "null"
+    assert not pd_df["json_col"].isna().loc[6]
+
+
 def test_concat_series(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
     bf_result = bpd.concat(

From a5fa941d692a193271f585b538a4d88a25105cc1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= <swast@google.com>
Date: Fri, 26 Jun 2026 09:52:24 -0500
Subject: [PATCH 2/7] Update packages/bigframes/tests/system/conftest.py

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 packages/bigframes/tests/system/conftest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/bigframes/tests/system/conftest.py b/packages/bigframes/tests/system/conftest.py
index 230a39a29f87..a50f4fa7940b 100644
--- a/packages/bigframes/tests/system/conftest.py
+++ b/packages/bigframes/tests/system/conftest.py
@@ -625,7 +625,7 @@ def get_val(row, col_name):
 
     # timestamp_col
     timestamp_vals = [
-        datetime.datetime.fromisoformat(get_val(row, "timestamp_col"))
+        datetime.datetime.fromisoformat(get_val(row, "timestamp_col").replace("Z", "+00:00"))
         if get_val(row, "timestamp_col") is not None
         else None
         for row in raw_rows

From a6a241fdbc6cc2be393854d149b5e6de539c1ecb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Fri, 26 Jun 2026 16:57:38 +0000
Subject: [PATCH 3/7] fix typing

---
 packages/bigframes/tests/system/conftest.py | 43 +++++++++------------
 1 file changed, 18 insertions(+), 25 deletions(-)

diff --git a/packages/bigframes/tests/system/conftest.py b/packages/bigframes/tests/system/conftest.py
index 230a39a29f87..51753022d608 100644
--- a/packages/bigframes/tests/system/conftest.py
+++ b/packages/bigframes/tests/system/conftest.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import datetime
 import hashlib
 import logging
 import math
@@ -19,7 +20,6 @@
 import textwrap
 import traceback
 import typing
-from datetime import datetime
 from typing import Dict, Generator, Optional
 
 import fsspec  # type: ignore[import-untyped]
@@ -501,12 +501,11 @@ def nested_structs_pandas_df(nested_structs_pandas_type: pd.ArrowDtype) -> pd.Da
     Manually parses using json.loads to preserve data types.
     """
     import base64
-    import datetime
     import decimal
     import json
 
-    import db_dtypes
-    import geopandas as gpd
+    import db_dtypes  # type: ignore[import-untyped]
+    import geopandas as gpd  # type: ignore[import-untyped]
 
     with open(DATA_DIR / "nested_structs.jsonl") as f:
         raw_rows = [json.loads(line) for line in f]
@@ -518,7 +517,7 @@ def get_val(row, col_name):
 
     # person
     person_struct_schema = nested_structs_pandas_type.pyarrow_dtype
-    processed_person = []
+    processed_person: list[Optional[dict[str, typing.Any]]] = []
     for row in raw_rows:
         x = get_val(row, "person")
         if x is None:
@@ -555,13 +554,7 @@ def get_val(row, col_name):
         for row in raw_rows
     ]
     arr = pa.array(float64_vals, type=pa.float64())
-    mask = pa.compute.is_null(arr)
-    nonnull = pa.compute.fill_null(arr, float("nan"))
-    pd_array = pd.arrays.FloatingArray(
-        nonnull.to_numpy(zero_copy_only=False),
-        mask.to_numpy(zero_copy_only=False),
-    )
-    float64_ser = pd.Series(pd_array, index=ids, dtype=pd.Float64Dtype())
+    float64_ser = pd.Series(arr, index=ids, dtype=pd.Float64Dtype())
 
     # string_col
     string_vals = [
@@ -575,7 +568,7 @@ def get_val(row, col_name):
     )
 
     # json_col
-    json_strs = []
+    json_strs: list[Optional[str]] = []
     for row in raw_rows:
         if "json_col" not in row:
             json_strs.append(None)
@@ -611,7 +604,7 @@ def get_val(row, col_name):
     time_ser = pd.Series(time_arr, index=ids, dtype=pd.ArrowDtype(pa.time64("us")))
 
     # datetime_col
-    datetime_vals = []
+    datetime_vals: list[Optional[datetime.datetime]] = []
     for row in raw_rows:
         val = get_val(row, "datetime_col")
         if val is None:
@@ -636,7 +629,7 @@ def get_val(row, col_name):
     )
 
     # bytes_col
-    bytes_vals = []
+    bytes_vals: list[Optional[bytes]] = []
     for row in raw_rows:
         val = get_val(row, "bytes_col")
         if val is None:
@@ -1041,9 +1034,9 @@ def new_time_series_pandas_df():
     return pd.DataFrame(
         {
             "parsed_date": [
-                datetime(2017, 8, 2, tzinfo=utc),
-                datetime(2017, 8, 3, tzinfo=utc),
-                datetime(2017, 8, 4, tzinfo=utc),
+                datetime.datetime(2017, 8, 2, tzinfo=utc),
+                datetime.datetime(2017, 8, 3, tzinfo=utc),
+                datetime.datetime(2017, 8, 4, tzinfo=utc),
             ],
             "total_visits": [2500, 2500, 2500],
         }
@@ -1062,12 +1055,12 @@ def new_time_series_pandas_df_w_id():
     return pd.DataFrame(
         {
             "parsed_date": [
-                datetime(2017, 8, 2, tzinfo=utc),
-                datetime(2017, 8, 2, tzinfo=utc),
-                datetime(2017, 8, 3, tzinfo=utc),
-                datetime(2017, 8, 3, tzinfo=utc),
-                datetime(2017, 8, 4, tzinfo=utc),
-                datetime(2017, 8, 4, tzinfo=utc),
+                datetime.datetime(2017, 8, 2, tzinfo=utc),
+                datetime.datetime(2017, 8, 2, tzinfo=utc),
+                datetime.datetime(2017, 8, 3, tzinfo=utc),
+                datetime.datetime(2017, 8, 3, tzinfo=utc),
+                datetime.datetime(2017, 8, 4, tzinfo=utc),
+                datetime.datetime(2017, 8, 4, tzinfo=utc),
             ],
             "id": ["1", "2", "1", "2", "1", "2"],
             "total_visits": [2500, 2500, 2500, 2500, 2500, 2500],
@@ -1680,7 +1673,7 @@ def cleanup_cloud_functions(session, cloudfunctions_client, dataset_id_permanent
                 continue
 
             # Ignore the functions less than one day old
-            age = datetime.now() - datetime.fromtimestamp(
+            age = datetime.datetime.now() - datetime.datetime.fromtimestamp(
                 cloud_function.update_time.timestamp()
             )
             if age.days <= 0:

From 4000b9f86408101cd9713f82ce171cac3f278ed5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Fri, 26 Jun 2026 16:58:52 +0000
Subject: [PATCH 4/7] fix lint

---
 packages/bigframes/tests/system/conftest.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/packages/bigframes/tests/system/conftest.py b/packages/bigframes/tests/system/conftest.py
index 375864ba41be..37d56047513e 100644
--- a/packages/bigframes/tests/system/conftest.py
+++ b/packages/bigframes/tests/system/conftest.py
@@ -618,7 +618,9 @@ def get_val(row, col_name):
 
     # timestamp_col
     timestamp_vals = [
-        datetime.datetime.fromisoformat(get_val(row, "timestamp_col").replace("Z", "+00:00"))
+        datetime.datetime.fromisoformat(
+            get_val(row, "timestamp_col").replace("Z", "+00:00")
+        )
         if get_val(row, "timestamp_col") is not None
         else None
         for row in raw_rows

From 9393164c041329f1f5c3a5d06dbba5b28c391d1f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Fri, 26 Jun 2026 18:47:42 +0000
Subject: [PATCH 5/7] fix unit tests

---
 packages/bigframes/tests/unit/conftest.py     |  11 ++
 .../out.sql                                   | 130 +++++++++++++++++-
 2 files changed, 135 insertions(+), 6 deletions(-)

diff --git a/packages/bigframes/tests/unit/conftest.py b/packages/bigframes/tests/unit/conftest.py
index 3ab217cf09ba..34a580c9374d 100644
--- a/packages/bigframes/tests/unit/conftest.py
+++ b/packages/bigframes/tests/unit/conftest.py
@@ -178,6 +178,9 @@ def nested_structs_types_df(compiler_session_w_nested_structs_types) -> bpd.Data
 def nested_structs_pandas_df() -> pd.DataFrame:
     """Returns a pandas DataFrame containing STRUCT types and using the `id`
     column as the index."""
+    import json
+
+    import numpy as np
 
     df = pd.read_json(
         DATA_DIR / "nested_structs.jsonl",
@@ -196,6 +199,14 @@ def nested_structs_pandas_df() -> pd.DataFrame:
         ]
     )
     df["person"] = df["person"].astype(pd.ArrowDtype(person_struct_schema))
+
+    def to_json_str(val):
+        if val is None or (isinstance(val, float) and np.isnan(val)):
+            return None
+        return json.dumps(val)
+
+    df["json_col"] = df["json_col"].apply(to_json_str).astype(dtypes.JSON_DTYPE)
+
     return df
 
 
diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_compile_readlocal/test_compile_readlocal_w_structs_df/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_compile_readlocal/test_compile_readlocal_w_structs_df/out.sql
index 7ded9cf5fff7..46f362a708b9 100644
--- a/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_compile_readlocal/test_compile_readlocal_w_structs_df/out.sql
+++ b/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_compile_readlocal/test_compile_readlocal_w_structs_df/out.sql
@@ -1,27 +1,145 @@
 WITH `bfcte_0` AS (
   SELECT
     *
-  FROM UNNEST(ARRAY<STRUCT<`bfcol_0` INT64, `bfcol_1` STRUCT<name STRING, age INT64, address STRUCT<city STRING, country STRING>>, `bfcol_2` INT64>>[STRUCT(
+  FROM UNNEST(ARRAY<STRUCT<`bfcol_0` INT64, `bfcol_1` STRUCT<name STRING, age INT64, address STRUCT<city STRING, country STRING>>, `bfcol_2` FLOAT64, `bfcol_3` FLOAT64, `bfcol_4` FLOAT64, `bfcol_5` STRING, `bfcol_6` JSON, `bfcol_7` STRING, `bfcol_8` STRING, `bfcol_9` STRING, `bfcol_10` TIMESTAMP, `bfcol_11` STRING, `bfcol_12` FLOAT64, `bfcol_13` FLOAT64, `bfcol_14` STRING, `bfcol_15` FLOAT64, `bfcol_16` INT64>>[STRUCT(
     1,
     STRUCT(
       'Alice' AS `name`,
       30 AS `age`,
       STRUCT('New York' AS `city`, 'USA' AS `country`) AS `address`
     ),
+    1.0,
+    123456789.0,
+    1.25,
+    'Hello World',
+    PARSE_JSON('{"a":1,"b":[1,2]}'),
+    '2026-06-24',
+    '12:34:56.789012',
+    '2026-06-24 12:34:56.789012',
+    CAST('2026-06-24T12:34:56.789012+00:00' AS TIMESTAMP),
+    'SGVsbG8=',
+    123456.789,
+    123456.78901234567,
+    'POINT(30 10)',
+    1000.0,
     0
   ), STRUCT(
     2,
+    STRUCT('' AS `name`, -1 AS `age`, STRUCT('' AS `city`, '' AS `country`) AS `address`),
+    0.0,
+    -9.223372036854776e+18,
+    CAST('-Infinity' AS FLOAT64),
+    '',
+    PARSE_JSON('{}'),
+    '0001-01-01',
+    '00:00:00',
+    '0001-01-02 00:00:00',
+    CAST('0001-01-02T00:00:00+00:00' AS TIMESTAMP),
+    '',
+    -1e+29,
+    -1e+38,
+    'POINT(0 0)',
+    -9223372036854776.0,
+    1
+  ), STRUCT(
+    3,
+    STRUCT(
+      'Very Long Name...' AS `name`,
+      150 AS `age`,
+      STRUCT('City' AS `city`, 'Country' AS `country`) AS `address`
+    ),
+    1.0,
+    9.223372036854776e+18,
+    CAST('Infinity' AS FLOAT64),
+    'Unicode: 🚀 Spark ✨',
+    PARSE_JSON('{"max":true,"nested":{"val":999}}'),
+    '9999-12-31',
+    '23:59:59.999999',
+    '9999-12-31 23:59:59.999999',
+    CAST('9999-12-31T23:59:59.999999+00:00' AS TIMESTAMP),
+    'dmVyeSBsb25nIGJ5dGVzIHZhbHVl',
+    1e+29,
+    1e+38,
+    'POLYGON((0 0, 10 0, 10 10, 0 10, 0 0))',
+    9223372036854776.0,
+    2
+  ), STRUCT(
+    4,
+    CAST(NULL AS STRUCT<name STRING, age INT64, address STRUCT<city STRING, country STRING>>),
+    CAST(NULL AS FLOAT64),
+    CAST(NULL AS FLOAT64),
+    CAST(NULL AS FLOAT64),
+    CAST(NULL AS STRING),
+    CAST(NULL AS JSON),
+    CAST(NULL AS STRING),
+    CAST(NULL AS STRING),
+    CAST(NULL AS STRING),
+    CAST(NULL AS TIMESTAMP),
+    CAST(NULL AS STRING),
+    CAST(NULL AS FLOAT64),
+    CAST(NULL AS FLOAT64),
+    CAST(NULL AS STRING),
+    CAST(NULL AS FLOAT64),
+    3
+  ), STRUCT(
+    5,
     STRUCT(
       'Bob' AS `name`,
-      25 AS `age`,
-      STRUCT('London' AS `city`, 'UK' AS `country`) AS `address`
+      0 AS `age`,
+      CAST(NULL AS STRUCT<city STRING, country STRING>) AS `address`
     ),
-    1
+    0.0,
+    0.0,
+    CAST(NULL AS FLOAT64),
+    'Line 1\nLine 2\n"Quotes"',
+    PARSE_JSON('[1,"two",null]'),
+    '1970-01-01',
+    '12:00:00',
+    '1970-01-01 12:00:00',
+    CAST('1970-01-01T12:00:00+00:00' AS TIMESTAMP),
+    'AA==',
+    0.0,
+    0.0,
+    'LINESTRING(0 0, 1 1, 2 2)',
+    0.0,
+    4
+  ), STRUCT(
+    6,
+    CAST(NULL AS STRUCT<name STRING, age INT64, address STRUCT<city STRING, country STRING>>),
+    CAST(NULL AS FLOAT64),
+    CAST(NULL AS FLOAT64),
+    CAST(NULL AS FLOAT64),
+    CAST(NULL AS STRING),
+    CAST(NULL AS JSON),
+    CAST(NULL AS STRING),
+    CAST(NULL AS STRING),
+    CAST(NULL AS STRING),
+    CAST(NULL AS TIMESTAMP),
+    CAST(NULL AS STRING),
+    CAST(NULL AS FLOAT64),
+    CAST(NULL AS FLOAT64),
+    CAST(NULL AS STRING),
+    CAST(NULL AS FLOAT64),
+    5
   )])
 )
 SELECT
   `bfcol_0` AS `id`,
-  `bfcol_1` AS `person`
+  `bfcol_1` AS `person`,
+  `bfcol_2` AS `bool_col`,
+  `bfcol_3` AS `int64_col`,
+  `bfcol_4` AS `float64_col`,
+  `bfcol_5` AS `string_col`,
+  `bfcol_6` AS `json_col`,
+  `bfcol_7` AS `date_col`,
+  `bfcol_8` AS `time_col`,
+  `bfcol_9` AS `datetime_col`,
+  `bfcol_10` AS `timestamp_col`,
+  `bfcol_11` AS `bytes_col`,
+  `bfcol_12` AS `numeric_col`,
+  `bfcol_13` AS `bignumeric_col`,
+  `bfcol_14` AS `geography_col`,
+  `bfcol_15` AS `duration_col`
 FROM `bfcte_0`
 ORDER BY
-  `bfcol_2` ASC NULLS LAST
\ No newline at end of file
+  `bfcol_16` ASC NULLS LAST

From 89cadb624ff536e9bac9bc9627fb2f9e9af5cf8f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Fri, 26 Jun 2026 18:52:35 +0000
Subject: [PATCH 6/7] fix snapshot

---
 .../test_compile_readlocal_w_structs_df/out.sql                 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_compile_readlocal/test_compile_readlocal_w_structs_df/out.sql b/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_compile_readlocal/test_compile_readlocal_w_structs_df/out.sql
index 46f362a708b9..58a01635b7d2 100644
--- a/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_compile_readlocal/test_compile_readlocal_w_structs_df/out.sql
+++ b/packages/bigframes/tests/unit/core/compile/sqlglot/snapshots/test_compile_readlocal/test_compile_readlocal_w_structs_df/out.sql
@@ -142,4 +142,4 @@ SELECT
   `bfcol_15` AS `duration_col`
 FROM `bfcte_0`
 ORDER BY
-  `bfcol_16` ASC NULLS LAST
+  `bfcol_16` ASC NULLS LAST
\ No newline at end of file

From 38f4a5daf858efd66ce9dd44cb6f6edf24427305 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a?= <swast@google.com>
Date: Fri, 26 Jun 2026 19:58:51 +0000
Subject: [PATCH 7/7] fix unit tests

---
 packages/bigframes/tests/system/conftest.py |  9 +++++++--
 packages/bigframes/tests/unit/conftest.py   | 18 ++++++++++++++++++
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/packages/bigframes/tests/system/conftest.py b/packages/bigframes/tests/system/conftest.py
index 37d56047513e..2f68471d176b 100644
--- a/packages/bigframes/tests/system/conftest.py
+++ b/packages/bigframes/tests/system/conftest.py
@@ -34,6 +34,7 @@
 import google.cloud.storage as storage  # type: ignore
 import numpy as np
 import pandas as pd
+import pandas.arrays
 import pyarrow as pa
 import pytest
 import pytz
@@ -553,8 +554,12 @@ def get_val(row, col_name):
         else None
         for row in raw_rows
     ]
-    arr = pa.array(float64_vals, type=pa.float64())
-    float64_ser = pd.Series(arr, index=ids, dtype=pd.Float64Dtype())
+    np_vals = np.array(
+        [x if x is not None else np.nan for x in float64_vals], dtype=np.float64
+    )
+    mask = np.array([x is None for x in float64_vals], dtype=bool)
+    float64_arr = pd.arrays.FloatingArray(np_vals, mask)  # type: ignore
+    float64_ser = pd.Series(float64_arr, index=ids)
 
     # string_col
     string_vals = [
diff --git a/packages/bigframes/tests/unit/conftest.py b/packages/bigframes/tests/unit/conftest.py
index 34a580c9374d..5a266bcd413a 100644
--- a/packages/bigframes/tests/unit/conftest.py
+++ b/packages/bigframes/tests/unit/conftest.py
@@ -207,6 +207,24 @@ def to_json_str(val):
 
     df["json_col"] = df["json_col"].apply(to_json_str).astype(dtypes.JSON_DTYPE)
 
+    # timestamp_col
+    import datetime
+
+    def parse_timestamp(val):
+        if pd.isna(val):
+            return None
+        if isinstance(val, str):
+            return datetime.datetime.fromisoformat(val.replace("Z", "+00:00"))
+        if hasattr(val, "to_pydatetime"):
+            return val.to_pydatetime()
+        return val
+
+    timestamp_vals = [parse_timestamp(x) for x in df["timestamp_col"]]
+    timestamp_arr = pa.array(timestamp_vals, type=dtypes.TIMESTAMP_DTYPE.pyarrow_dtype)
+    df["timestamp_col"] = pd.Series(
+        timestamp_arr, index=df.index, dtype=dtypes.TIMESTAMP_DTYPE
+    )
+
     return df