class ValueBitmaskFilter(RowFilter):
    """Row filter that selects cells by applying a bitmask to their value.

    A cell passes the filter only when ``(value & mask) == mask``, i.e. every
    bit set in the mask is also set in the cell value. The mask must have
    exactly the same length as the value; a cell whose value has a different
    length is not considered a match.

    :type mask: bytes or str
    :param mask: The bitmask to compare cell values against. String values
                 will be encoded as ASCII.
    """

    def __init__(self, mask: bytes | str):
        # Store the mask in bytes form via the module's ``_to_bytes`` helper.
        self.mask: bytes = _to_bytes(mask)

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(mask={self.mask!r})"

    def __eq__(self, other):
        # Anything that is not a bitmask filter is left for the other operand
        # (or Python's default fallback) to decide.
        if isinstance(other, ValueBitmaskFilter):
            return other.mask == self.mask
        return NotImplemented

    def _to_dict(self) -> dict[str, Any]:
        """Build the dict form of this filter, keyed by ``value_bitmask_filter``."""
        bitmask_payload = {"mask": self.mask}
        return {"value_bitmask_filter": bitmask_payload}
# --- tests/system/data/test_system_async.py ---------------------------------
@pytest.mark.usefixtures("target")
@CrossSync.Retry(
    predicate=retry.if_exception_type(ClientError), initial=1, maximum=5
)
@pytest.mark.parametrize(
    "cell_value,mask,expect_match",
    [
        # value and mask are identical
        (b"\x01\x02\x03", b"\x01\x02\x03", True),
        # every bit set in the mask is also set in the value
        (b"\x01\x02\x03", b"\x01\x00\x00", True),
        # the mask requires a bit the value does not have
        (b"\x00\x02\x03", b"\x01\x00\x00", False),
    ],
)
@pytest.mark.skipif(
    bool(os.environ.get(BIGTABLE_EMULATOR)),
    reason="value_bitmask_filter not supported by emulator",
)
@CrossSync.pytest
async def test_value_bitmask_filter(
    self, target, temp_rows, cell_value, mask, expect_match
):
    """
    ValueBitmaskFilter matches cells where (value & mask) == mask.
    Make sure inputs are properly interpreted by the server.

    Writes a single row holding ``cell_value``, reads it back through a
    ``ValueBitmaskFilter(mask)`` and checks that the row is returned exactly
    when ``expect_match`` is true.
    """
    from google.cloud.bigtable.data import ReadRowsQuery
    from google.cloud.bigtable.data.row_filters import ValueBitmaskFilter

    bitmask_filter = ValueBitmaskFilter(mask)
    await temp_rows.add_row(b"row_key_1", value=cell_value)
    query = ReadRowsQuery(row_keys=[b"row_key_1"], row_filter=bitmask_filter)
    row_list = await target.read_rows(query)
    # Only one row is written, so the result holds 1 row on a match, else 0.
    assert len(row_list) == int(expect_match), (
        f"row {cell_value!r} not matched as {expect_match} with {mask!r} bitmask filter"
    )


# --- tests/system/data/test_system_autogen.py -------------------------------
# Sync counterpart of the async test above; kept statement-for-statement in
# step with it.
@pytest.mark.usefixtures("target")
@CrossSync._Sync_Impl.Retry(
    predicate=retry.if_exception_type(ClientError), initial=1, maximum=5
)
@pytest.mark.parametrize(
    "cell_value,mask,expect_match",
    [
        (b"\x01\x02\x03", b"\x01\x02\x03", True),
        (b"\x01\x02\x03", b"\x01\x00\x00", True),
        (b"\x00\x02\x03", b"\x01\x00\x00", False),
    ],
)
@pytest.mark.skipif(
    bool(os.environ.get(BIGTABLE_EMULATOR)),
    reason="value_bitmask_filter not supported by emulator",
)
def test_value_bitmask_filter(
    self, target, temp_rows, cell_value, mask, expect_match
):
    """ValueBitmaskFilter matches cells where (value & mask) == mask.
    Make sure inputs are properly interpreted by the server."""
    from google.cloud.bigtable.data import ReadRowsQuery
    from google.cloud.bigtable.data.row_filters import ValueBitmaskFilter

    bitmask_filter = ValueBitmaskFilter(mask)
    temp_rows.add_row(b"row_key_1", value=cell_value)
    query = ReadRowsQuery(row_keys=[b"row_key_1"], row_filter=bitmask_filter)
    row_list = target.read_rows(query)
    assert len(row_list) == int(expect_match), (
        f"row {cell_value!r} not matched as {expect_match} with {mask!r} bitmask filter"
    )


# --- tests/unit/data/test_row_filters.py ------------------------------------
class TestValueBitmaskFilter:
    """Unit tests for ``ValueBitmaskFilter`` serialization, equality and repr."""

    @staticmethod
    def _target_class():
        from google.cloud.bigtable.data.row_filters import ValueBitmaskFilter

        return ValueBitmaskFilter

    def test_to_dict(self):
        mask = b"\xaa" * 8
        row_filter = self._target_class()(mask)
        expected = {"value_bitmask_filter": {"mask": mask}}
        assert row_filter._to_dict() == expected

    def test_to_pb(self):
        mask = b"\xaa" * 8
        row_filter = self._target_class()(mask)
        pb = row_filter._to_pb()
        assert pb.value_bitmask_filter.mask == mask

    def test_eq_same_mask(self):
        mask = b"\xaa" * 8
        klass = self._target_class()
        assert klass(mask) == klass(mask)

    def test_eq_different_mask(self):
        klass = self._target_class()
        assert klass(b"\xaa" * 8) != klass(b"\x55" * 8)

    def test_eq_other_type(self):
        # __eq__ returns NotImplemented for non-filter operands, so Python
        # falls back to identity comparison and the objects compare unequal.
        mask = b"\xaa" * 8
        row_filter = self._target_class()(mask)
        assert row_filter != mask
        assert row_filter != object()

    def test_repr(self):
        mask = b"\xaa" * 8
        row_filter = self._target_class()(mask)
        assert repr(row_filter) == f"ValueBitmaskFilter(mask={mask!r})"