From 97d90d2f1412f4348cc98e5d8e6b23418ab460d6 Mon Sep 17 00:00:00 2001 From: Juntao Zhang Date: Sun, 8 Mar 2026 21:53:06 +0800 Subject: [PATCH] [flink] Optimize byte array comparison --- .../paimon/flink/sorter/HilbertSorter.java | 19 +++++++++---------- .../paimon/flink/sorter/ZorderSorter.java | 19 +++++++++---------- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/paimon-flink/paimon-flink-common/src/main/java/org/apache/paimon/flink/sorter/HilbertSorter.java b/paimon-flink/paimon-flink-common/src/main/java/org/apache/paimon/flink/sorter/HilbertSorter.java index 65d034ca7061..27867cd20526 100644 --- a/paimon-flink/paimon-flink-common/src/main/java/org/apache/paimon/flink/sorter/HilbertSorter.java +++ b/paimon-flink/paimon-flink-common/src/main/java/org/apache/paimon/flink/sorter/HilbertSorter.java @@ -26,8 +26,7 @@ import org.apache.paimon.types.DataField; import org.apache.paimon.types.DataTypes; import org.apache.paimon.types.RowType; - -import org.apache.paimon.shade.guava30.com.google.common.primitives.UnsignedBytes; +import org.apache.paimon.utils.SortUtil; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.streaming.api.datastream.DataStream; @@ -37,6 +36,8 @@ import java.util.Arrays; import java.util.Collections; +import static org.apache.paimon.utils.Preconditions.checkArgument; + /** * This is a table sorter which will sort the records by the hilbert curve of specified columns. It * works on stream api. It computes the hilbert index by {@link HilbertIndexer}. 
After add the @@ -82,14 +83,12 @@ private DataStream sortStreamByHilbert( TypeInformation.of(byte[].class), () -> (b1, b2) -> { - assert b1.length == b2.length; - for (int i = 0; i < b1.length; i++) { - int ret = UnsignedBytes.compare(b1[i], b2[i]); - if (ret != 0) { - return ret; - } - } - return 0; + checkArgument( + b1.length == b2.length, + "Hilbert curve keys must have the same length, but got %s and %s", + b1.length, + b2.length); + return SortUtil.compareBinary(b1, b2); }, new SortUtils.KeyAbstract() { @Override diff --git a/paimon-flink/paimon-flink-common/src/main/java/org/apache/paimon/flink/sorter/ZorderSorter.java b/paimon-flink/paimon-flink-common/src/main/java/org/apache/paimon/flink/sorter/ZorderSorter.java index 7688bb609abd..dcb121024f6e 100644 --- a/paimon-flink/paimon-flink-common/src/main/java/org/apache/paimon/flink/sorter/ZorderSorter.java +++ b/paimon-flink/paimon-flink-common/src/main/java/org/apache/paimon/flink/sorter/ZorderSorter.java @@ -26,8 +26,7 @@ import org.apache.paimon.types.DataField; import org.apache.paimon.types.DataTypes; import org.apache.paimon.types.RowType; - -import org.apache.paimon.shade.guava30.com.google.common.primitives.UnsignedBytes; +import org.apache.paimon.utils.SortUtil; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.streaming.api.datastream.DataStream; @@ -37,6 +36,8 @@ import java.util.Arrays; import java.util.Collections; +import static org.apache.paimon.utils.Preconditions.checkArgument; + /** * This is a table sorter which will sort the records by the z-order of specified columns. It works * on stream api. It computes the z-order-index by {@link ZIndexer}. 
After add the column of @@ -82,14 +83,12 @@ private DataStream sortStreamByZOrder( TypeInformation.of(byte[].class), () -> (b1, b2) -> { - assert b1.length == b2.length; - for (int i = 0; i < b1.length; i++) { - int ret = UnsignedBytes.compare(b1[i], b2[i]); - if (ret != 0) { - return ret; - } - } - return 0; + checkArgument( + b1.length == b2.length, + "Z-order curve keys must have the same length, but got %s and %s", + b1.length, + b2.length); + return SortUtil.compareBinary(b1, b2); }, new SortUtils.KeyAbstract() { @Override