From ab90a52a7a04af832f6650abdf0e1366db11a5ef Mon Sep 17 00:00:00 2001 From: jupblb Date: Thu, 2 Jul 2026 21:43:40 +0200 Subject: [PATCH] Fix duplicate documents in Bazel index aggregation --- .../scip_java/buildtools/BazelBuildTool.kt | 15 +++++++++++++++ .../src/main/resources/scip-java/scip_java.bzl | 7 +++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/scip-java/src/main/kotlin/org/scip_code/scip_java/buildtools/BazelBuildTool.kt b/scip-java/src/main/kotlin/org/scip_code/scip_java/buildtools/BazelBuildTool.kt index 5ed74acb..f27c8af9 100644 --- a/scip-java/src/main/kotlin/org/scip_code/scip_java/buildtools/BazelBuildTool.kt +++ b/scip-java/src/main/kotlin/org/scip_code/scip_java/buildtools/BazelBuildTool.kt @@ -137,6 +137,21 @@ To narrow the set of targets to index or pass additional flags to Bazel, include Files.walkFileTree( bazelOutLink, object : SimpleFileVisitor() { + override fun preVisitDirectory( + dir: Path, + attrs: BasicFileAttributes, + ): FileVisitResult { + // The aspect declares a `.scip-targetroot` directory that + // contains the intermediate per-source `*.scip` shards. Those shards + // are already aggregated into the sibling `.scip` file, so + // including them here would duplicate every document in the index. + return if (dir.fileName.toString().endsWith(".scip-targetroot")) { + FileVisitResult.SKIP_SUBTREE + } else { + FileVisitResult.CONTINUE + } + } + override fun visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult { if (scipPattern.matches(file)) { val bytes = Files.readAllBytes(file) diff --git a/scip-java/src/main/resources/scip-java/scip_java.bzl b/scip-java/src/main/resources/scip-java/scip_java.bzl index 80390651..54d6ea25 100644 --- a/scip-java/src/main/resources/scip-java/scip_java.bzl +++ b/scip-java/src/main/resources/scip-java/scip_java.bzl @@ -13,9 +13,12 @@ many *.scip (https://github.com/scip-code/scip) files. These files encode information about which symbols are referenced from which locations in your source code. -Use the command below to merge all of these SCIP files into a single index: +Use the command below to merge all of these SCIP files into a single index. +The `*.scip-targetroot` directories are excluded because they contain +intermediate per-source shards that are already aggregated into the sibling +`.scip` files: - find bazel-bin/ -type f -name '*.scip' | xargs cat > index.scip + find bazel-bin/ -type f -name '*.scip' -not -path '*.scip-targetroot/*' | xargs cat > index.scip Use `src code-intel upload` to upload the unified SCIP file to Sourcegraph: