diff --git a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java
index 09f1f54c96b7..25ab67a4ecff 100644
--- a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java
+++ b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/ESSiteSearchAPI.java
@@ -14,6 +14,9 @@
import com.dotcms.content.elasticsearch.business.*;
import com.dotcms.content.elasticsearch.util.RestHighLevelClientProvider;
import com.dotcms.content.index.IndexAPI;
+import com.dotcms.content.index.IndexTag;
+import com.dotcms.content.index.domain.Aggregation;
+import com.dotcms.content.index.domain.DotSearchException;
import com.dotcms.enterprise.LicenseUtil;
import com.dotcms.enterprise.license.LicenseLevel;
import com.dotcms.enterprise.priv.util.SearchSourceBuilderUtil;
@@ -64,7 +67,6 @@
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
-import org.elasticsearch.search.aggregations.Aggregation;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
@@ -88,7 +90,11 @@ public ESSiteSearchAPI(final IndexAPI indexApi,
}
public ESSiteSearchAPI() {
- this(APILocator.getESIndexAPI(), new ESMappingAPIImpl(), APILocator.getIndiciesAPI());
+ // Use the vendor-specific ESIndexAPI directly (NOT APILocator.getESIndexAPI(), which returns
+ // the phase-aware IndexAPIImpl router). The SiteSearchAPIImpl router is the single fan-out
+ // point for the ES → OS migration; routing index ops through the neutral router here as well
+ // would dual-write a second time and create duplicate OpenSearch indices.
+ this(new ESIndexAPI(), new ESMappingAPIImpl(), APILocator.getIndiciesAPI());
}
/**
@@ -351,7 +357,7 @@ public void deactivateIndex(String indexName) throws DotDataException, IOExcepti
}
@Override
- public synchronized boolean createSiteSearchIndex(String indexName, String alias, int shards) throws ElasticsearchException, IOException {
+ public synchronized boolean createSiteSearchIndex(String indexName, String alias, int shards) throws DotSearchException, IOException {
if(indexName==null){
return false;
}
@@ -379,7 +385,7 @@ public synchronized boolean createSiteSearchIndex(String indexName, String alias
}
if(i++ > 300){
- throw new ElasticsearchException("index timed out creating");
+ throw new DotSearchException("index timed out creating");
}
}
@@ -387,8 +393,12 @@ public synchronized boolean createSiteSearchIndex(String indexName, String alias
indexApi.createAlias(indexName, alias);
}
- //put mappings
- mappingAPI.putMapping(indexName, mapping);
+ // Put mappings on the ES index only. ESMappingAPIImpl.putMapping(String, String) is
+ // phase-dispatched and would fan out to OpenSearch, but SiteSearchAPIImpl is already the
+ // single fan-out point for site search (it invokes OSSiteSearchAPI separately, which owns
+ // its own untagged OS index + mapping). Fanning out here too would re-issue the mapping to
+ // a `.os`-tagged physical name that site-search OS indices never use → HTTP 404. Pin to ES.
+ mappingAPI.putMapping(List.of(indexName), mapping, IndexTag.ES);
return true;
}
@@ -634,7 +644,7 @@ public Map getAggregations ( String indexName, String query
}
if ( indexName == null || !IndexType.SITE_SEARCH.is(indexName) ) {
- throw new ElasticsearchException( indexName + " is not a sitesearch index or alias" );
+ throw new DotSearchException( indexName + " is not a sitesearch index or alias" );
}
//https://github.com/elasticsearch/elasticsearch/issues/2980
@@ -648,10 +658,10 @@ public Map getAggregations ( String indexName, String query
.timeout(TimeValue.timeValueMillis(INDEX_OPERATIONS_TIMEOUT_IN_MS)));
final SearchResponse response = client.search(request, RequestOptions.DEFAULT);
- return response.getAggregations().asMap();
+ return Aggregation.from(response.getAggregations());
} catch ( ElasticsearchException | IOException e ) {
Logger.error( this.getClass(), "Error getting aggregations for query.\n" + e.getMessage(), e );
- throw new ElasticsearchException( "Error getting aggregations for query.\n" + e.getMessage(), e );
+ throw new DotSearchException( "Error getting aggregations for query.\n" + e.getMessage(), e );
}
}
@@ -669,7 +679,7 @@ public Map getFacets ( String indexName, String query ) thr
}
if ( indexName == null || !IndexType.SITE_SEARCH.is(indexName ) ) {
- throw new ElasticsearchException( indexName + " is not a sitesearch index or alias" );
+ throw new DotSearchException( indexName + " is not a sitesearch index or alias" );
}
//https://github.com/elasticsearch/elasticsearch/issues/2980
@@ -683,10 +693,10 @@ public Map getFacets ( String indexName, String query ) thr
.timeout(TimeValue.timeValueMillis(INDEX_OPERATIONS_TIMEOUT_IN_MS)));
final SearchResponse response = client.search(request, RequestOptions.DEFAULT);
- return response.getAggregations().asMap();
+ return Aggregation.from(response.getAggregations());
} catch ( ElasticsearchException | IOException e ) {
Logger.error( this.getClass(), "Error getting Facets for query.\n" + e.getMessage(), e );
- throw new ElasticsearchException( "Error getting Facets for query.\n" + e.getMessage(), e );
+ throw new DotSearchException( "Error getting Facets for query.\n" + e.getMessage(), e );
}
}
diff --git a/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java
new file mode 100644
index 000000000000..836b56b575ee
--- /dev/null
+++ b/dotCMS/src/enterprise/java/com/dotcms/enterprise/publishing/sitesearch/OSSiteSearchAPI.java
@@ -0,0 +1,910 @@
+/*
+*
+* Copyright (c) 2025 dotCMS LLC
+* Use of this software is governed by the Business Source License included
+* in the LICENSE file found at in the root directory of software.
+* SPDX-License-Identifier: BUSL-1.1
+*
+*/
+
+package com.dotcms.enterprise.publishing.sitesearch;
+
+import com.dotcms.cdi.CDIUtils;
+import com.dotcms.content.elasticsearch.business.ContentletIndexAPIImpl;
+import com.dotcms.content.elasticsearch.business.ESMappingAPIImpl;
+import com.dotcms.content.elasticsearch.business.IndexType;
+import com.dotcms.content.index.IndexAPI;
+import com.dotcms.content.index.IndexTag;
+import com.dotcms.content.index.VersionedIndices;
+import com.dotcms.content.index.VersionedIndicesAPI;
+import com.dotcms.content.index.VersionedIndicesImpl;
+import com.dotcms.content.index.domain.Aggregation;
+import com.dotcms.content.index.domain.ContentSearchResponse;
+import com.dotcms.content.index.domain.DotSearchException;
+import com.dotcms.content.index.domain.SearchHit;
+import com.dotcms.content.index.domain.SearchHits;
+import com.dotcms.content.index.opensearch.OSClientProvider;
+import com.dotcms.content.index.opensearch.OSIndexAPIImpl;
+import com.dotcms.enterprise.LicenseUtil;
+import com.dotcms.enterprise.license.LicenseLevel;
+import com.dotcms.publishing.job.SiteSearchJobProxy;
+import com.dotmarketing.business.APILocator;
+import com.dotmarketing.exception.DotDataException;
+import com.dotmarketing.exception.DotRuntimeException;
+import com.dotmarketing.quartz.CronScheduledTask;
+import com.dotmarketing.quartz.QuartzUtils;
+import com.dotmarketing.quartz.ScheduledTask;
+import com.dotmarketing.quartz.TaskRuntimeValues;
+import com.dotmarketing.sitesearch.business.SiteSearchAPI;
+import com.dotmarketing.util.Logger;
+import com.dotmarketing.util.StringUtils;
+import com.dotmarketing.util.UUIDGenerator;
+import com.dotmarketing.util.UtilMethods;
+import com.dotmarketing.util.json.JSONArray;
+import com.dotmarketing.util.json.JSONException;
+import com.dotmarketing.util.json.JSONObject;
+import com.google.common.annotations.VisibleForTesting;
+import io.vavr.control.Try;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.stream.Collectors;
+import javax.enterprise.context.ApplicationScoped;
+import javax.enterprise.inject.Default;
+import javax.inject.Inject;
+import org.opensearch.client.json.JsonpDeserializer;
+import org.opensearch.client.json.JsonpMapper;
+import org.opensearch.client.opensearch.OpenSearchClient;
+import org.opensearch.client.opensearch.core.GetResponse;
+import org.opensearch.client.opensearch.core.SearchResponse;
+import org.opensearch.client.opensearch.generic.Bodies;
+import org.opensearch.client.opensearch.generic.Body;
+import org.opensearch.client.opensearch.generic.Requests;
+import org.opensearch.client.opensearch.generic.Response;
+import org.quartz.SchedulerException;
+
+/**
+ * OpenSearch implementation of {@link SiteSearchAPI}.
+ *
+ * <p>Vendor-specific counterpart to {@link ESSiteSearchAPI}. The two implementations are kept
+ * functionally symmetric and are selected at runtime by the {@link SiteSearchAPIImpl} router based
+ * on the migration phase. This class confines every {@code org.opensearch.*} type to its private
+ * helpers — the {@link SiteSearchAPI} contract it implements is vendor-neutral.
+ *
+ * <h2>Index source of truth</h2>
+ * <p>Where {@link ESSiteSearchAPI} reads the active site-search index name from the legacy
+ * {@code IndiciesAPI}, this class uses {@link VersionedIndicesAPI} — the canonical OpenSearch index
+ * registry — via the {@code siteSearch} slot of the default ({@link VersionedIndices#OPENSEARCH_3X})
+ * versioned indices. Index lifecycle operations (create/list/delete/alias) are delegated to
+ * the OpenSearch {@link IndexAPI} provider ({@link OSIndexAPIImpl}) directly rather than the neutral
+ * router, because the {@link SiteSearchAPIImpl} router is already the single phase-aware fan-out point
+ * — routing through the neutral {@code IndexAPI} router here would dual-write a second time.
+ *
+ * <h2>Index naming</h2>
+ * <p>Site-search index names are handled as plain logical names (e.g. {@code sitesearch_1718000000000}),
+ * exactly as in {@link ESSiteSearchAPI}: the cluster-id prefix is added only when a name reaches the
+ * OpenSearch client (via {@link IndexAPI#getNameWithClusterIDPrefix(String)}). The {@code .os}
+ * {@link com.dotcms.content.index.IndexTag} is intentionally not applied to site-search indices —
+ * production ES and OS run on separate clusters, and the site-search pointer lives in its own
+ * {@code siteSearch} slot, so there is no shared-name collision to disambiguate.
+ * TODO OS: revisit if single-cluster dual-write of site-search is ever required (then tag with
+ * {@code IndexTag.OS}).
+ *
+ * @author Fabrizio Araya
+ * @see ESSiteSearchAPI
+ * @see SiteSearchAPIImpl
+ * @see com.dotcms.content.index.opensearch.OSSearchAPIImpl
+ */
+@ApplicationScoped
+@Default
+public class OSSiteSearchAPI implements SiteSearchAPI {
+
+ /**
+ * Response deserializer with {@code TDocument} bound to {@code Object} (JSON objects become
+ * {@code Map}). The query body is sent through the low-level (generic) client so nested
+ * sub-aggregations are preserved; the bare {@code SearchResponse._DESERIALIZER} has no document
+ * deserializer bound and would fail on a hit carrying a {@code _source}. Mirrors
+ * {@link com.dotcms.content.index.opensearch.OSSearchAPIImpl}.
+ */
+ private static final JsonpDeserializer> SEARCH_RESPONSE_DESERIALIZER =
+ SearchResponse.createSearchResponseDeserializer(JsonpDeserializer.of(Object.class));
+
+ private final OSClientProvider clientProvider;
+ private final IndexAPI indexApi;
+
+ /**
+ * CDI-managed constructor. Looks up the {@link OSClientProvider} and {@link OSIndexAPIImpl}
+ * beans through {@link CDIUtils} and delegates to the two-argument constructor.
+ * NOTE(review): presumably {@code getBeanThrows} fails fast when a bean cannot be resolved — confirm.
+ */
+ @Inject
+ public OSSiteSearchAPI() {
+ this(CDIUtils.getBeanThrows(OSClientProvider.class),
+ CDIUtils.getBeanThrows(OSIndexAPIImpl.class));
+ }
+
+ /**
+ * Package-private constructor for testing.
+ *
+ * @param clientProvider supplies the OpenSearch client used for raw (generic) requests
+ * @param indexApi index lifecycle provider used for create/list/delete/alias operations
+ */
+ @VisibleForTesting
+ OSSiteSearchAPI(final OSClientProvider clientProvider,
+ final IndexAPI indexApi) {
+ this.clientProvider = clientProvider;
+ this.indexApi = indexApi;
+ }
+
+ // =========================================================================
+ // Index listing
+ // =========================================================================
+
+ /**
+  * Lists the site-search indices reported by the index provider, sorted in descending name
+  * order, with the active (default) index moved to the head of the list.
+  *
+  * @return the site-search index names; an empty list when the license level is below
+  *         {@code STANDARD}
+  */
+ @Override
+ public List<String> listIndices() {
+     if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) {
+         return Collections.emptyList();
+     }
+     // Collect into an explicit ArrayList: the list is sorted and reordered in place below, and
+     // Collectors.toList() makes no guarantee about the mutability of the list it returns.
+     final List<String> indices = indexApi.listIndices().stream()
+             .filter(IndexType.SITE_SEARCH::is)
+             .collect(Collectors.toCollection(ArrayList::new));
+
+     // Descending order (equivalent to sort + reverse for strings).
+     indices.sort(Collections.reverseOrder());
+     setDefaultToSpecificPosition(indices, 0);
+     return indices;
+ }
+
+ /**
+  * Moves the active (default) site-search index to {@code indexPosition} of the list, mirroring
+  * {@link ESSiteSearchAPI} but resolving the default from {@link VersionedIndicesAPI}.
+  *
+  * <p>The list is left untouched when it is {@code null}, has fewer than two elements, no default
+  * index is set, or the default index is not in the list (the last case is logged at WARN).
+  *
+  * @param list          mutable list of index names, reordered in place
+  * @param indexPosition target position for the default index
+  */
+ private void setDefaultToSpecificPosition(final List<String> list, final int indexPosition) {
+     if (list == null || list.size() <= 1) {
+         return;
+     }
+     final String defaultIndice = defaultSiteSearchIndex().orElse(null);
+     if (!UtilMethods.isSet(defaultIndice)) {
+         return;
+     }
+     final int index = list.indexOf(defaultIndice);
+     if (index < 0) {
+         Logger.warn(this.getClass(), String.format(
+                 "The default site search '%s' index was not found in the list of indices.",
+                 defaultIndice));
+         return;
+     }
+     list.remove(index);
+     list.add(indexPosition, defaultIndice);
+ }
+
+ /**
+  * Lists the closed site-search indices reported by the index provider, sorted in descending
+  * name order.
+  *
+  * @return the closed site-search index names; an empty list when the license level is below
+  *         {@code STANDARD}
+  */
+ @Override
+ public List<String> listClosedIndices() {
+     final List<String> indices = new ArrayList<>();
+     if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) {
+         return indices;
+     }
+     for (final String indexName : indexApi.getClosedIndexes()) {
+         if (IndexType.SITE_SEARCH.is(indexName)) {
+             indices.add(indexName);
+         }
+     }
+     // Descending order (equivalent to sort + reverse for strings).
+     indices.sort(Collections.reverseOrder());
+     return indices;
+ }
+
+ // =========================================================================
+ // Search & aggregations
+ // =========================================================================
+
+ /**
+  * Searches the active (default) site-search index.
+  *
+  * @param query search query; a {@code null} query yields a result carrying the error
+  *              {@code "null query"}
+  * @param start offset of the first hit
+  * @param rows  maximum number of hits
+  * @return the search results; empty when the license level is below {@code STANDARD}, and
+  *         carrying the exception message as error when the delegated search throws
+  */
+ @Override
+ public SiteSearchResults search(final String query, final int start, final int rows) {
+     final SiteSearchResults fallback = new SiteSearchResults();
+     if (query == null) {
+         fallback.setError("null query");
+     } else if (LicenseUtil.getLevel() >= LicenseLevel.STANDARD.level) {
+         try {
+             return search(defaultSiteSearchIndex().orElse(null), query, start, rows);
+         } catch (final Exception e) {
+             fallback.setError(e.getMessage());
+         }
+     }
+     return fallback;
+ }
+
+ /**
+ * Searches a site-search index and maps the hits into {@link SiteSearchResults}.
+ *
+ * An unset query defaults to {@code *}. A query that is not JSON is wrapped in a
+ * {@code query_string} body (with size/from and a highlight request on {@code content}); a JSON
+ * query is sent as the request body as-is. Failures are logged and reported through
+ * {@code results.setError(...)} instead of being thrown.
+ *
+ * @param indexName site-search index to query; when {@code null} the default site-search index is used
+ * @param query query string or JSON search body
+ * @param offset offset of the first hit (only applied to non-JSON queries, when greater than 0)
+ * @param limit maximum number of hits (only applied to non-JSON queries, when greater than 0)
+ * @return the results; empty when the license level is below {@code STANDARD}
+ */
+ @Override
+ public SiteSearchResults search(String indexName, String query, final int offset, final int limit) {
+ if (!UtilMethods.isSet(query)) {
+ query = "*";
+ }
+ final SiteSearchResults results = new SiteSearchResults();
+
+ if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) {
+ return results;
+ }
+
+ // Decided before the slash escaping below rewrites the query text.
+ final boolean isJson = StringUtils.isJson(query);
+
+ //https://github.com/elasticsearch/elasticsearch/issues/2980
+ if (query.contains("/")) {
+ query = query.replaceAll("/", "\\\\/");
+ }
+
+ results.setQuery(query);
+ results.setLimit(limit);
+ results.setOffset(offset);
+
+ try {
+ if (indexName == null) {
+ indexName = defaultSiteSearchIndex().orElse(null);
+ }
+ if (!IndexType.SITE_SEARCH.is(indexName)) {
+ throw new DotSearchException(indexName + " is not a sitesearch index");
+ }
+ results.setIndex(indexName);
+
+ final JSONObject body;
+ if (!isJson) {
+ // Plain query text: build a query_string request over all fields.
+ body = new JSONObject();
+ body.put("query", new JSONObject().put("query_string",
+ new JSONObject().put("query", query).put("default_field", "*")));
+ if (limit > 0) {
+ body.put("size", limit);
+ }
+ if (offset > 0) {
+ body.put("from", offset);
+ }
+ body.put("highlight", new JSONObject().put("fields",
+ new JSONObject().put("content", new JSONObject().put("fragment_size", 255))));
+ } else {
+ // JSON query: the caller-supplied body is used unchanged (offset/limit are not injected).
+ body = new JSONObject(query);
+ }
+
+ final ContentSearchResponse response = rawSearch(physicalName(indexName), body);
+ results.setTook(response.tookMillis() + "ms");
+ if (!isJson) {
+ // Report the generated request body as the effective query.
+ results.setQuery(body.toString());
+ }
+
+ final SearchHits hits = response.hits();
+ results.setTotalResults(hits.getTotalHits().value());
+
+ // Track the highest hit score seen; stays 0 when there are no hits.
+ float maxScore = 0f;
+ for (final SearchHit hit : hits) {
+ final SiteSearchResult ssr = new SiteSearchResult(new HashMap<>(hit.getSourceAsMap()));
+ ssr.setScore(hit.getScore());
+ maxScore = Math.max(maxScore, hit.getScore());
+ // TODO OS: the neutral SearchHit DTO does not carry per-field highlights yet.
+ // Site-search highlights are a best-effort extra (the ES path also swallows
+ // highlight failures); set empty until the neutral hit exposes highlight fragments.
+ ssr.setHighLight(new String[0]);
+ results.getResults().add(ssr);
+ }
+ results.setMaxScore(maxScore);
+
+ } catch (final Exception e) {
+ // Best-effort contract: surface the failure on the result object rather than throwing.
+ Logger.error(OSSiteSearchAPI.class, e.getMessage(), e);
+ results.setError(e.getMessage());
+ }
+
+ return results;
+ }
+
+ @Override
+ public Map getAggregations(String indexName, String query)
+ throws DotDataException {
+ indexName = resolveIndexOrAlias(indexName);
+ if (indexName == null || !IndexType.SITE_SEARCH.is(indexName)) {
+ throw new DotSearchException(indexName + " is not a sitesearch index or alias");
+ }
+
+ //https://github.com/elasticsearch/elasticsearch/issues/2980
+ if (query.contains("/")) {
+ query = query.replaceAll("/", "\\\\\\\\/");
+ }
+
+ try {
+ final ContentSearchResponse response = rawSearch(physicalName(indexName), new JSONObject(query));
+ return response.aggregationTree();
+ } catch (final Exception e) {
+ Logger.error(this.getClass(), "Error getting aggregations for query.\n" + e.getMessage(), e);
+ throw new DotSearchException("Error getting aggregations for query.\n" + e.getMessage(), e);
+ }
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * @deprecated use {@link #getAggregations(String, String)} instead.
+ */
+ @Deprecated
+ @Override
+ public Map getFacets(String indexName, String query) throws DotDataException {
+ indexName = resolveIndexOrAlias(indexName);
+ if (indexName == null || !IndexType.SITE_SEARCH.is(indexName)) {
+ throw new DotSearchException(indexName + " is not a sitesearch index or alias");
+ }
+
+ //https://github.com/elasticsearch/elasticsearch/issues/2980
+ if (query.contains("/")) {
+ query = query.replaceAll("/", "\\\\\\\\/");
+ }
+
+ try {
+ final ContentSearchResponse response = rawSearch(physicalName(indexName), new JSONObject(query));
+ return response.aggregationTree();
+ } catch (final Exception e) {
+ Logger.error(this.getClass(), "Error getting Facets for query.\n" + e.getMessage(), e);
+ throw new DotSearchException("Error getting Facets for query.\n" + e.getMessage(), e);
+ }
+ }
+
+ // =========================================================================
+ // Default index activation / inspection
+ // =========================================================================
+
+ /**
+  * Tells whether {@code indexName} is the active (default) site-search index.
+  *
+  * @param indexName index name to test; {@code null} is never the default
+  * @return {@code true} only when a default index is set and equals {@code indexName}
+  */
+ @Override
+ public boolean isDefaultIndex(final String indexName) throws DotDataException {
+     if (indexName == null) {
+         return false;
+     }
+     return defaultSiteSearchIndex().map(indexName::equals).orElse(false);
+ }
+
+ /**
+  * Makes {@code indexName} the active site-search index by writing it into the
+  * {@code siteSearch} slot of a copy of the default indices and saving that copy.
+  * Does nothing when the license level is below {@code STANDARD} or the name is not a
+  * site-search index.
+  *
+  * @param indexName site-search index to activate
+  */
+ @Override
+ public void activateIndex(final String indexName) throws DotDataException {
+     final boolean unlicensed = LicenseUtil.getLevel() < LicenseLevel.STANDARD.level;
+     if (unlicensed || !IndexType.SITE_SEARCH.is(indexName)) {
+         return;
+     }
+     final VersionedIndicesImpl.Builder defaults = copyDefaultIndices();
+     defaults.siteSearch(indexName);
+     saveDefaultIndices(defaults);
+ }
+
+ /**
+  * Clears the site-search pointer of the default versioned indices. Does nothing when the
+  * license level is below {@code STANDARD} or the name is not a site-search index.
+  *
+  * @param indexName site-search index being deactivated (only used for the guard checks)
+  */
+ @Override
+ public void deactivateIndex(final String indexName) throws DotDataException, IOException {
+     final boolean unlicensed = LicenseUtil.getLevel() < LicenseLevel.STANDARD.level;
+     if (unlicensed || !IndexType.SITE_SEARCH.is(indexName)) {
+         return;
+     }
+     // The default indices are rebuilt without the site-search slot. saveIndices() deletes by
+     // version and re-inserts, so leaving the slot out clears the pointer while the content
+     // live/working rows survive. When site-search was the only slot of the version the rebuilt
+     // info is empty, which saveIndices rejects — in that case the version row is dropped.
+     final VersionedIndicesImpl withoutSiteSearch = copyDefaultIndicesExceptSiteSearch().build();
+     final VersionedIndicesAPI versionedIndicesApi = APILocator.getVersionedIndicesAPI();
+     if (!withoutSiteSearch.hasAnyIndex()) {
+         versionedIndicesApi.removeVersion(withoutSiteSearch.version());
+     } else {
+         versionedIndicesApi.saveIndices(withoutSiteSearch);
+     }
+     versionedIndicesApi.clearCache();
+ }
+
+ // =========================================================================
+ // Index creation / mapping
+ // =========================================================================
+
+ /**
+  * Creates a site-search index from the OpenSearch settings resource, optionally attaches an
+  * alias, and applies the OpenSearch mapping resource.
+  *
+  * @param indexName logical index name; lower-cased before use
+  * @param alias     optional alias to create for the index; skipped when not set
+  * @param shards    number of shards passed to the index provider
+  * @return {@code true} when the index was created; {@code false} when {@code indexName} is
+  *         {@code null} or the license level is below {@code STANDARD}
+  * @throws DotSearchException when a resource is missing/unreadable, index creation fails, or
+  *                            the mapping cannot be applied
+  */
+ @Override
+ public synchronized boolean createSiteSearchIndex(String indexName, final String alias, final int shards)
+         throws DotSearchException, IOException {
+     if (indexName == null) {
+         return false;
+     }
+     if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) {
+         return false;
+     }
+
+     // Locale-independent casing: with the default locale a Turkish JVM maps 'I' to a dotless
+     // 'ı', which would change the index name.
+     indexName = indexName.toLowerCase(java.util.Locale.ROOT);
+     final ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
+     // OpenSearch-format resources, kept separate from their es-*.json counterparts so the OS
+     // index lifecycle never depends on an ES-named file. Settings: the legacy
+     // es-sitesearch-settings.json uses ES-only token filter syntax (e.g. edgeNGram / side) that
+     // the typed OpenSearch IndexSettings deserializer rejects; os-sitesearch-settings.json
+     // declares the same analyzers (standard_content, partial_content) in OpenSearch syntax.
+     // The mapping is functionally identical to es-sitesearch-mapping.json today, but owning a
+     // dedicated os-sitesearch-mapping.json decouples the two vendors — a future ES mapping
+     // change cannot silently alter OS behaviour.
+     // Read via getResourceAsStream so the index lifecycle works when these resources are packaged
+     // inside a JAR (new File(url.getPath()) only works for filesystem URLs and NPEs if missing).
+     final String settings = readResource(classLoader, "os-sitesearch-settings.json");
+     final String mapping = readResource(classLoader, "os-sitesearch-mapping.json");
+
+     try {
+         indexApi.createIndex(indexName, settings, shards);
+     } catch (final Exception e) {
+         throw new DotSearchException("Error creating OpenSearch site search index: " + e.getMessage(), e);
+     }
+
+     if (UtilMethods.isSet(alias)) {
+         indexApi.createAlias(indexName, alias);
+     }
+
+     putMapping(indexName, mapping);
+
+     return true;
+ }
+
+ /**
+  * Reads a UTF-8 classpath resource fully into a String via {@code getResourceAsStream}, so it
+  * resolves whether the resource sits on the filesystem or inside a packaged JAR. Throws a clear
+  * {@link DotSearchException} when the resource is absent rather than NPE-ing on a null URL.
+  *
+  * @param classLoader class loader used to locate the resource
+  * @param resource    classpath-relative resource name
+  * @return the resource content decoded as UTF-8
+  * @throws DotSearchException when the resource is missing or cannot be read
+  */
+ private static String readResource(final ClassLoader classLoader, final String resource)
+         throws DotSearchException {
+     try (final InputStream in = classLoader.getResourceAsStream(resource)) {
+         if (in == null) {
+             throw new DotSearchException(
+                     "Required OpenSearch site search resource not found on the classpath: " + resource);
+         }
+         return new String(in.readAllBytes(), StandardCharsets.UTF_8);
+     } catch (final IOException e) {
+         throw new DotSearchException(
+                 "Error reading OpenSearch site search resource " + resource + ": " + e.getMessage(), e);
+     }
+ }
+
+ /**
+  * Applies the mapping to the site-search index via a raw {@code PUT /{index}/_mapping}.
+  *
+  * <p>Done here rather than via {@code MappingOperationsOS} on purpose: that helper force-tags the
+  * physical name with {@code .os}, which would target a different index than the untagged one this
+  * class creates and queries (see the class "Index naming" note), leaving the real index on the
+  * dynamic default mapping (string fields become {@code text}, which then breaks keyword
+  * aggregations such as {@code mimeType}). Forwarding to the same untagged physical name used by
+  * {@code createIndex}/search/put keeps the mapping on the index that is actually hit.
+  *
+  * @param indexName logical site-search index name
+  * @param mapping   mapping JSON sent as the request body
+  * @throws DotSearchException when the request fails or returns a non-2xx status
+  */
+ private void putMapping(final String indexName, final String mapping) throws DotSearchException {
+     final String endpoint = "/" + physicalName(indexName) + "/_mapping";
+     try (final Response response = clientProvider.getClient().generic()
+             .execute(Requests.builder()
+                     .method("PUT")
+                     .endpoint(endpoint)
+                     .body(Bodies.json(mapping))
+                     .build())) {
+         final int status = response.getStatus();
+         if (status < 200 || status >= 300) {
+             throw new DotSearchException("Error applying mapping to OpenSearch site search index "
+                     + indexName + " — HTTP " + status + " — "
+                     + response.getBody().map(Body::bodyAsString).orElse(""));
+         }
+     } catch (final IOException e) {
+         throw new DotSearchException("Error applying mapping to OpenSearch site search index: "
+                 + e.getMessage(), e);
+     }
+ }
+
+ /**
+  * Creates {@code alias} for the given site-search index.
+  *
+  * @param indexName index to alias; lower-cased before use
+  * @param alias     alias name
+  * @return {@code true} once the alias has been created; {@code false} when the license level is
+  *         below {@code STANDARD}
+  * @throws IllegalArgumentException when either parameter is not set
+  */
+ @Override
+ public synchronized boolean setAlias(String indexName, final String alias) {
+     if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) {
+         return false;
+     }
+     if (UtilMethods.isNotSet(indexName) || UtilMethods.isNotSet(alias)) {
+         throw new IllegalArgumentException(String.format(
+                 " either one or both params aren't set. index: `%s`, alias: `%s` ", indexName, alias));
+     }
+     // Locale-independent casing: with the default locale a Turkish JVM maps 'I' to a dotless
+     // 'ı', which would change the index name.
+     indexName = indexName.toLowerCase(java.util.Locale.ROOT);
+     indexApi.createAlias(indexName, alias);
+     // createAlias is void and throws on failure, so reaching here means the alias was created.
+     // (Legacy ESSiteSearchAPI returns false here, but its only caller — ESSiteSearchPublisher —
+     // ignores the result, so the divergence is benign; reporting success honestly is correct.)
+     return true;
+ }
+
+ /**
+  * Mirrors {@link ESSiteSearchAPI#deleteOldSiteSearchIndices()} but resolves the active index from
+  * {@link VersionedIndicesAPI} and deletes through the OpenSearch {@link IndexAPI} provider.
+  *
+  * <p>An index is kept when it is the default index, backs an alias, or carries a name timestamp
+  * within the last 24 hours. An index whose timestamp cannot be parsed is logged at WARN and is
+  * not treated as recent.
+  */
+ @Override
+ public void deleteOldSiteSearchIndices() {
+     final List<String> indicesToRemove = new ArrayList<>(listIndices());
+
+     // Keep the default (active) site-search index.
+     defaultSiteSearchIndex().ifPresent(indicesToRemove::remove);
+
+     // Keep any index that backs an alias.
+     final List<String> indicesWithAlias =
+             new ArrayList<>(indexApi.getIndexAlias(indicesToRemove).keySet());
+     indicesToRemove.removeAll(indicesWithAlias);
+
+     // Keep indices created within the last 24 hours.
+     final Date yesterday = Date.from(Instant.now().minus(Duration.ofDays(1)));
+     final long yesterdayTimestamp =
+             Long.parseLong(ContentletIndexAPIImpl.timestampFormatter.format(yesterday));
+
+     indicesToRemove.removeIf(index -> {
+         try {
+             // The timestamp is the second "_"-separated token of the index name.
+             return Long.parseLong(index.split("_")[1]) >= yesterdayTimestamp;
+         } catch (final RuntimeException e) {
+             Logger.warn(this.getClass(),
+                     "Unable to parse timestamp from site search index '" + index + "': " + e.getMessage());
+             return false;
+         }
+     });
+
+     if (!indicesToRemove.isEmpty()) {
+         Logger.info(this.getClass(),
+                 "The following indices will be deleted: " + String.join(",", indicesToRemove));
+         indexApi.deleteMultiple(indicesToRemove.toArray(new String[0]));
+     }
+ }
+
+ // =========================================================================
+ // Document operations
+ // =========================================================================
+
+ /**
+ * Normalizes a single {@link SiteSearchResult} and writes it to the given site-search index with
+ * a raw {@code PUT} on the document endpoint (sent with {@code refresh=true}).
+ *
+ * Does nothing when the license level is below {@code STANDARD} or the result has a content
+ * length of 0. Note that {@code res} is modified in place (title, content, description, keywords).
+ *
+ * @param idx logical site-search index name; checked by {@code requireValidIndexName}
+ * @param res the document to index
+ * @param resultType result type label; only used in the debug log message
+ * @throws DotSearchException when the write fails or returns a non-2xx status
+ */
+ @Override
+ public void putToIndex(final String idx, final SiteSearchResult res, final String resultType) {
+ if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) {
+ return;
+ }
+ requireValidIndexName(idx);
+ try {
+ if (res.getContentLength() == 0) {
+ return;
+ }
+ // Fall back to the file name when no title was provided.
+ if (res.getTitle() == null && res.getFileName() != null) {
+ res.setTitle(res.getFileName());
+ }
+
+ // Strip HTML out of text content.
+ // The untouched content is kept under "content_raw" before tags are removed.
+ if (res.getContent() != null && UtilMethods.isSet(res.getMimeType())
+ && res.getMimeType().contains("text/")) {
+ res.getMap().put("content_raw", res.getContent());
+ res.setContent(res.getContent().replaceAll("\\<.*?\\>", ""));
+ }
+
+ // Derive a description from the (already stripped) content when none is set.
+ String desc = res.getDescription();
+ if (!UtilMethods.isSet(res.getDescription()) && UtilMethods.isSet(res.getContent())) {
+ desc = UtilMethods.prettyShortenString(res.getContent(), 500);
+ }
+ res.setDescription(desc);
+
+ // "seokeywords" wins only when both keys are present; otherwise "keywords" is used.
+ if (res.getMap().containsKey("keywords") && res.getMap().containsKey("seokeywords")) {
+ res.setKeywords((String) res.getMap().get("seokeywords"));
+ } else {
+ res.setKeywords((String) res.getMap().get("keywords"));
+ }
+
+ Logger.debug(this.getClass(),
+ () -> "writing to index " + idx + " type: " + resultType + " url:" + res.getUrl());
+ final String json = new ESMappingAPIImpl().toJsonString(res.getMap());
+
+ final String endpoint = "/" + physicalName(idx) + "/_doc/" + res.getId();
+ try (final Response response = clientProvider.getClient().generic()
+ .execute(Requests.builder()
+ .method("PUT")
+ .endpoint(endpoint)
+ .query(Map.of("refresh", "true"))
+ .body(Bodies.json(json))
+ .build())) {
+ final int status = response.getStatus();
+ if (status < 200 || status >= 300) {
+ throw new DotSearchException("putToIndex failed for doc " + res.getId()
+ + " on index " + idx + " — HTTP " + status);
+ }
+ }
+ } catch (final DotSearchException e) {
+ // Already a neutral failure signal — never swallow it. Propagating lets the phase
+ // router apply its per-phase policy: in Phase 3 (OS is primary) the failure is
+ // re-thrown so the publishing pipeline observes the data loss; in the shadow phases
+ // (1/2, OS secondary) PhaseRouter swallows it and logs at WARN, so ES stays unaffected.
+ throw e;
+ } catch (final Exception e) {
+ // Any other failure is wrapped so callers only ever see DotSearchException.
+ throw new DotSearchException("putToIndex failed for doc " + res.getId()
+ + " on index " + idx + ": " + e.getMessage(), e);
+ }
+ }
+
+ /**
+  * Writes every result of the list to the given site-search index, one document at a time, by
+  * delegating to {@link #putToIndex(String, SiteSearchResult, String)}. Does nothing when the
+  * license level is below {@code STANDARD}.
+  *
+  * @param idx        logical site-search index name
+  * @param res        documents to index
+  * @param resultType result type label forwarded to the single-document overload
+  */
+ @Override
+ public void putToIndex(final String idx, final List<SiteSearchResult> res, final String resultType) {
+     if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) {
+         return;
+     }
+     for (final SiteSearchResult r : res) {
+         putToIndex(idx, r, resultType);
+     }
+ }
+
+ @Override
+ @SuppressWarnings({"unchecked", "rawtypes"})
+ public SiteSearchResult getFromIndex(final String index, final String id) {
+ if (LicenseUtil.getLevel() < LicenseLevel.STANDARD.level) {
+ return null;
+ }
+ try {
+ final String physical = physicalName(index);
+ final GetResponse
*
* <p>The components are named {@code getName} / {@code getType} / {@code getBuckets} / {@code getHits}
- * so the canonical record accessors are bean-style; this keeps {@code $results.aggregations.<name>.buckets}
- * (property access, resolved via {@code getBuckets()}) working from Velocity.
+ * / {@code getMetadata} so the canonical record accessors are bean-style; this keeps
+ * {@code $results.aggregations.<name>.buckets} (property access, resolved via {@code getBuckets()})
+ * working from Velocity.
*
* <p>Factory methods are the only places where vendor imports are allowed in this file.
*
- * @param getName the aggregation name as declared in the query (e.g. {@code content_types})
- * @param getType the vendor-reported aggregation type (e.g. {@code sterms}, {@code lterms},
- * {@code top_hits}); defaults to {@code unknown}
- * @param getBuckets buckets for multi-bucket ({@code terms}) aggregations; empty for metric aggregations
- * @param getHits hits for the {@code top_hits} metric aggregation; {@code null} for other types
+ * @param getName the aggregation name as declared in the query (e.g. {@code content_types})
+ * @param getType the vendor-reported aggregation type (e.g. {@code sterms}, {@code lterms},
+ * {@code top_hits}); defaults to {@code unknown}
+ * @param getBuckets buckets for multi-bucket ({@code terms}) aggregations; empty for metric aggregations
+ * @param getHits hits for the {@code top_hits} metric aggregation; {@code null} for other types
+ * @param getMetadata the optional {@code meta} object attached to the aggregation in the query;
+ * mirrors {@code org.elasticsearch.search.aggregations.Aggregation#getMetadata()}
+ * (and OpenSearch's {@code Aggregate#meta()}) so it survives a rollback to the ES
+ * type, which exposes the same accessor; empty map when no {@code meta} was set
* @see AggregationBucket
*/
public record Aggregation(
String getName,
String getType,
List getBuckets,
- @Nullable SearchHits getHits) implements Iterable {
+ @Nullable SearchHits getHits,
+ Map getMetadata) implements Iterable {
/**
- * Canonical constructor. {@code getType} defaults to {@code "unknown"} and {@code getBuckets}
- * to an empty list when {@code null} (mirrors the previous Immutables defaults).
+ * Canonical constructor. {@code getType} defaults to {@code "unknown"}, {@code getBuckets}
+ * to an empty list and {@code getMetadata} to an empty map when {@code null} (mirrors the
+ * previous Immutables defaults).
*/
public Aggregation {
getType = getType == null ? "unknown" : getType;
getBuckets = getBuckets == null ? Collections.emptyList() : getBuckets;
+ getMetadata = getMetadata == null ? Collections.emptyMap() : getMetadata;
}
/** Iterate the buckets directly: {@code #foreach($bucket in $agg)}. */
@Override
- public Iterator iterator() {
+ public @NotNull Iterator iterator() {
return getBuckets().iterator();
}
@@ -80,7 +89,8 @@ public static Map from(
private static Aggregation fromSingle(final org.elasticsearch.search.aggregations.Aggregation esAgg) {
final Builder builder = builder()
.name(esAgg.getName())
- .type(esAgg.getType());
+ .type(esAgg.getType())
+ .metadata(esAgg.getMetadata());
if (esAgg instanceof org.elasticsearch.search.aggregations.bucket.terms.Terms) {
final org.elasticsearch.search.aggregations.bucket.terms.Terms terms =
@@ -88,6 +98,12 @@ private static Aggregation fromSingle(final org.elasticsearch.search.aggregation
builder.buckets(terms.getBuckets().stream()
.map(AggregationBucket::from)
.collect(Collectors.toList()));
+ } else if (esAgg instanceof org.elasticsearch.search.aggregations.bucket.histogram.Histogram) {
+ final org.elasticsearch.search.aggregations.bucket.histogram.Histogram histogram =
+ (org.elasticsearch.search.aggregations.bucket.histogram.Histogram) esAgg;
+ builder.buckets(histogram.getBuckets().stream()
+ .map(AggregationBucket::fromHistogram)
+ .collect(Collectors.toList()));
} else if (esAgg instanceof org.elasticsearch.search.aggregations.metrics.TopHits) {
final org.elasticsearch.search.aggregations.metrics.TopHits topHits =
(org.elasticsearch.search.aggregations.metrics.TopHits) esAgg;
@@ -125,36 +141,71 @@ private static Aggregation fromSingleOS(final String name,
final Builder builder = builder().name(name);
if (agg.isSterms()) {
+ final org.opensearch.client.opensearch._types.aggregations.StringTermsAggregate sterms =
+ agg.sterms();
return builder.type("sterms")
- .buckets(agg.sterms().buckets().array().stream()
+ .metadata(fromOSMeta(sterms.meta()))
+ .buckets(sterms.buckets().array().stream()
.map(AggregationBucket::fromOS)
.collect(Collectors.toList()))
.build();
} else if (agg.isLterms()) {
+ final org.opensearch.client.opensearch._types.aggregations.LongTermsAggregate lterms =
+ agg.lterms();
return builder.type("lterms")
- .buckets(agg.lterms().buckets().array().stream()
+ .metadata(fromOSMeta(lterms.meta()))
+ .buckets(lterms.buckets().array().stream()
.map(AggregationBucket::fromOS)
.collect(Collectors.toList()))
.build();
} else if (agg.isDterms()) {
+ final org.opensearch.client.opensearch._types.aggregations.DoubleTermsAggregate dterms =
+ agg.dterms();
return builder.type("dterms")
- .buckets(agg.dterms().buckets().array().stream()
+ .metadata(fromOSMeta(dterms.meta()))
+ .buckets(dterms.buckets().array().stream()
.map(AggregationBucket::fromOS)
.collect(Collectors.toList()))
.build();
} else if (agg.isTopHits()) {
+ final org.opensearch.client.opensearch._types.aggregations.TopHitsAggregate topHits =
+ agg.topHits();
return builder.type("top_hits")
- .hits(SearchHits.from(agg.topHits().hits()))
+ .metadata(fromOSMeta(topHits.meta()))
+ .hits(SearchHits.from(topHits.hits()))
.build();
}
return null;
}
+    /**
+     * Converts an OpenSearch aggregation {@code meta} map ({@code Map<String, JsonData>}) into the
+     * neutral plain-Java {@code Map<String, Object>} so it matches the shape Elasticsearch already
+     * returns from {@code Aggregation#getMetadata()}. Each {@code JsonData} is unwrapped to its
+     * closest plain value (Map/List/String/Number/Boolean); if a value cannot be mapped it falls
+     * back to its raw JSON string, and if that cannot be produced either, to its
+     * {@code String.valueOf} form, rather than failing the whole aggregation. An entry whose
+     * value is {@code null} is kept with a {@code null} value.
+     *
+     * @param osMeta the {@code meta} map reported by the OpenSearch aggregate; may be {@code null}
+     * @return an insertion-ordered plain-Java copy, or an empty map when {@code osMeta} is
+     *         {@code null} or empty
+     */
+    private static Map<String, Object> fromOSMeta(
+            final Map<String, org.opensearch.client.json.JsonData> osMeta) {
+        if (osMeta == null || osMeta.isEmpty()) {
+            return Collections.emptyMap();
+        }
+        final Map<String, Object> meta = new LinkedHashMap<>();
+        for (final Map.Entry<String, org.opensearch.client.json.JsonData> entry : osMeta.entrySet()) {
+            final org.opensearch.client.json.JsonData value = entry.getValue();
+            if (value == null) {
+                // Nothing to unwrap. Without this guard the NPE raised in the try block would be
+                // raised again by the fallback in the catch block and escape the method.
+                meta.put(entry.getKey(), null);
+                continue;
+            }
+            try {
+                meta.put(entry.getKey(), value.to(Object.class));
+            } catch (final RuntimeException cannotMap) {
+                Object fallback;
+                try {
+                    fallback = value.toJson().toString();
+                } catch (final RuntimeException cannotSerialize) {
+                    // Last resort: never let a single meta entry fail the whole aggregation.
+                    fallback = String.valueOf(value);
+                }
+                meta.put(entry.getKey(), fallback);
+            }
+        }
+        return meta;
+    }
+
/**
* Fluent builder for {@link Aggregation}. An unset {@code type} defaults to {@code "unknown"},
- * unset {@code buckets} to an empty list and {@code hits} to {@code null}, preserving the
- * lenient behaviour of the former Immutables builder.
+ * unset {@code buckets} to an empty list, {@code hits} to {@code null} and {@code metadata} to
+ * an empty map, preserving the lenient behaviour of the former Immutables builder.
*/
public static final class Builder {
@@ -162,6 +213,7 @@ public static final class Builder {
private String type;
private List buckets = Collections.emptyList();
private SearchHits hits;
+ private Map metadata = Collections.emptyMap();
public Builder name(final String name) {
this.name = name;
@@ -183,8 +235,13 @@ public Builder hits(final SearchHits hits) {
return this;
}
+        /**
+         * Sets the aggregation {@code meta} map handed to the {@link Aggregation} constructor by
+         * {@link #build()}.
+         *
+         * @param metadata the meta map; stored as given, without copying
+         * @return this builder
+         */
+        public Builder metadata(final Map<String, Object> metadata) {
+            this.metadata = metadata;
+            return this;
+        }
+
public Aggregation build() {
- return new Aggregation(name, type, buckets, hits);
+ return new Aggregation(name, type, buckets, hits, metadata);
}
}
}
diff --git a/dotCMS/src/main/java/com/dotcms/content/index/domain/AggregationBucket.java b/dotCMS/src/main/java/com/dotcms/content/index/domain/AggregationBucket.java
index c8904dcc8c34..79929696d59d 100644
--- a/dotCMS/src/main/java/com/dotcms/content/index/domain/AggregationBucket.java
+++ b/dotCMS/src/main/java/com/dotcms/content/index/domain/AggregationBucket.java
@@ -92,6 +92,34 @@ public static AggregationBucket from(
.build();
}
+    /**
+     * Builds a neutral bucket out of an Elasticsearch histogram bucket (date or numeric), carrying
+     * its sub-aggregations along. The bucket key goes through {@code histogramKey} first, so the
+     * stored key is a numeric String: a date-histogram key arrives as a
+     * {@code java.time.ZonedDateTime} in ES 7.x, not as a number, and is therefore turned into
+     * epoch-millis here instead of relying on {@code getKeyAsString()} (which yields a formatted
+     * date); a numeric-histogram key is stored in its numeric form.
+     */
+    public static AggregationBucket fromHistogram(
+            final org.elasticsearch.search.aggregations.bucket.histogram.Histogram.Bucket esBucket) {
+        final String normalizedKey = histogramKey(esBucket.getKey());
+        final long documents = esBucket.getDocCount();
+        return builder()
+                .key(normalizedKey)
+                .docCount(documents)
+                .subAggregations(Aggregation.from(esBucket.getAggregations()))
+                .build();
+    }
+
+ /** Normalizes a histogram bucket key to a numeric String ({@link #getKeyAsNumber()}-friendly). */
+ private static String histogramKey(final Object key) {
+ if (key instanceof java.time.ZonedDateTime) {
+ return String.valueOf(((java.time.ZonedDateTime) key).toInstant().toEpochMilli());
+ }
+ if (key instanceof Number) {
+ return String.valueOf(((Number) key).longValue());
+ }
+ return String.valueOf(key);
+ }
+
// -------------------------------------------------------------------------
// OS factories
// -------------------------------------------------------------------------
diff --git a/dotCMS/src/main/java/com/dotcms/content/index/domain/DotSearchException.java b/dotCMS/src/main/java/com/dotcms/content/index/domain/DotSearchException.java
new file mode 100644
index 000000000000..6a45e5d0186b
--- /dev/null
+++ b/dotCMS/src/main/java/com/dotcms/content/index/domain/DotSearchException.java
@@ -0,0 +1,32 @@
+package com.dotcms.content.index.domain;
+
+import com.dotmarketing.exception.DotRuntimeException;
+
+/**
+ * Vendor-neutral search exception for the index abstraction layer.
+ *
+ * <p>Replaces {@code org.elasticsearch.ElasticsearchException} on the public surface of the
+ * search/site-search APIs so that callers — and the interfaces themselves — no longer couple to
+ * Elasticsearch (or any other engine) types. It is the neutral failure signal raised by both the
+ * Elasticsearch and OpenSearch providers when a search or index operation cannot be completed.
+ *
+ * <p>It extends {@link DotRuntimeException} (and therefore is unchecked) to mirror the unchecked
+ * nature of {@code ElasticsearchException}: existing callers that never declared a {@code catch}
+ * for the vendor exception keep compiling unchanged.
+ */
+public class DotSearchException extends DotRuntimeException {
+
+ private static final long serialVersionUID = 1L;
+
+ /** Creates the exception with the given {@code message}, forwarded to the superclass. */
+ public DotSearchException(final String message) {
+ super(message);
+ }
+
+ /** Creates the exception wrapping the given {@code cause}, forwarded to the superclass. */
+ public DotSearchException(final Throwable cause) {
+ super(cause);
+ }
+
+ /** Creates the exception with both a {@code message} and the underlying {@code cause}. */
+ public DotSearchException(final String message, final Throwable cause) {
+ super(message, cause);
+ }
+}
\ No newline at end of file
diff --git a/dotCMS/src/main/java/com/dotcms/content/index/domain/SearchHit.java b/dotCMS/src/main/java/com/dotcms/content/index/domain/SearchHit.java
index fa3dc06662d6..b4278390a21c 100644
--- a/dotCMS/src/main/java/com/dotcms/content/index/domain/SearchHit.java
+++ b/dotCMS/src/main/java/com/dotcms/content/index/domain/SearchHit.java
@@ -93,9 +93,18 @@ public static SearchHit from(org.opensearch.client.opensearch.core.search.Hit>
Object source = osHit.source();
if (source instanceof Map) {
sourceMap = (Map) source;
+ } else if (source instanceof org.opensearch.client.json.JsonData) {
+ // top_hits aggregation hits carry their _source as JsonData (HitsMetadata),
+ // not a Map — unwrap it so the document survives the conversion instead of being dropped.
+ Map unwrapped;
+ try {
+ unwrapped = ((org.opensearch.client.json.JsonData) source).to(Map.class);
+ } catch (final RuntimeException cannotMap) {
+ unwrapped = null;
+ }
+ sourceMap = unwrapped != null ? unwrapped : Map.of();
} else {
- // If "source" is a typed object, we might need custom mapping logic here
- // For now, we'll create an empty map as fallback
+ // Unknown typed source — fall back to an empty map rather than failing the conversion.
sourceMap = Map.of();
}
diff --git a/dotCMS/src/main/java/com/dotmarketing/business/APILocator.java b/dotCMS/src/main/java/com/dotmarketing/business/APILocator.java
index f107e7a4f3f8..6d434689c27e 100644
--- a/dotCMS/src/main/java/com/dotmarketing/business/APILocator.java
+++ b/dotCMS/src/main/java/com/dotmarketing/business/APILocator.java
@@ -67,6 +67,7 @@
import com.dotcms.enterprise.linkchecker.LinkCheckerAPIImpl;
import com.dotcms.enterprise.priv.ESSearchProxy;
import com.dotcms.enterprise.publishing.sitesearch.ESSiteSearchAPI;
+import com.dotcms.enterprise.publishing.sitesearch.SiteSearchAPIImpl;
import com.dotcms.enterprise.rules.RulesAPI;
import com.dotcms.experiments.business.ExperimentsAPI;
import com.dotcms.experiments.business.ExperimentsAPIImpl;
@@ -1483,7 +1484,7 @@ Object create() {
case FORM_API: return new FormAPIImpl();
case MENULINK_API: return new MenuLinkAPIImpl();
case DASHBOARD_API: return new DashboardAPIImpl();
- case SITESEARCH_API: return new ESSiteSearchAPI();
+ case SITESEARCH_API: return new SiteSearchAPIImpl();
case FILEASSET_API: return new FileAssetAPIImpl();
case VERSIONABLE_API: return new VersionableAPIImpl();
case WORKFLOW_API : return new WorkflowAPIImpl();
diff --git a/dotCMS/src/main/java/com/dotmarketing/sitesearch/business/SiteSearchAPI.java b/dotCMS/src/main/java/com/dotmarketing/sitesearch/business/SiteSearchAPI.java
index ac2031f1ac73..7a13c33847b1 100644
--- a/dotCMS/src/main/java/com/dotmarketing/sitesearch/business/SiteSearchAPI.java
+++ b/dotCMS/src/main/java/com/dotmarketing/sitesearch/business/SiteSearchAPI.java
@@ -5,10 +5,10 @@
import java.util.List;
import java.util.Map;
-import org.elasticsearch.ElasticsearchException;
-import org.elasticsearch.search.aggregations.Aggregation;
import org.quartz.SchedulerException;
+import com.dotcms.content.index.domain.Aggregation;
+import com.dotcms.content.index.domain.DotSearchException;
import com.dotcms.enterprise.publishing.sitesearch.SiteSearchConfig;
import com.dotcms.enterprise.publishing.sitesearch.SiteSearchPublishStatus;
import com.dotcms.enterprise.publishing.sitesearch.SiteSearchResult;
@@ -36,7 +36,7 @@ public interface SiteSearchAPI {
void deactivateIndex(String indexName) throws DotDataException, IOException;
- boolean createSiteSearchIndex(String indexName, String alias, int shards) throws ElasticsearchException, IOException;
+ boolean createSiteSearchIndex(String indexName, String alias, int shards) throws DotSearchException, IOException;
boolean setAlias(String indexName, final String alias);
diff --git a/dotCMS/src/main/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPI.java b/dotCMS/src/main/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPI.java
index de09cbcff072..ed3bf36bb8dd 100644
--- a/dotCMS/src/main/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPI.java
+++ b/dotCMS/src/main/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPI.java
@@ -1,6 +1,8 @@
package com.dotmarketing.sitesearch.viewtool;
import com.dotcms.content.index.IndexAPI;
+import com.dotcms.content.index.domain.Aggregation;
+import com.dotcms.content.index.domain.AggregationBucket;
import com.dotcms.enterprise.publishing.sitesearch.SiteSearchResults;
import com.dotmarketing.beans.Host;
import com.dotmarketing.business.APILocator;
@@ -13,16 +15,11 @@
import com.dotmarketing.util.StringUtils;
import org.apache.velocity.tools.view.context.ViewContext;
import org.apache.velocity.tools.view.tools.ViewTool;
-import org.elasticsearch.search.aggregations.Aggregation;
-import org.elasticsearch.search.aggregations.bucket.histogram.InternalDateHistogram;
-import org.elasticsearch.search.aggregations.bucket.terms.StringTerms;
-import org.elasticsearch.search.aggregations.bucket.terms.StringTerms.Bucket;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
import java.util.*;
-import org.joda.time.DateTime;
public class SiteSearchWebAPI implements ViewTool {
@@ -173,15 +170,16 @@ public Map getFacets(final String indexName, final String query)
for (String key : aggregations.keySet()) {
final Aggregation aggregation = aggregations.get(key);
+ final String type = aggregation.getType();
- if (aggregation instanceof InternalDateHistogram) {
+ if (isHistogram(type)) {
internalFacet = new InternalWrapperCountDateHistogramFacet(aggregation.getName(),
- aggregation.getType(), ((InternalDateHistogram) aggregation).getBuckets());
- } else if (aggregation instanceof StringTerms) {
+ type, aggregation.getBuckets());
+ } else if (!aggregation.getBuckets().isEmpty()) {
internalFacet = new InternalWrapperStringTermsFacet(aggregation.getName(),
- aggregation.getType(), ((StringTerms) aggregation).getBuckets());
+ type, aggregation.getBuckets());
} else {
- internalFacet = new Facet(aggregation.getName(), aggregation.getType());
+ internalFacet = new Facet(aggregation.getName(), type);
}
internalFacets.put(key, internalFacet);
}
@@ -189,23 +187,32 @@ public Map getFacets(final String indexName, final String query)
return internalFacets;
}
+    /**
+     * Tells whether a vendor-reported aggregation type denotes a histogram (date or numeric),
+     * i.e. whether it contains {@code "histogram"} (e.g. {@code date_histogram}). Buckets of
+     * such aggregations carry numeric keys. A {@code null} type is not a histogram.
+     */
+    private static boolean isHistogram(final String type) {
+        if (type == null) {
+            return false;
+        }
+        return type.indexOf("histogram") >= 0;
+    }
+
/**
* Internal wrapper class for backwards compatibility with the new Elastic Search in Site
* Search.
*
- * @deprecated use ES Aggregations instead
+ * @deprecated use the vendor-neutral {@link #getAggregations(String, String)} instead
*/
public class InternalWrapperCountDateHistogramFacet extends Facet {
private final List entries;
public InternalWrapperCountDateHistogramFacet(final String name, final String type,
- List entries) {
+ List entries) {
super(name, type);
this.entries = new ArrayList<>();
- for (final InternalDateHistogram.Bucket entry : entries) {
- this.entries.add(new CountEntry(((DateTime) entry.getKey()).getMillis(),
- entry.getDocCount()));
+ for (final AggregationBucket entry : entries) {
+ final Number key = entry.getKeyAsNumber();
+ final long time = key != null ? key.longValue() : 0L;
+ this.entries.add(new CountEntry(time, entry.getDocCount()));
}
}
@@ -237,20 +244,20 @@ public long getCount() {
* Internal wrapper class for backwards compatibility with the new Elastic Search in Site
* Search.
*
- * @deprecated use ES Aggregations instead
+ * @deprecated use the vendor-neutral {@link #getAggregations(String, String)} instead
*/
public class InternalWrapperStringTermsFacet extends Facet {
private List entries;
- public InternalWrapperStringTermsFacet(final String name, final String type, final List entries) {
+ public InternalWrapperStringTermsFacet(final String name, final String type, final List entries) {
super(name, type);
this.entries = new ArrayList<>();
- for (final Bucket entry : entries) {
+ for (final AggregationBucket entry : entries) {
this.entries
- .add(new InternalTermEntry(entry.getKey().toString(), entry.getDocCount()));
+ .add(new InternalTermEntry(entry.getKey(), entry.getDocCount()));
}
}
@@ -279,7 +286,7 @@ public long getCount() {
}
/**
- * @deprecated use ES Aggregations instead
+ * @deprecated use the vendor-neutral {@link #getAggregations(String, String)} instead
*/
public class Facet {
diff --git a/dotCMS/src/main/resources/os-sitesearch-mapping.json b/dotCMS/src/main/resources/os-sitesearch-mapping.json
new file mode 100644
index 000000000000..c4d2c28d0235
--- /dev/null
+++ b/dotCMS/src/main/resources/os-sitesearch-mapping.json
@@ -0,0 +1,62 @@
+{
+
+ "properties": {
+ "content": {
+ "type": "text",
+ "analyzer": "standard_content",
+ "term_vector":"with_positions_offsets",
+ "fields": {
+ "untouched": {
+ "type": "keyword",
+ "ignore_above": 8191,
+ "doc_values" : true
+ },
+ "ngram": {
+ "search_analyzer": "standard_content",
+ "analyzer": "partial_content",
+ "type": "text"
+ }
+ }
+ },
+ "host": {
+ "type": "keyword",
+ "doc_values" : true
+ },
+ "contentLength": {
+ "type": "long"
+ },
+ "uri": {
+ "type": "keyword",
+ "doc_values" : true
+ },
+ "url": {
+ "type": "keyword",
+ "doc_values" : true
+ },
+ "mimeType": {
+ "type": "keyword",
+ "doc_values" : true
+ },
+ "title": {
+ "type": "text"
+ },
+ "description": {
+ "type": "text"
+ },
+ "modified": {
+ "type": "date",
+ "doc_values" : true
+ },
+ "keywords": {
+ "type": "keyword",
+ "doc_values" : true
+ },
+ "language": {
+ "type": "long"
+ },
+ "author": {
+ "type": "text"
+ }
+ }
+
+}
diff --git a/dotCMS/src/main/resources/os-sitesearch-settings.json b/dotCMS/src/main/resources/os-sitesearch-settings.json
new file mode 100644
index 000000000000..168e3e0bcb1c
--- /dev/null
+++ b/dotCMS/src/main/resources/os-sitesearch-settings.json
@@ -0,0 +1,39 @@
+{
+ "analysis": {
+ "filter": {
+ "content_ngrams": {
+ "type": "edge_ngram",
+ "min_gram": 1,
+ "max_gram": 10
+ },
+ "content_stemmer": {
+ "type": "stemmer",
+ "name": "english"
+ }
+ },
+ "analyzer": {
+ "standard_content": {
+ "type": "custom",
+ "tokenizer": "standard",
+ "filter": [
+ "lowercase",
+ "asciifolding",
+ "content_stemmer"
+ ]
+ },
+ "partial_content": {
+ "type": "custom",
+ "tokenizer": "standard",
+ "filter": [
+ "lowercase",
+ "asciifolding",
+ "content_ngrams"
+ ]
+ },
+ "comma_analyzer": {
+ "type": "pattern",
+ "pattern": ","
+ }
+ }
+ }
+}
diff --git a/dotCMS/src/test/java/com/dotcms/content/index/domain/AggregationDomainTest.java b/dotCMS/src/test/java/com/dotcms/content/index/domain/AggregationDomainTest.java
index a26ba95743d0..218061c4443b 100644
--- a/dotCMS/src/test/java/com/dotcms/content/index/domain/AggregationDomainTest.java
+++ b/dotCMS/src/test/java/com/dotcms/content/index/domain/AggregationDomainTest.java
@@ -5,12 +5,20 @@
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.doReturn;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.datatype.guava.GuavaModule;
+import java.time.ZoneOffset;
+import java.time.ZonedDateTime;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
+import org.elasticsearch.search.aggregations.Aggregations;
+import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
+import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.junit.Test;
/**
@@ -132,4 +140,157 @@ public void aggregationBucket_nestedSubAggregations_areReachable() {
assertNotNull(bucket.getAggregations().get("top_content"));
assertEquals("top_hits", bucket.getAggregations().get("top_content").getType());
}
+
+ // =========================================================================
+ // Elasticsearch factory conversion (Aggregation.from / AggregationBucket.from*)
+ // =========================================================================
+ //
+ // These exercise the vendor-specific ES → neutral conversion deterministically (no search
+ // engine, no container): the mocked ES aggregation objects mirror exactly what the live ES
+ // client hands the factories, so the conversion is locked down here and not only end-to-end.
+
+    /**
+     * Elasticsearch 7.x reports the key of a {@code date_histogram} bucket as a
+     * {@link ZonedDateTime}, not as a number. {@link AggregationBucket#fromHistogram} has to turn
+     * it into epoch-millis so that {@code getKeyAsNumber()} — and the legacy
+     * {@code InternalWrapperCountDateHistogramFacet} reading it — yields a real timestamp instead
+     * of null/0. It is the trickiest branch of the neutral conversion and has no obvious
+     * end-to-end equivalent, hence this pin.
+     */
+    @Test
+    public void esFactory_dateHistogram_normalizesZonedDateTimeKeyToEpochMillis() {
+        final ZonedDateTime bucketDay = ZonedDateTime.of(2024, 1, 15, 0, 0, 0, 0, ZoneOffset.UTC);
+        final long bucketDayMillis = bucketDay.toInstant().toEpochMilli();
+
+        // Stubbed bucket: ZonedDateTime key (the ES 7.x date-histogram key type), no sub-aggs.
+        final Aggregations noSubAggs = emptyEsAggregations();
+        final Histogram.Bucket esBucket = mock(Histogram.Bucket.class);
+        when(esBucket.getAggregations()).thenReturn(noSubAggs);
+        when(esBucket.getDocCount()).thenReturn(4L);
+        when(esBucket.getKey()).thenReturn(bucketDay);
+
+        final Histogram esHistogram = mock(Histogram.class);
+        doReturn(List.of(esBucket)).when(esHistogram).getBuckets();
+        when(esHistogram.getType()).thenReturn("date_histogram");
+        when(esHistogram.getName()).thenReturn("by_day");
+
+        final Aggregations source = mock(Aggregations.class);
+        when(source.asList()).thenReturn(List.of(esHistogram));
+
+        final Aggregation mapped = Aggregation.from(source).get("by_day");
+        assertNotNull("date_histogram aggregation must be mapped", mapped);
+        assertEquals("date_histogram", mapped.getType());
+        assertEquals("one bucket expected", 1, mapped.getBuckets().size());
+
+        final AggregationBucket firstBucket = mapped.getBuckets().get(0);
+        assertEquals("doc count must round-trip", 4L, firstBucket.getDocCount());
+        assertEquals("a ZonedDateTime key must become epoch-millis, not a formatted date",
+                bucketDayMillis, firstBucket.getKeyAsNumber().longValue());
+        assertEquals("getKeyAsString must expose the same epoch-millis",
+                String.valueOf(bucketDayMillis), firstBucket.getKeyAsString());
+    }
+
+    /**
+     * The key of a numeric {@code histogram} bucket is a {@link Number} (a {@code Double} in ES).
+     * The conversion is expected to go through the {@code Number} branch of {@code histogramKey}
+     * and expose that number as the bucket key.
+     */
+    @Test
+    public void esFactory_numericHistogram_normalizesNumberKeyToLong() {
+        // Stubbed bucket: Double key (the ES numeric-histogram key type), no sub-aggs.
+        final Aggregations noSubAggs = emptyEsAggregations();
+        final Histogram.Bucket esBucket = mock(Histogram.Bucket.class);
+        when(esBucket.getAggregations()).thenReturn(noSubAggs);
+        when(esBucket.getDocCount()).thenReturn(2L);
+        when(esBucket.getKey()).thenReturn(Double.valueOf(50.0));
+
+        final Histogram esHistogram = mock(Histogram.class);
+        doReturn(List.of(esBucket)).when(esHistogram).getBuckets();
+        when(esHistogram.getType()).thenReturn("histogram");
+        when(esHistogram.getName()).thenReturn("by_len");
+
+        final Aggregations source = mock(Aggregations.class);
+        when(source.asList()).thenReturn(List.of(esHistogram));
+
+        final Aggregation mapped = Aggregation.from(source).get("by_len");
+        final AggregationBucket firstBucket = mapped.getBuckets().get(0);
+        assertEquals("a numeric key must be preserved as a long", 50L,
+                firstBucket.getKeyAsNumber().longValue());
+        assertEquals("50", firstBucket.getKeyAsString());
+    }
+
+    /**
+     * Every bucket of a {@code terms} aggregation is mapped through {@link AggregationBucket#from}:
+     * the String key is readable from both {@code getKey()} and {@code getKeyAsString()}, a
+     * non-numeric key gives a null number, the doc count survives, and a terms aggregation
+     * without a metric carries no top-hits.
+     */
+    @Test
+    public void esFactory_terms_mapsBucketsAndIsHitsFree() {
+        final Aggregations noSubAggs = emptyEsAggregations();
+        final Terms.Bucket termBucket = mock(Terms.Bucket.class);
+        when(termBucket.getAggregations()).thenReturn(noSubAggs);
+        when(termBucket.getDocCount()).thenReturn(3L);
+        when(termBucket.getKeyAsString()).thenReturn("text/html");
+
+        final Terms esTerms = mock(Terms.class);
+        doReturn(List.of(termBucket)).when(esTerms).getBuckets();
+        when(esTerms.getType()).thenReturn("sterms");
+        when(esTerms.getName()).thenReturn("by_mime");
+
+        final Aggregations source = mock(Aggregations.class);
+        when(source.asList()).thenReturn(List.of(esTerms));
+
+        final Aggregation mapped = Aggregation.from(source).get("by_mime");
+        assertNotNull(mapped);
+        assertEquals("sterms", mapped.getType());
+        assertNull("a terms aggregation carries no top-hits", mapped.getHits());
+        assertEquals(1, mapped.getBuckets().size());
+
+        final AggregationBucket firstBucket = mapped.getBuckets().get(0);
+        assertEquals("text/html", firstBucket.getKey());
+        assertEquals("text/html", firstBucket.getKeyAsString());
+        assertNull("a non-numeric key must yield a null number", firstBucket.getKeyAsNumber());
+        assertEquals(3L, firstBucket.getDocCount());
+        assertTrue("no nested sub-aggregations here", firstBucket.getAggregations().isEmpty());
+    }
+
+    /** Mapping a null Elasticsearch aggregation set must give an empty tree, not an exception. */
+    @Test
+    public void esFactory_nullAggregations_yieldEmptyMap() {
+        final Map<String, Aggregation> mapped = Aggregation.from((Aggregations) null);
+        assertTrue("null ES aggregations must map to an empty tree", mapped.isEmpty());
+    }
+
+    /**
+     * The {@code meta} object set on an aggregation in the query is preserved on the neutral type
+     * via {@code getMetadata()} — closing the last equivalence gap with the ES {@code Aggregation}
+     * interface ({@code getName}/{@code getType}/{@code getMetadata}). This accessor is rollback-safe
+     * because the ES type exposes the same method, so a template adopting {@code $agg.metadata}
+     * resolves on both N (neutral) and N-1 (ES).
+     */
+    @Test
+    public void esFactory_metadata_isPreserved() {
+        // Parameterized (not raw) so the stubbed return value and the assertion are type-checked.
+        final Map<String, Object> meta = Map.of("unit", "days", "version", 2);
+        final Terms terms = mock(Terms.class);
+        when(terms.getName()).thenReturn("by_day");
+        when(terms.getType()).thenReturn("sterms");
+        when(terms.getMetadata()).thenReturn(meta);
+        doReturn(List.of()).when(terms).getBuckets();
+
+        final Aggregations esAggs = mock(Aggregations.class);
+        when(esAggs.asList()).thenReturn(List.of(terms));
+
+        assertEquals("the aggregation meta map must round-trip from ES",
+                meta, Aggregation.from(esAggs).get("by_day").getMetadata());
+    }
+
+    /** When no meta was supplied to the builder, {@code getMetadata()} is an empty map, never null. */
+    @Test
+    public void aggregation_metadata_defaultsToEmptyWhenUnset() {
+        final Aggregation withoutMeta = Aggregation.builder()
+                .type("sterms")
+                .name("x")
+                .build();
+        assertNotNull("metadata must never be null", withoutMeta.getMetadata());
+        assertTrue("metadata defaults to empty when unset", withoutMeta.getMetadata().isEmpty());
+    }
+
+    /**
+     * Creates a non-null mocked Elasticsearch aggregation set whose {@code asList()} is empty;
+     * the tests use it as the sub-aggregations of their stubbed buckets.
+     */
+    private static Aggregations emptyEsAggregations() {
+        final Aggregations none = mock(Aggregations.class);
+        doReturn(List.of()).when(none).asList();
+        return none;
+    }
}
diff --git a/dotcms-integration/src/test/java/com/dotcms/MainSuite1b.java b/dotcms-integration/src/test/java/com/dotcms/MainSuite1b.java
index b1e5bf853a22..fa9b83f71785 100644
--- a/dotcms-integration/src/test/java/com/dotcms/MainSuite1b.java
+++ b/dotcms-integration/src/test/java/com/dotcms/MainSuite1b.java
@@ -52,6 +52,7 @@
com.dotcms.rendering.velocity.viewtools.content.ContentMapTest.class,
com.dotcms.rendering.velocity.viewtools.content.ContentToolTest.class,
com.dotcms.rendering.velocity.viewtools.ContentSearchToolTest.class,
+ com.dotmarketing.sitesearch.viewtool.SiteSearchWebAPITest.class,
com.dotcms.rendering.velocity.viewtools.WorkflowToolTest.class,
com.dotcms.rendering.velocity.viewtools.WebsiteToolTest.class,
com.dotcms.rendering.velocity.viewtools.LanguageWebAPITest.class,
diff --git a/dotcms-integration/src/test/java/com/dotcms/OpenSearchUpgradeSuite.java b/dotcms-integration/src/test/java/com/dotcms/OpenSearchUpgradeSuite.java
index 722e82865a7c..ce000e4e7f0f 100644
--- a/dotcms-integration/src/test/java/com/dotcms/OpenSearchUpgradeSuite.java
+++ b/dotcms-integration/src/test/java/com/dotcms/OpenSearchUpgradeSuite.java
@@ -12,6 +12,8 @@
import com.dotcms.content.index.opensearch.OSClientConfigTest;
import com.dotcms.content.index.opensearch.OSClientProviderIntegrationTest;
import com.dotcms.content.index.opensearch.OSSearchAPIImplIntegrationTest;
+import com.dotcms.content.index.opensearch.OSSiteSearchAPIIntegrationTest;
+import com.dotcms.enterprise.publishing.sitesearch.SiteSearchDualWriteRouterIT;
import com.dotcms.junit.MainBaseSuite;
import org.junit.runner.RunWith;
import org.junit.runners.Suite.SuiteClasses;
@@ -46,7 +48,9 @@
OSClientConfigTest.class,
ContentletIndexAPIImplMigrationIntegrationTest.class,
ContentletIndexAPIImplPhaseSwitchIntegrationTest.class,
- OSSearchAPIImplIntegrationTest.class
+ OSSearchAPIImplIntegrationTest.class,
+ OSSiteSearchAPIIntegrationTest.class,
+ SiteSearchDualWriteRouterIT.class
})
public class OpenSearchUpgradeSuite {
}
\ No newline at end of file
diff --git a/dotcms-integration/src/test/java/com/dotcms/content/index/opensearch/OSSiteSearchAPIIntegrationTest.java b/dotcms-integration/src/test/java/com/dotcms/content/index/opensearch/OSSiteSearchAPIIntegrationTest.java
new file mode 100644
index 000000000000..3e164f612cc1
--- /dev/null
+++ b/dotcms-integration/src/test/java/com/dotcms/content/index/opensearch/OSSiteSearchAPIIntegrationTest.java
@@ -0,0 +1,406 @@
+package com.dotcms.content.index.opensearch;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import com.dotcms.DataProviderWeldRunner;
+import com.dotcms.IntegrationTestBase;
+import com.dotcms.content.index.domain.Aggregation;
+import com.dotcms.content.index.domain.AggregationBucket;
+import com.dotcms.content.index.domain.SearchHit;
+import com.dotcms.enterprise.publishing.sitesearch.OSSiteSearchAPI;
+import com.dotcms.enterprise.publishing.sitesearch.SiteSearchResult;
+import com.dotcms.enterprise.publishing.sitesearch.SiteSearchResults;
+import com.dotcms.LicenseTestUtil;
+import com.dotcms.util.IntegrationTestInitService;
+import com.dotmarketing.business.APILocator;
+import com.dotmarketing.common.db.DotConnect;
+import com.dotmarketing.util.Logger;
+import com.dotmarketing.util.json.JSONObject;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.UUID;
+import javax.enterprise.context.ApplicationScoped;
+import javax.inject.Inject;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+
+/**
+ * Integration tests for {@link OSSiteSearchAPI} exercised against a live OpenSearch 3.x container.
+ *
+ * <p>Validates the OpenSearch implementation of the Site Search API in isolation: index lifecycle
+ * (create / list / delete), the document round-trip ({@code putToIndex} → {@code getFromIndex} →
+ * {@code search} → {@code deleteFromIndex}), aggregations, and default-index activation through
+ * {@link com.dotcms.content.index.VersionedIndicesAPI}.
+ *
+ * <p>The {@code @Inject}-ed {@link OSSiteSearchAPI} resolves its OpenSearch client through
+ * {@link OSClientProvider}; the {@code @Alternative} {@link OSTestClientProvider} on the test
+ * classpath points it at the {@code opensearch-upgrade} container ({@code http://localhost:9201}).
+ * Index names are scoped with a per-run suffix so concurrent runs never collide; the {@code .os}
+ * tag is intentionally not used for site search (see {@link OSSiteSearchAPI}).
+ *
+ * <p>Registered in {@link com.dotcms.OpenSearchUpgradeSuite}. Run with:
+ *
+ *
+ *
+ * @author Fabrizio Araya
+ */
+@ApplicationScoped
+@RunWith(DataProviderWeldRunner.class)
+public class OSSiteSearchAPIIntegrationTest extends IntegrationTestBase {
+
+ /**
+ * Per-class random token: the first 8 characters of a UUID with its dashes removed. It is
+ * embedded in the document ids, URLs and contents built by the tests below.
+ */
+ private static final String RUN_ID =
+ UUID.randomUUID().toString().replace("-", "").substring(0, 8);
+
+ /**
+ * Numeric suffix so names match the {@code sitesearch_} convention. Derived from the hash of
+ * {@link #RUN_ID}; the hash is widened to {@code long} before {@code Math.abs} is applied.
+ */
+ private static final String SUFFIX = String.valueOf(Math.abs((long) RUN_ID.hashCode()));
+
+ /** Index name most tests create and operate on. */
+ private static final String IDX_ONE = "sitesearch_" + SUFFIX;
+ /** Second index name (numeric suffix + 1), used by the listIndices ordering test and cleanup. */
+ private static final String IDX_TWO = "sitesearch_" + (Long.parseLong(SUFFIX) + 1);
+
+ /** Base document id; the aggregation tests append their own per-document suffix. */
+ private static final String DOC_ID = "os-ss-it-" + RUN_ID;
+
+ /** OpenSearch site-search implementation exercised by every test. */
+ @Inject
+ private OSSiteSearchAPI osSiteSearchAPI;
+
+ /** OpenSearch index API, used here to check index existence and to delete test indices. */
+ @Inject
+ private OSIndexAPIImpl osIndexAPI;
+
+ // =======================================================================
+ // Lifecycle
+ // =======================================================================
+
+ /**
+ * One-time class setup: initialises the integration-test environment through
+ * {@link IntegrationTestInitService} and then calls {@link LicenseTestUtil#getLicense()}.
+ *
+ * @throws Exception if either initialisation step fails
+ */
+ @BeforeClass
+ public static void prepare() throws Exception {
+ IntegrationTestInitService.getInstance().init();
+ LicenseTestUtil.getLicense();
+ }
+
+ /** Runs before each test: removes any indices and DB rows left over under this run's names. */
+ @Before
+ public void setUp() {
+ cleanupTestData();
+ }
+
+ /** Runs after each test: removes the indices and DB rows the test may have created. */
+ @After
+ public void tearDown() {
+ cleanupTestData();
+ }
+
+ // =======================================================================
+ // Section 1 — Core index lifecycle
+ // =======================================================================
+
+    /**
+     * Creates a site-search index and checks that it becomes visible to both APIs.
+     *
+     * <p>Given scenario: a fresh site-search index name that does not yet exist in OpenSearch.
+     * Expected: {@code createSiteSearchIndex} returns {@code true}, {@code indexExists} reports
+     * the index, and the name is contained in {@code listIndices}.
+     *
+     * @throws Exception if any index operation fails
+     */
+    @Test
+    public void test_createSiteSearchIndex_shouldExistAndBeListed() throws Exception {
+        final boolean existedBefore = osIndexAPI.indexExists(IDX_ONE);
+        assertFalse("Pre-condition: index must not exist yet", existedBefore);
+
+        assertTrue("createSiteSearchIndex must return true",
+                osSiteSearchAPI.createSiteSearchIndex(IDX_ONE, null, 1));
+
+        final boolean existsAfter = osIndexAPI.indexExists(IDX_ONE);
+        assertTrue("Index must exist in OpenSearch after creation", existsAfter);
+
+        final boolean listed = osSiteSearchAPI.listIndices().contains(IDX_ONE);
+        assertTrue("Index must be returned by listIndices", listed);
+
+        Logger.info(this, "✅ test_createSiteSearchIndex_shouldExistAndBeListed passed – index: " + IDX_ONE);
+    }
+
+    /**
+     * Checks that an index created through the site-search API can be removed through the
+     * OpenSearch index API.
+     *
+     * <p>Given scenario: an existing site-search index.
+     * Expected: after {@code delete}, {@code indexExists} no longer reports it.
+     *
+     * @throws Exception if any index operation fails
+     */
+    @Test
+    public void test_deleteSiteSearchIndex_shouldRemoveIt() throws Exception {
+        final String target = IDX_ONE;
+        osSiteSearchAPI.createSiteSearchIndex(target, null, 1);
+        final boolean presentBeforeDelete = osIndexAPI.indexExists(target);
+        assertTrue("Pre-condition: index must exist", presentBeforeDelete);
+
+        osIndexAPI.delete(target);
+
+        final boolean presentAfterDelete = osIndexAPI.indexExists(target);
+        assertFalse("Index must be gone after deletion", presentAfterDelete);
+        Logger.info(this, "✅ test_deleteSiteSearchIndex_shouldRemoveIt passed");
+    }
+
+ // =======================================================================
+ // Section 2 — Document round-trip (put / get / search / delete)
+ // =======================================================================
+
+    /**
+     * Walks one document through put, get-by-id, search and delete on a fresh index.
+     *
+     * <p>Given scenario: an empty site-search index.
+     * Expected: the document is absent before the put, retrievable by id afterwards, found by a
+     * search for {@code "roundtrip"} without an error, and absent again after
+     * {@code deleteFromIndex}.
+     *
+     * @throws Exception if any index operation fails
+     */
+    @Test
+    public void test_putGetSearchDelete_documentRoundTrip() throws Exception {
+        osSiteSearchAPI.createSiteSearchIndex(IDX_ONE, null, 1);
+        final SiteSearchResult beforePut = osSiteSearchAPI.getFromIndex(IDX_ONE, DOC_ID);
+        assertNull("Pre-condition: document must not exist yet", beforePut);
+
+        // Build the document to index.
+        final SiteSearchResult indexed = new SiteSearchResult();
+        indexed.setId(DOC_ID);
+        indexed.setUrl("/os-site-search-it/" + RUN_ID);
+        indexed.setTitle("OpenSearch Site Search IT " + RUN_ID);
+        indexed.setMimeType("text/html");
+        indexed.setContent("dotcms opensearch site search integration roundtrip " + RUN_ID);
+        indexed.setContentLength(indexed.getContent().length());
+
+        osSiteSearchAPI.putToIndex(IDX_ONE, indexed, "content");
+
+        // Get by id.
+        final SiteSearchResult afterPut = osSiteSearchAPI.getFromIndex(IDX_ONE, DOC_ID);
+        assertNotNull("Document must be retrievable after put", afterPut);
+        assertEquals("Fetched document id must match", DOC_ID, afterPut.getId());
+
+        // Full-text search.
+        final SiteSearchResults found = osSiteSearchAPI.search(IDX_ONE, "roundtrip", 0, 10);
+        assertNull("Search must not return an error: " + found.getError(), found.getError());
+        assertTrue("Search must find the indexed document", found.getTotalResults() >= 1);
+
+        // Delete and confirm the document is gone.
+        osSiteSearchAPI.deleteFromIndex(IDX_ONE, DOC_ID);
+        final SiteSearchResult afterDelete = osSiteSearchAPI.getFromIndex(IDX_ONE, DOC_ID);
+        assertNull("Document must be gone after deleteFromIndex", afterDelete);
+
+        Logger.info(this, "✅ test_putGetSearchDelete_documentRoundTrip passed – hits: "
+                + found.getTotalResults());
+    }
+
+    /**
+     * Given scenario: an index holding 3 html + 2 pdf documents.
+     * Expected: a terms aggregation on {@code mimeType} maps through the OpenSearch
+     * {@code fromOS(StringTermsBucket)} factory to a neutral {@link Aggregation} with one bucket per
+     * mimeType — correct keys, doc counts, {@code getKeyAsString} mirroring {@code getKey}, a null
+     * numeric key for the non-numeric mimeType, and no top-hits — so the OS path produces the same
+     * neutral shape the ES path does (not merely a non-null map).
+     *
+     * @throws Exception if any index operation fails
+     */
+    @Test
+    public void test_getAggregations_termsBucketsHaveCorrectKeysAndCounts() throws Exception {
+        osSiteSearchAPI.createSiteSearchIndex(IDX_ONE, null, 1);
+
+        final int htmlDocs = 3;
+        final int pdfDocs = 2;
+        // The first htmlDocs documents are text/html, the remaining pdfDocs are application/pdf.
+        for (int i = 0; i < htmlDocs + pdfDocs; i++) {
+            final SiteSearchResult doc = new SiteSearchResult();
+            doc.setId(DOC_ID + "-" + i);
+            doc.setUrl("/agg/" + RUN_ID + "/" + i);
+            doc.setTitle("Aggregation doc " + i);
+            doc.setMimeType(i < htmlDocs ? "text/html" : "application/pdf");
+            doc.setContent("aggregation bucket sample " + RUN_ID);
+            doc.setContentLength(doc.getContent().length());
+            osSiteSearchAPI.putToIndex(IDX_ONE, doc, "content");
+        }
+
+        // {"size":0,"aggs":{"by_mime":{"terms":{"field":"mimeType","size":10}}}}
+        final String aggQuery = new JSONObject()
+                .put("size", 0)
+                .put("aggs", new JSONObject().put("by_mime",
+                        new JSONObject().put("terms",
+                                new JSONObject().put("field", "mimeType").put("size", 10)))).toString();
+
+        // Typed map: a raw Map would make get(...) return Object and break the assignment below.
+        final Map<String, Aggregation> aggregations =
+                osSiteSearchAPI.getAggregations(IDX_ONE, aggQuery);
+
+        assertNotNull("Aggregations map must not be null", aggregations);
+        final Aggregation byMime = aggregations.get("by_mime");
+        assertNotNull("Aggregation 'by_mime' must be present", byMime);
+        assertEquals("aggregation name must round-trip", "by_mime", byMime.getName());
+        assertNull("a terms aggregation carries no top-hits", byMime.getHits());
+        assertEquals("there must be one bucket per mimeType", 2, byMime.getBuckets().size());
+
+        final Set<String> expectedMimes = Set.of("text/html", "application/pdf");
+        // -1 marks "bucket never seen", so a missing bucket fails the count assertions below.
+        long htmlCount = -1;
+        long pdfCount = -1;
+        for (final AggregationBucket bucket : byMime.getBuckets()) {
+            assertTrue("bucket key must be a known mimeType", expectedMimes.contains(bucket.getKey()));
+            assertEquals("getKeyAsString must mirror getKey", bucket.getKey(), bucket.getKeyAsString());
+            assertNull("a non-numeric key must yield a null number", bucket.getKeyAsNumber());
+            assertTrue("each bucket must carry documents", bucket.getDocCount() > 0);
+            if ("text/html".equals(bucket.getKey())) {
+                htmlCount = bucket.getDocCount();
+            } else if ("application/pdf".equals(bucket.getKey())) {
+                pdfCount = bucket.getDocCount();
+            }
+        }
+        assertEquals("html bucket must count the html docs", htmlDocs, htmlCount);
+        assertEquals("pdf bucket must count the pdf docs", pdfDocs, pdfCount);
+
+        Logger.info(this, "✅ test_getAggregations_termsBucketsHaveCorrectKeysAndCounts passed");
+    }
+
+    /**
+     * Given scenario: a terms aggregation with a nested {@code top_hits} sub-aggregation.
+     * Expected: the OpenSearch path preserves the nested {@code top_docs} as a neutral
+     * {@link Aggregation} carrying {@link SearchHit}s (each with an id and a non-empty source),
+     * reachable via {@code bucket.getAggregations()} — exercising
+     * {@code AggregationBucket.fromOS} sub-aggregation nesting and {@code SearchHits.from(OS hits)},
+     * which the terms-only test does not reach.
+     *
+     * @throws Exception if any index operation fails
+     */
+    @Test
+    public void test_getAggregations_nestedTopHits_preservedOnOpenSearchPath() throws Exception {
+        osSiteSearchAPI.createSiteSearchIndex(IDX_ONE, null, 1);
+
+        for (int i = 0; i < 3; i++) {
+            final SiteSearchResult doc = new SiteSearchResult();
+            doc.setId(DOC_ID + "-th-" + i);
+            doc.setUrl("/agg-th/" + RUN_ID + "/" + i);
+            doc.setTitle("Top hits doc " + i);
+            doc.setMimeType("text/html");
+            doc.setContent("top hits nested sample " + RUN_ID);
+            doc.setContentLength(doc.getContent().length());
+            osSiteSearchAPI.putToIndex(IDX_ONE, doc, "content");
+        }
+
+        // Terms aggregation "by_mime" with a nested top_hits sub-aggregation "top_docs" (size 2).
+        final String aggQuery = new JSONObject()
+                .put("size", 0)
+                .put("aggs", new JSONObject().put("by_mime", new JSONObject()
+                        .put("terms", new JSONObject().put("field", "mimeType").put("size", 10))
+                        .put("aggs", new JSONObject().put("top_docs",
+                                new JSONObject().put("top_hits",
+                                        new JSONObject().put("size", 2)))))).toString();
+
+        // Typed map: a raw Map would make get(...) return Object and break the assignment below.
+        final Map<String, Aggregation> aggregations =
+                osSiteSearchAPI.getAggregations(IDX_ONE, aggQuery);
+
+        // Same guard as the terms-only test: fail with a message instead of an NPE.
+        assertNotNull("Aggregations map must not be null", aggregations);
+        final Aggregation byMime = aggregations.get("by_mime");
+        assertNotNull("'by_mime' aggregation must be present", byMime);
+        assertFalse("'by_mime' must have buckets", byMime.getBuckets().isEmpty());
+
+        final AggregationBucket firstBucket = byMime.getBuckets().get(0);
+        final Aggregation topDocs = firstBucket.getAggregations().get("top_docs");
+        assertNotNull("nested top_hits sub-aggregation must be preserved on the OS path", topDocs);
+        assertNotNull("top_hits must carry a SearchHits container", topDocs.getHits());
+
+        final List<SearchHit> hits = topDocs.getHits().getHits();
+        assertFalse("top_hits must carry at least one hit", hits.isEmpty());
+        final SearchHit hit = hits.get(0);
+        assertNotNull("each top-hit must expose an id", hit.getId());
+        assertFalse("each top-hit must expose its source document", hit.getSourceAsMap().isEmpty());
+
+        Logger.info(this, "✅ test_getAggregations_nestedTopHits_preservedOnOpenSearchPath passed – "
+                + "hits: " + hits.size());
+    }
+
+ /**
+ * Given scenario: a document write targeting an index name carrying characters OpenSearch
+ * forbids.
+ * Expected: putToIndex fails fast with an IllegalArgumentException (the malformed name never
+ * reaches the cluster as a cryptic HTTP 400).
+ *
+ * <p>The test passes only when {@link IllegalArgumentException} propagates out of the method;
+ * no index is created beforehand.
+ */
+ @Test(expected = IllegalArgumentException.class)
+ public void test_putToIndex_invalidIndexName_throwsFast() {
+ final SiteSearchResult doc = new SiteSearchResult();
+ doc.setId(DOC_ID);
+ doc.setContent("x");
+ doc.setContentLength(1);
+ // The target name mixes upper-case letters, a space, '/' and '*'.
+ osSiteSearchAPI.putToIndex("Invalid Name/With*Chars", doc, "content");
+ }
+
+ /**
+ * Given scenario: a delete targeting a blank index name.
+ * Expected: deleteFromIndex fails fast with an IllegalArgumentException rather than NPE-ing on
+ * the null/blank name.
+ *
+ * <p>The test passes only when {@link IllegalArgumentException} propagates out of the method.
+ */
+ @Test(expected = IllegalArgumentException.class)
+ public void test_deleteFromIndex_blankIndexName_throwsFast() {
+ // A single space is used as the blank index name.
+ osSiteSearchAPI.deleteFromIndex(" ", DOC_ID);
+ }
+
+ // =======================================================================
+ // Section 3 — Default index activation (VersionedIndicesAPI)
+ // =======================================================================
+
+    /**
+     * Toggles the default flag of a site-search index on and off.
+     *
+     * <p>Given scenario: a created site-search index that is not yet the default.
+     * Expected: {@code isDefaultIndex} is false before activation, true after
+     * {@code activateIndex}, and false again after {@code deactivateIndex}.
+     *
+     * @throws Exception if any index operation fails
+     */
+    @Test
+    public void test_activateDeactivate_shouldToggleDefault() throws Exception {
+        osSiteSearchAPI.createSiteSearchIndex(IDX_ONE, null, 1);
+        final boolean defaultBeforeActivation = osSiteSearchAPI.isDefaultIndex(IDX_ONE);
+        assertFalse("Pre-condition: index must not be default yet", defaultBeforeActivation);
+
+        osSiteSearchAPI.activateIndex(IDX_ONE);
+        final boolean defaultAfterActivation = osSiteSearchAPI.isDefaultIndex(IDX_ONE);
+        assertTrue("Index must be the default after activation", defaultAfterActivation);
+
+        osSiteSearchAPI.deactivateIndex(IDX_ONE);
+        final boolean defaultAfterDeactivation = osSiteSearchAPI.isDefaultIndex(IDX_ONE);
+        assertFalse("Index must no longer be the default after deactivation",
+                defaultAfterDeactivation);
+
+        Logger.info(this, "✅ test_activateDeactivate_shouldToggleDefault passed");
+    }
+
+    /**
+     * Given scenario: two created site-search indices with the second activated as default.
+     * Expected: listIndices returns both and places the active (default) index first.
+     *
+     * @throws Exception if any index operation fails
+     */
+    @Test
+    public void test_listIndices_shouldPlaceDefaultFirst() throws Exception {
+        osSiteSearchAPI.createSiteSearchIndex(IDX_ONE, null, 1);
+        osSiteSearchAPI.createSiteSearchIndex(IDX_TWO, null, 1);
+
+        // Activate the second index so a default-first ordering differs from creation order.
+        osSiteSearchAPI.activateIndex(IDX_TWO);
+
+        // Typed list instead of a raw List, so element access is checked as String.
+        final List<String> indices = osSiteSearchAPI.listIndices();
+        assertTrue("Both indices must be listed",
+                indices.contains(IDX_ONE) && indices.contains(IDX_TWO));
+        assertEquals("The default index must be first", IDX_TWO, indices.get(0));
+
+        Logger.info(this, "✅ test_listIndices_shouldPlaceDefaultFirst passed – order: " + indices);
+    }
+
+ // =======================================================================
+ // Section 4 — Additional interface methods
+ // =======================================================================
+
+    /**
+     * Given scenario: no closed site-search indices for this run.
+     * Expected: listClosedIndices returns a non-null list without raising.
+     */
+    @Test
+    public void test_listClosedIndices_shouldNotFail() {
+        // Typed list instead of a raw List.
+        final List<String> closed = osSiteSearchAPI.listClosedIndices();
+        assertNotNull("listClosedIndices must never return null", closed);
+        Logger.info(this, "✅ test_listClosedIndices_shouldNotFail passed – count: " + closed.size());
+    }
+
+ // =======================================================================
+ // Cleanup helpers
+ // =======================================================================
+
+    /**
+     * Best-effort cleanup: deletes both test indices from OpenSearch when they exist, then
+     * removes the related DB rows through {@code cleanupVersionedRows()}. A failure on one index
+     * is logged as a warning and does not stop the remaining cleanup.
+     */
+    private synchronized void cleanupTestData() {
+        List.of(IDX_ONE, IDX_TWO).forEach(name -> {
+            try {
+                final boolean present = osIndexAPI.indexExists(name);
+                if (present) {
+                    osIndexAPI.delete(name);
+                }
+            } catch (final Exception e) {
+                Logger.warn(this, "Cleanup: error removing OS index '" + name + "': " + e.getMessage());
+            }
+        });
+        cleanupVersionedRows();
+    }
+
+    /**
+     * Best-effort removal of the {@code indicies} rows that reference this run's indices,
+     * followed by a cache clear on the versioned-indices API.
+     *
+     * <p>Rows are matched per index name. {@code IDX_TWO} carries the numeric suffix plus one,
+     * so a pattern built only from {@code SUFFIX} would never match it and the row written by
+     * {@code activateIndex(IDX_TWO)} would outlive the test. Matching on the full name also
+     * avoids deleting unrelated rows that merely contain the same digits.
+     *
+     * <p>Any failure is logged as a warning; it never fails the test.
+     */
+    private void cleanupVersionedRows() {
+        try {
+            for (final String name : List.of(IDX_ONE, IDX_TWO)) {
+                new DotConnect()
+                        .setSQL("DELETE FROM indicies WHERE index_name LIKE ?")
+                        // Wildcards stay on both sides in case the stored name is decorated.
+                        .addParam("%" + name + "%")
+                        .loadResult();
+            }
+            APILocator.getVersionedIndicesAPI().clearCache();
+        } catch (final Exception e) {
+            Logger.warn(this, "Cleanup: error removing versioned DB rows: " + e.getMessage());
+        }
+    }
+}
diff --git a/dotcms-integration/src/test/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchDualWriteRouterIT.java b/dotcms-integration/src/test/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchDualWriteRouterIT.java
new file mode 100644
index 000000000000..a400ce4aae36
--- /dev/null
+++ b/dotcms-integration/src/test/java/com/dotcms/enterprise/publishing/sitesearch/SiteSearchDualWriteRouterIT.java
@@ -0,0 +1,246 @@
+package com.dotcms.enterprise.publishing.sitesearch;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assume.assumeFalse;
+
+import com.dotcms.DataProviderWeldRunner;
+import com.dotcms.IntegrationTestBase;
+import com.dotcms.LicenseTestUtil;
+import com.dotcms.content.elasticsearch.business.ESIndexAPI;
+import com.dotcms.content.index.IndexAPIImpl;
+import com.dotcms.content.index.IndexConfigHelper;
+import com.dotcms.content.index.opensearch.OSIndexAPIImpl;
+import com.dotcms.util.IntegrationTestInitService;
+import com.dotmarketing.business.APILocator;
+import com.dotmarketing.common.db.DotConnect;
+import com.dotmarketing.sitesearch.business.SiteSearchAPI;
+import com.dotmarketing.util.Config;
+import com.dotmarketing.util.Logger;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.UUID;
+import javax.enterprise.context.ApplicationScoped;
+import javax.inject.Inject;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+
+/**
+ * Integration tests that exercise Site Search through the phase-aware {@link SiteSearchAPIImpl}
+ * router in a dual-write phase, where every write fans out to both the
+ * Elasticsearch ({@link ESSiteSearchAPI}) and OpenSearch ({@link OSSiteSearchAPI}) leaves.
+ *
+ * <p>These tests guard two regressions that only reproduce through the router fan-out — the
+ * isolated {@link com.dotcms.content.index.opensearch.OSSiteSearchAPIIntegrationTest} (which calls
+ * the OS leaf directly) cannot catch them:
+ *
+ * <ol>
+ *   <li><b>Shared mutable result across the fan-out.</b> {@code putToIndex} mutates the
+ *       {@link SiteSearchResult} map in place — notably {@link SiteSearchResult#setKeywords(String)}
+ *       rewrites the {@code keywords} entry from a {@code String} to a {@code List}. With a single
+ *       shared instance, the first leaf (ES) corrupted the input the second leaf (OS) then read,
+ *       producing {@code ClassCastException: EmptyList cannot be cast to String} on the OS write —
+ *       silently dropping every document from OpenSearch. The router now hands each
+ *       provider its own copy. This test asserts the document actually lands in OpenSearch.
+ *   <li><b>Mapping fan-out leak.</b> {@code createSiteSearchIndex} on the ES leaf applied
+ *       its mapping through the phase-dispatched {@code ESMappingAPIImpl.putMapping}, which fanned
+ *       out a second time to OpenSearch using a {@code .os}-tagged physical name that site-search OS
+ *       indices never use → HTTP 404. The create path is now ES-pinned; this test asserts a
+ *       router-driven create yields a fully functional, queryable OS index.
+ * </ol>
+ *
+ * <p>Runs only when ES and OS are separate clusters (dual-write requires two endpoints); skipped
+ * via {@link org.junit.Assume#assumeFalse} on the single-cluster {@code opensearch-upgrade}
+ * profile. Registered in {@link com.dotcms.OpenSearchUpgradeSuite}. Run with:
+ *
+ *
+ * @author Fabrizio Araya
+ */
+@ApplicationScoped
+@RunWith(DataProviderWeldRunner.class)
+public class SiteSearchDualWriteRouterIT extends IntegrationTestBase {
+
+ /** Phase 1 — dual-write, ES reads. Writes fan out to [ES, OS]; reads come from ES. */
+ private static final int PHASE_DUAL_WRITE_ES_READS = 1;
+
+ /**
+ * Per-class random token: the first 8 characters of a UUID with its dashes removed. It is
+ * embedded in the document ids, URLs and contents built by the tests below.
+ */
+ private static final String RUN_ID =
+ UUID.randomUUID().toString().replace("-", "").substring(0, 8);
+
+ /**
+ * Numeric suffix so the name matches the {@code sitesearch_} convention. Derived from the hash
+ * of {@link #RUN_ID}; the hash is widened to {@code long} before {@code Math.abs} is applied.
+ */
+ private static final String SUFFIX = String.valueOf(Math.abs((long) RUN_ID.hashCode()));
+
+ /** Name of the single site-search index these tests create through the router. */
+ private static final String IDX = "sitesearch_" + SUFFIX;
+ /** Base document id; the batch test appends a per-document counter. */
+ private static final String DOC_ID = "ss-dualwrite-it-" + RUN_ID;
+
+ /** OpenSearch leaf, queried directly to check what the fan-out wrote to OpenSearch. */
+ @Inject
+ private OSSiteSearchAPI osSiteSearchAPI;
+
+ /** OpenSearch index API, used by cleanup to check for and delete the test index. */
+ @Inject
+ private OSIndexAPIImpl osIndexAPI;
+
+ /** The phase-aware fan-out router under test. */
+ private SiteSearchAPI router;
+
+ // =======================================================================
+ // Lifecycle
+ // =======================================================================
+
+ /**
+ * One-time class setup: initialises the integration-test environment through
+ * {@link IntegrationTestInitService} and then calls {@link LicenseTestUtil#getLicense()}.
+ *
+ * @throws Exception if either initialisation step fails
+ */
+ @BeforeClass
+ public static void prepare() throws Exception {
+ IntegrationTestInitService.getInstance().init();
+ LicenseTestUtil.getLicense();
+ }
+
+ /**
+ * Per-test setup: skips the test when ES and OS share an endpoint, otherwise obtains the
+ * router from {@link APILocator}, clears leftover test data and switches the migration phase
+ * flag to {@link #PHASE_DUAL_WRITE_ES_READS}.
+ */
+ @Before
+ public void setUp() {
+ // Dual-write fans out to both clusters; a single-cluster profile would collide on the
+ // shared untagged site-search name (and cannot host both leaves), so skip there.
+ assumeFalse("Requires separate ES and OS clusters for dual-write", esSameAsOs());
+ router = APILocator.getSiteSearchAPI();
+ // Leftovers are removed before the phase flag is changed.
+ cleanupTestData();
+ setPhase(PHASE_DUAL_WRITE_ES_READS);
+ }
+
+ /**
+ * Per-test teardown: sets the migration phase flag back to {@code null}, then removes the
+ * test index and DB rows.
+ *
+ * <p>NOTE(review): JUnit 4 appears to run {@code @After} methods even when an {@code @Before}
+ * assumption fails, so this would also execute for skipped tests — confirm that is intended.
+ * NOTE(review): the flag is set to {@code null} rather than restored to its previous value —
+ * confirm no caller relies on a pre-existing phase setting.
+ */
+ @After
+ public void tearDown() {
+ setPhase(null);
+ cleanupTestData();
+ }
+
+ // =======================================================================
+ // Tests
+ // =======================================================================
+
+    /**
+     * Writes one document through the router in Phase 1 and checks it on both backends.
+     *
+     * <p>Given scenario: Phase 1 (dual-write). An index and a single document with
+     * {@code keywords} set are written through the router, fanning out to ES then OS on the same
+     * result instance.
+     * Expected: the document is retrievable from the OpenSearch leaf with matching id and
+     * {@code keywords} equal to {@code ["alpha", "beta"]}, and the router's search (the ES read
+     * path in Phase 1) finds it without an error.
+     *
+     * @throws Exception if any index operation fails
+     */
+    @Test
+    public void test_dualWritePutToIndex_documentReachesBothBackends() throws Exception {
+        router.createSiteSearchIndex(IDX, null, 1);
+
+        final SiteSearchResult written = new SiteSearchResult();
+        written.setId(DOC_ID);
+        written.setUrl("/ss-dualwrite-it/" + RUN_ID);
+        written.setTitle("Dual-write Site Search IT " + RUN_ID);
+        written.setMimeType("text/html");
+        written.setContent("dotcms dual write roundtrip " + RUN_ID);
+        written.setContentLength(written.getContent().length());
+        // The exact Bug 1 trigger: keywords enters the map as a raw String. The first leaf in the
+        // fan-out rewrites it to a List; the second leaf must not see that mutation.
+        written.getMap().put("keywords", "alpha, beta");
+
+        router.putToIndex(IDX, written, "content");
+
+        // Bug 1 — OpenSearch must have received the document (unpatched: ClassCastException → null).
+        final SiteSearchResult osCopy = osSiteSearchAPI.getFromIndex(IDX, DOC_ID);
+        assertNotNull("Document must be retrievable from OpenSearch after dual-write", osCopy);
+        assertEquals("Document id must match in OpenSearch", DOC_ID, osCopy.getId());
+        assertEquals("keywords must round-trip as a trimmed list",
+                List.of("alpha", "beta"), osCopy.getKeywords());
+
+        // The dual-write also reached ES: in Phase 1 the router reads from ES.
+        final SiteSearchResults routerRead = router.search(IDX, "roundtrip", 0, 10);
+        assertNull("ES read must not error: " + routerRead.getError(), routerRead.getError());
+        assertTrue("Document must be searchable via the router's ES read path",
+                routerRead.getTotalResults() >= 1);
+
+        Logger.info(this, "✅ test_dualWritePutToIndex_documentReachesBothBackends passed");
+    }
+
+    /**
+     * Given scenario: Phase 1 (dual-write). A batch of documents is written through the
+     * {@code putToIndex(String, List, String)} router overload. This exercises the list fan-out
+     * path, where each provider must receive its own copy of every result.
+     * Expected: every document lands in OpenSearch.
+     *
+     * @throws Exception if any index operation fails
+     */
+    @Test
+    public void test_dualWriteBatchPutToIndex_allDocumentsReachOpenSearch() throws Exception {
+        router.createSiteSearchIndex(IDX, null, 1);
+
+        // Typed list instead of a raw List, so the batch overload is called without an
+        // unchecked conversion.
+        final List<SiteSearchResult> docs = new ArrayList<>();
+        for (int i = 0; i < 3; i++) {
+            final SiteSearchResult doc = new SiteSearchResult();
+            doc.setId(DOC_ID + "-" + i);
+            doc.setUrl("/ss-dualwrite-batch/" + RUN_ID + "/" + i);
+            doc.setTitle("Batch doc " + i);
+            doc.setMimeType("text/html");
+            doc.setContent("dotcms dual write batch sample " + RUN_ID);
+            doc.setContentLength(doc.getContent().length());
+            // keywords goes in as a raw String, as in the single-document test.
+            doc.getMap().put("keywords", "kw" + i + ", shared");
+            docs.add(doc);
+        }
+
+        router.putToIndex(IDX, docs, "content");
+
+        // Every document of the batch must be readable from the OpenSearch leaf.
+        for (int i = 0; i < 3; i++) {
+            final String id = DOC_ID + "-" + i;
+            assertNotNull("Batch document '" + id + "' must reach OpenSearch",
+                    osSiteSearchAPI.getFromIndex(IDX, id));
+        }
+
+        Logger.info(this, "✅ test_dualWriteBatchPutToIndex_allDocumentsReachOpenSearch passed");
+    }
+
+ // =======================================================================
+ // Helpers
+ // =======================================================================
+
+    /**
+     * Tells whether the configured ES and OS endpoints are the same string once trimmed and
+     * compared case-insensitively (the single-cluster {@code opensearch-upgrade} profile).
+     * Mirrors the gate used by the core migration ITs.
+     *
+     * @return {@code true} when {@code DOT_ES_ENDPOINTS} (default {@code http://localhost:9207})
+     *     and {@code OS_ENDPOINTS} (default {@code http://localhost:9201}) resolve to the same
+     *     value
+     */
+    private static boolean esSameAsOs() {
+        final String normalizedEs =
+                Config.getStringProperty("DOT_ES_ENDPOINTS", "http://localhost:9207").trim();
+        final String normalizedOs =
+                Config.getStringProperty("OS_ENDPOINTS", "http://localhost:9201").trim();
+        return normalizedEs.equalsIgnoreCase(normalizedOs);
+    }
+
+    /**
+     * Writes the migration phase flag into {@link Config}.
+     *
+     * @param ordinal phase number to store as a string, or {@code null} to store {@code null}
+     */
+    private static void setPhase(final Integer ordinal) {
+        final String flagValue;
+        if (ordinal == null) {
+            flagValue = null;
+        } else {
+            flagValue = String.valueOf(ordinal);
+        }
+        Config.setProperty(IndexConfigHelper.MigrationPhase.FLAG_KEY, flagValue);
+    }
+
+    /**
+     * Best-effort cleanup of everything a test may have created: the OpenSearch index, the
+     * Elasticsearch index and the related {@code indicies} DB rows, in that order. Each step
+     * logs a warning on failure and never stops the following steps.
+     */
+    private synchronized void cleanupTestData() {
+        dropOpenSearchIndex();
+        // The dual-write create also lands an ES index; remove it directly on the ES cluster.
+        dropElasticsearchIndex();
+        purgeVersionedRows();
+    }
+
+    /** Deletes the test index from OpenSearch when it exists. */
+    private void dropOpenSearchIndex() {
+        try {
+            if (osIndexAPI.indexExists(IDX)) {
+                osIndexAPI.delete(IDX);
+            }
+        } catch (final Exception e) {
+            Logger.warn(this, "Cleanup: error removing OS index '" + IDX + "': " + e.getMessage());
+        }
+    }
+
+    /** Deletes the test index through the ES implementation unwrapped from the index router. */
+    private void dropElasticsearchIndex() {
+        try {
+            final IndexAPIImpl indexRouter = (IndexAPIImpl) APILocator.getESIndexAPI();
+            final ESIndexAPI esIndex = indexRouter.esImpl();
+            if (esIndex.indexExists(IDX)) {
+                esIndex.delete(IDX);
+            }
+        } catch (final Exception e) {
+            Logger.warn(this, "Cleanup: error removing ES index '" + IDX + "': " + e.getMessage());
+        }
+    }
+
+    /** Deletes the DB rows whose name contains this run's suffix, then clears the cache. */
+    private void purgeVersionedRows() {
+        try {
+            final DotConnect deleteRows = new DotConnect()
+                    .setSQL("DELETE FROM indicies WHERE index_name LIKE ?")
+                    .addParam("%" + SUFFIX + "%");
+            deleteRows.loadResult();
+            APILocator.getVersionedIndicesAPI().clearCache();
+        } catch (final Exception e) {
+            Logger.warn(this, "Cleanup: error removing versioned DB rows: " + e.getMessage());
+        }
+    }
+}
diff --git a/dotcms-integration/src/test/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPITest.java b/dotcms-integration/src/test/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPITest.java
new file mode 100644
index 000000000000..63e3b79bc6b6
--- /dev/null
+++ b/dotcms-integration/src/test/java/com/dotmarketing/sitesearch/viewtool/SiteSearchWebAPITest.java
@@ -0,0 +1,578 @@
+package com.dotmarketing.sitesearch.viewtool;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import com.dotcms.IntegrationTestBase;
+import com.dotcms.LicenseTestUtil;
+import com.dotcms.content.index.domain.Aggregation;
+import com.dotcms.content.index.domain.AggregationBucket;
+import com.dotcms.content.index.domain.SearchHit;
+import com.dotcms.enterprise.publishing.sitesearch.SiteSearchResult;
+import com.dotcms.enterprise.publishing.sitesearch.SiteSearchResults;
+import com.dotcms.util.IntegrationTestInitService;
+import com.dotmarketing.business.APILocator;
+import com.dotmarketing.sitesearch.business.SiteSearchAPI;
+import com.dotmarketing.sitesearch.viewtool.SiteSearchWebAPI.Facet;
+import com.dotmarketing.sitesearch.viewtool.SiteSearchWebAPI.InternalWrapperCountDateHistogramFacet;
+import com.dotmarketing.sitesearch.viewtool.SiteSearchWebAPI.InternalWrapperStringTermsFacet;
+import com.dotmarketing.util.Logger;
+import java.util.Date;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+import org.apache.velocity.tools.view.context.ViewContext;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Integration test for the {@link SiteSearchWebAPI} Velocity view tool, modelled on
+ * {@code ContentSearchToolTest}.
+ *
+ *
+ * <p>Exercises the public view-tool surface end-to-end against a live search backend after the
+ * Elasticsearch → OpenSearch neutral-aggregation refactor (#35786), with emphasis on the fields of
+ * the POJOs returned by the refactored methods:
+ *
+ * <ul>
+ *   <li>{@code search(...)} → {@link SiteSearchResults} / {@link SiteSearchResult} fields, the
+ *       alias path, the default-index path, pagination and error states.
+ *   <li>{@code getAggregations(...)} → the neutral {@link Aggregation} / {@link AggregationBucket}
+ *       tree: name/type/buckets, doc counts, {@code getKeyAsNumber} (numeric histogram), and the
+ *       nested {@code top_hits} {@link SearchHit}s.
+ *   <li>{@code getFacets(...)} → all three legacy wrappers: string-terms, date/numeric-histogram and
+ *       the plain {@link Facet} fallback, plus their entry POJOs.
+ * </ul>
+ *
+ * <p>The tool resolves its backend through {@code APILocator.getSiteSearchAPI()} — now the
+ * {@code SiteSearchAPIImpl} phase router — so this also proves the router wiring did not break the
+ * legacy view-tool contract. Runs in the default integration profile (migration Phase 0 →
+ * Elasticsearch), like {@code ContentSearchToolTest}; no OpenSearch container is required.
+ *
+ * @author Fabrizio Araya
+ */
+public class SiteSearchWebAPITest extends IntegrationTestBase {
+
+    /** Per-run suffix (wall-clock millis) that keeps index, alias and doc names unique. */
+    private static final long SUFFIX = System.currentTimeMillis();
+    /** Name of the site-search index created for this run. */
+    private static final String IDX = "sitesearch_" + SUFFIX;
+    /** Alias attached to {@link #IDX} so the alias search path can be exercised. */
+    private static final String ALIAS = "ss_it_alias_" + SUFFIX;
+
+    /** Unique token embedded in every indexed doc so the text query matches only this run's data. */
+    private static final String TOKEN = "ssqa" + SUFFIX;
+
+    private static final String MIME_HTML = "text/html";
+    private static final String MIME_PDF = "application/pdf";
+    /** The only mime types indexed by this test; parameterized to avoid a raw {@code Set}. */
+    private static final Set<String> EXPECTED_MIMES = Set.of(MIME_HTML, MIME_PDF);
+
+    /** 3 html docs + 2 pdf docs = 5 docs, all carrying TOKEN. */
+    private static final int HTML_DOCS = 3;
+    private static final int PDF_DOCS = 2;
+    private static final int TOTAL_DOCS = HTML_DOCS + PDF_DOCS;
+
+    // ---- Queries (JSON, so search() skips the request-host lookup) -----------------------------
+
+    /** Query template; {@code TOKEN_PLACEHOLDER} is swapped for {@link #TOKEN} by searchToken(). */
+    private static final String SEARCH_TOKEN =
+            "{\"query\":{\"query_string\":{\"query\":\"" + "TOKEN_PLACEHOLDER"
+                    + "\",\"default_field\":\"*\"}}}";
+
+    /** Terms aggregation over {@code mimeType}. */
+    private static final String TERMS_AGG =
+            "{\"size\":0,\"aggs\":{\"by_mime\":{\"terms\":{\"field\":\"mimeType\",\"size\":10}}}}";
+
+    /** Terms aggregation over {@code mimeType} with a nested {@code top_hits} sub-aggregation. */
+    private static final String NESTED_AGG =
+            "{\"size\":0,\"aggs\":{\"by_mime\":{\"terms\":{\"field\":\"mimeType\",\"size\":10},"
+                    + "\"aggs\":{\"top_docs\":{\"top_hits\":{\"size\":2}}}}}}";
+
+    /** Numeric histogram over {@code contentLength} with an interval of 25. */
+    private static final String HISTO_AGG =
+            "{\"size\":0,\"aggs\":{\"by_len\":{\"histogram\":{\"field\":\"contentLength\","
+                    + "\"interval\":25}}}}";
+
+    /** Date histogram over the {@code modified} field — one bucket per UTC day. */
+    private static final String DATE_HISTO_AGG =
+            "{\"size\":0,\"aggs\":{\"by_day\":{\"date_histogram\":{\"field\":\"modified\","
+                    + "\"calendar_interval\":\"day\"}}}}";
+
+    /** One day in millis, used to spread each doc's {@code modified} date into its own day bucket. */
+    private static final long DAY_MILLIS = 86_400_000L;
+
+    /** Fixed UTC-midnight base (2024-01-15T00:00:00Z) so the date-histogram keys are deterministic. */
+    private static final long BASE_MODIFIED = 1_705_276_800_000L;
+
+    /** Query matches no doc, so the terms aggregation comes back with empty buckets. */
+    private static final String EMPTY_AGG =
+            "{\"size\":0,\"query\":{\"term\":{\"mimeType\":\"zzz/none\"}},"
+                    + "\"aggs\":{\"empty\":{\"terms\":{\"field\":\"mimeType\",\"size\":10}}}}";
+
+    /** Site-search API under test, resolved once in {@code prepare()}. */
+    private static SiteSearchAPI siteSearchAPI;
+
+ @BeforeClass
+ public static void prepare() throws Exception {
+ IntegrationTestInitService.getInstance().init();
+ LicenseTestUtil.getLicense();
+
+ siteSearchAPI = APILocator.getSiteSearchAPI();
+
+ // Create the index WITH an alias (so the alias search path is exercised) and activate it as
+ // the default (so the default-index search path is exercised).
+ siteSearchAPI.createSiteSearchIndex(IDX, ALIAS, 1);
+ siteSearchAPI.activateIndex(IDX);
+
+ for (int i = 0; i < TOTAL_DOCS; i++) {
+ final boolean html = i < HTML_DOCS;
+ final SiteSearchResult doc = new SiteSearchResult();
+ doc.setId("ss-it-" + SUFFIX + "-" + i);
+ doc.setUrl("/site-search-webapi-it/" + i);
+ doc.setTitle("Site Search WebAPI IT doc " + i);
+ doc.setHost("demo.dotcms.com");
+ doc.setAuthor("qa-author-" + i);
+ doc.setMimeType(html ? MIME_HTML : MIME_PDF);
+ // Vary the body length so the numeric histogram on contentLength spreads over buckets.
+ doc.setContent("dotcms site search viewtool integration " + TOKEN
+ + " ".repeat(i * 30));
+ doc.setContentLength(doc.getContent().length());
+ // Distinct UTC-midnight day per doc so the date histogram on `modified` yields one
+ // populated bucket per doc (exercises the ZonedDateTime -> epoch-millis key conversion).
+ doc.setModified(new Date(BASE_MODIFIED + (i * DAY_MILLIS)));
+ siteSearchAPI.putToIndex(IDX, doc, "content");
+ }
+ }
+
+    /**
+     * Best-effort teardown: deactivates the test index and then deletes it by its cluster-prefixed
+     * name. Each step logs a warning instead of failing the run.
+     */
+    @AfterClass
+    public static void cleanup() {
+        final Class<SiteSearchWebAPITest> logOwner = SiteSearchWebAPITest.class;
+
+        try {
+            siteSearchAPI.deactivateIndex(IDX);
+        } catch (final Exception ex) {
+            Logger.warn(logOwner, "Cleanup: deactivate failed: " + ex.getMessage());
+        }
+
+        try {
+            final String prefixedName =
+                    APILocator.getESIndexAPI().getNameWithClusterIDPrefix(IDX);
+            APILocator.getESIndexAPI().delete(prefixedName);
+        } catch (final Exception ex) {
+            Logger.warn(logOwner, "Cleanup: delete failed: " + ex.getMessage());
+        }
+    }
+
+    /**
+     * Creates a fresh {@link SiteSearchWebAPI} and initializes it with a mocked
+     * {@link ViewContext} that hands out a mocked request and response.
+     *
+     * @return the initialized view tool
+     */
+    private SiteSearchWebAPI siteSearchWebAPI() {
+        final HttpServletRequest mockRequest = mock(HttpServletRequest.class);
+        final HttpServletResponse mockResponse = mock(HttpServletResponse.class);
+
+        final ViewContext mockContext = mock(ViewContext.class);
+        when(mockContext.getRequest()).thenReturn(mockRequest);
+        when(mockContext.getResponse()).thenReturn(mockResponse);
+
+        final SiteSearchWebAPI viewTool = new SiteSearchWebAPI();
+        viewTool.init(mockContext);
+        return viewTool;
+    }
+
+ private static String searchToken() {
+ return SEARCH_TOKEN.replace("TOKEN_PLACEHOLDER", TOKEN);
+ }
+
+ // =========================================================================
+ // listSearchIndicies
+ // =========================================================================
+
+    /**
+     * Given scenario: a populated, active site-search index.
+     * Expected: both {@code listSearchIndicies()} and its legacy-typo alias
+     * {@code listSearchIncidies()} include the index created by this test.
+     */
+    @Test
+    public void listSearchIndicies_containsCreatedIndex() {
+        final SiteSearchWebAPI viewTool = siteSearchWebAPI();
+
+        final boolean listedByCorrectName = viewTool.listSearchIndicies().contains(IDX);
+        assertTrue("listSearchIndicies() must contain the created index", listedByCorrectName);
+
+        final boolean listedByLegacyName = viewTool.listSearchIncidies().contains(IDX);
+        assertTrue("legacy-typo alias listSearchIncidies() must behave identically",
+                listedByLegacyName);
+
+        Logger.info(this, "✅ listSearchIndicies_containsCreatedIndex passed");
+    }
+
+ // =========================================================================
+ // search — SiteSearchResults / SiteSearchResult field coverage
+ // =========================================================================
+
+    /**
+     * Given scenario: 5 docs carrying TOKEN in the default (active) index.
+     * Expected: the 3-arg (default-index) search fills in every {@link SiteSearchResults} field,
+     * and each {@link SiteSearchResult} row exposes id, url, title, mimeType and score.
+     */
+    @Test
+    public void search_defaultIndex_populatesResultFields() throws Exception {
+        final SiteSearchWebAPI viewTool = siteSearchWebAPI();
+
+        final SiteSearchResults page = viewTool.search(searchToken(), 0, 10);
+
+        // Envelope-level fields.
+        assertNull("Search must not return an error: " + page.getError(), page.getError());
+        assertEquals("All TOKEN docs must be counted", TOTAL_DOCS, page.getTotalResults());
+        assertEquals("getTotalHits() alias must match getTotalResults()",
+                page.getTotalResults(), page.getTotalHits());
+        assertEquals("Result rows must match the total (under the page size)",
+                TOTAL_DOCS, page.getResults().size());
+        assertTrue("maxScore must be positive for a matching query", page.getMaxScore() > 0);
+        assertEquals("offset must reflect the requested start", 0, page.getOffset());
+        assertEquals("start alias must match offset", page.getOffset(), page.getStart());
+        assertEquals("limit must reflect the requested rows", 10, page.getLimit());
+        assertNotNull("query echo must be set", page.getQuery());
+        assertNotNull("took must be set", page.getTook());
+
+        // Row-level fields.
+        final String runIdPrefix = "ss-it-" + SUFFIX;
+        for (final SiteSearchResult row : page.getResults()) {
+            assertNotNull("each hit must carry an id", row.getId());
+            assertTrue("each hit id must belong to this run", row.getId().startsWith(runIdPrefix));
+            assertNotNull("each hit must carry a url", row.getUrl());
+            assertNotNull("each hit must carry a title", row.getTitle());
+            assertTrue("each hit mimeType must be one of the indexed types",
+                    EXPECTED_MIMES.contains(row.getMimeType()));
+            assertTrue("each hit must have a positive score", row.getScore() > 0);
+        }
+
+        Logger.info(this, "✅ search_defaultIndex_populatesResultFields passed – hits: "
+                + page.getTotalResults());
+    }
+
+    /**
+     * Given scenario: the index was created with an alias.
+     * Expected: the 4-arg search, called with the alias, returns every doc of this run without
+     * reporting an error.
+     */
+    @Test
+    public void search_byAlias_resolvesIndex() {
+        final SiteSearchWebAPI viewTool = siteSearchWebAPI();
+
+        final SiteSearchResults viaAlias = viewTool.search(ALIAS, searchToken(), 0, 10);
+
+        assertNull("Alias search must not return an error: " + viaAlias.getError(),
+                viaAlias.getError());
+        assertEquals("Alias search must reach the same docs",
+                TOTAL_DOCS, viaAlias.getTotalResults());
+
+        Logger.info(this, "✅ search_byAlias_resolvesIndex passed");
+    }
+
+    /**
+     * Given scenario: a JSON body that caps the page size to 2.
+     * Expected: no more than 2 rows come back while the total still counts every match, which
+     * covers the offset/limit/totalResults fields together.
+     */
+    @Test
+    public void search_pagination_capsReturnedRows() throws Exception {
+        final SiteSearchWebAPI viewTool = siteSearchWebAPI();
+
+        final String pagedQuery = "{\"size\":2,\"query\":{\"query_string\":{\"query\":\"" + TOKEN
+                + "\",\"default_field\":\"*\"}}}";
+        final SiteSearchResults page = viewTool.search(pagedQuery, 0, 2);
+
+        assertNull("Paged search must not error: " + page.getError(), page.getError());
+        assertEquals("Total must still reflect every match", TOTAL_DOCS, page.getTotalResults());
+
+        final boolean withinPageSize = page.getResults().size() <= 2;
+        assertTrue("Returned rows must be capped by the page size", withinPageSize);
+
+        Logger.info(this, "✅ search_pagination_capsReturnedRows passed – returned: "
+                + page.getResults().size());
+    }
+
+    /**
+     * Given scenario: a query for a token that no indexed doc contains.
+     * Expected: a clean empty response — zero total, no rows and no error.
+     */
+    @Test
+    public void search_noMatch_returnsEmptyWithoutError() throws Exception {
+        final SiteSearchWebAPI viewTool = siteSearchWebAPI();
+
+        final String unmatchedQuery = "{\"query\":{\"query_string\":{\"query\":\"zzznomatchzzz"
+                + SUFFIX + "\",\"default_field\":\"*\"}}}";
+        final SiteSearchResults emptyPage = viewTool.search(unmatchedQuery, 0, 10);
+
+        assertNull("No-match search must not error", emptyPage.getError());
+        assertEquals("No-match search must count zero", 0, emptyPage.getTotalResults());
+        assertTrue("No-match search must return no rows", emptyPage.getResults().isEmpty());
+
+        Logger.info(this, "✅ search_noMatch_returnsEmptyWithoutError passed");
+    }
+
+ /**
+ * Given scenario: a null query.
+ * Expected: the tool reports an error on the SiteSearchResults rather than throwing.
+ */
+ @Test
+ public void search_nullQuery_setsError() throws Exception {
+ final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+ final SiteSearchResults results = tool.search(null, 0, 10);
+
+ assertNotNull("A null query must surface an error", results.getError());
+ Logger.info(this, "✅ search_nullQuery_setsError passed – error: " + results.getError());
+ }
+
+ // =========================================================================
+ // getAggregations — Aggregation / AggregationBucket field coverage
+ // =========================================================================
+
+    /**
+     * Given scenario: 3 html + 2 pdf docs.
+     * Expected: the terms aggregation on mimeType exposes a populated neutral Aggregation — name,
+     * type, two buckets with correct doc counts, string keys, null numeric keys (non-numeric) and no
+     * top-hits — covering the multi-bucket AggregationBucket accessors.
+     */
+    @Test
+    public void getAggregations_termsBuckets_fieldsPopulated() throws Exception {
+        final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+        // Parameterized map: with a raw Map, get(..) yields Object and the assignment below
+        // would not compile.
+        final Map<String, Aggregation> aggregations = tool.getAggregations(IDX, TERMS_AGG);
+
+        assertNotNull("Aggregations map must not be null", aggregations);
+        final Aggregation byMime = aggregations.get("by_mime");
+        assertNotNull("'by_mime' aggregation must be present", byMime);
+        assertEquals("aggregation name must round-trip", "by_mime", byMime.getName());
+        assertNotNull("aggregation type must be reported", byMime.getType());
+        assertNull("a terms aggregation carries no top-hits", byMime.getHits());
+        assertEquals("there must be one bucket per mimeType", 2, byMime.getBuckets().size());
+
+        // -1 marks "bucket not seen", so a missing bucket fails the count assertions below.
+        long htmlCount = -1;
+        long pdfCount = -1;
+        for (final AggregationBucket bucket : byMime.getBuckets()) {
+            assertTrue("bucket key must be a known mimeType",
+                    EXPECTED_MIMES.contains(bucket.getKey()));
+            assertEquals("getKeyAsString must mirror getKey", bucket.getKey(),
+                    bucket.getKeyAsString());
+            assertNull("a non-numeric key must yield a null number", bucket.getKeyAsNumber());
+            assertTrue("each bucket must carry documents", bucket.getDocCount() > 0);
+            assertTrue("a terms bucket has no sub-aggregations here",
+                    bucket.getAggregations().isEmpty());
+            if (MIME_HTML.equals(bucket.getKey())) {
+                htmlCount = bucket.getDocCount();
+            } else if (MIME_PDF.equals(bucket.getKey())) {
+                pdfCount = bucket.getDocCount();
+            }
+        }
+        assertEquals("html bucket must count the html docs", HTML_DOCS, htmlCount);
+        assertEquals("pdf bucket must count the pdf docs", PDF_DOCS, pdfCount);
+
+        Logger.info(this, "✅ getAggregations_termsBuckets_fieldsPopulated passed");
+    }
+
+    /**
+     * Given scenario: a terms aggregation with a nested top_hits sub-aggregation.
+     * Expected: the neutral tree preserves the nested {@code top_docs} as an Aggregation that carries
+     * SearchHits, and each SearchHit exposes id and source — covering getHits()/SearchHit fields and
+     * the nested getAggregations() path.
+     */
+    @Test
+    public void getAggregations_nestedTopHits_preserved() throws Exception {
+        final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+        // Parameterized (not raw) so get(..) returns an Aggregation instead of Object.
+        final Map<String, Aggregation> aggregations = tool.getAggregations(IDX, NESTED_AGG);
+        final Aggregation byMime = aggregations.get("by_mime");
+        assertNotNull("'by_mime' aggregation must be present", byMime);
+        assertFalse("'by_mime' must have buckets", byMime.getBuckets().isEmpty());
+
+        final AggregationBucket firstBucket = byMime.getBuckets().getFirst();
+        final Aggregation topDocs = firstBucket.getAggregations().get("top_docs");
+        assertNotNull("nested top_hits sub-aggregation must be preserved", topDocs);
+        assertNotNull("top_hits must carry a SearchHits container", topDocs.getHits());
+
+        // Typed list so getFirst() yields a SearchHit; a raw List would hand back Object.
+        final List<SearchHit> hits = topDocs.getHits().getHits();
+        assertFalse("top_hits must carry at least one hit", hits.isEmpty());
+        final SearchHit hit = hits.getFirst();
+        assertNotNull("each top-hit must expose an id", hit.getId());
+        assertFalse("each top-hit must expose its source document",
+                hit.getSourceAsMap().isEmpty());
+
+        Logger.info(this, "✅ getAggregations_nestedTopHits_preserved passed – topHits: " + hits.size());
+    }
+
+    /**
+     * Given scenario: a numeric histogram on the long field {@code contentLength}.
+     * Expected: the buckets carry numeric keys, so {@link AggregationBucket#getKeyAsNumber()} returns
+     * a non-null Number — covering the numeric-key path (distinct from the non-numeric terms keys).
+     */
+    @Test
+    public void getAggregations_numericHistogram_keyAsNumber() throws Exception {
+        final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+        // Parameterized (not raw) so get(..) returns an Aggregation instead of Object.
+        final Map<String, Aggregation> aggregations = tool.getAggregations(IDX, HISTO_AGG);
+        final Aggregation byLen = aggregations.get("by_len");
+        assertNotNull("'by_len' histogram aggregation must be present", byLen);
+        assertTrue("histogram type must be reported as a histogram",
+                byLen.getType().contains("histogram"));
+        assertFalse("histogram must produce buckets", byLen.getBuckets().isEmpty());
+
+        // Every bucket needs a numeric key; a bucket may have a zero doc count, so only require
+        // that at least one holds documents.
+        boolean sawPopulatedNumericBucket = false;
+        for (final AggregationBucket bucket : byLen.getBuckets()) {
+            assertNotNull("a histogram bucket key must be numeric", bucket.getKeyAsNumber());
+            if (bucket.getDocCount() > 0) {
+                sawPopulatedNumericBucket = true;
+            }
+        }
+        assertTrue("at least one histogram bucket must contain documents", sawPopulatedNumericBucket);
+
+        Logger.info(this, "✅ getAggregations_numericHistogram_keyAsNumber passed");
+    }
+
+    /**
+     * Given scenario: a {@code date_histogram} on the {@code modified} date field.
+     * Expected: each bucket key is normalized to a numeric epoch-millis timestamp (the ES
+     * {@code date_histogram} key is a {@code ZonedDateTime} under the hood), so
+     * {@link AggregationBucket#getKeyAsNumber()} returns a real timestamp and {@code getKeyAsString}
+     * mirrors it — covering the date-histogram key path, which is distinct from the numeric-histogram
+     * one and was otherwise untested end-to-end.
+     */
+    @Test
+    public void getAggregations_dateHistogram_keyAsEpochMillis() throws Exception {
+        final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+        // Parameterized (not raw) so get(..) returns an Aggregation instead of Object.
+        final Map<String, Aggregation> aggregations = tool.getAggregations(IDX, DATE_HISTO_AGG);
+        final Aggregation byDay = aggregations.get("by_day");
+        assertNotNull("'by_day' date-histogram aggregation must be present", byDay);
+        assertTrue("date_histogram type must be reported as a histogram",
+                byDay.getType().contains("histogram"));
+        assertFalse("date histogram must produce buckets", byDay.getBuckets().isEmpty());
+
+        long totalDocs = 0;
+        for (final AggregationBucket bucket : byDay.getBuckets()) {
+            final Number key = bucket.getKeyAsNumber();
+            // The crux: a ZonedDateTime key must surface as numeric epoch-millis here, NOT a
+            // formatted date string (which would make getKeyAsNumber return null).
+            assertNotNull("a date-histogram bucket key must be numeric (epoch-millis)", key);
+            assertTrue("the key must be a real epoch-millis timestamp (>= the base UTC day)",
+                    key.longValue() >= BASE_MODIFIED);
+            assertEquals("getKeyAsString must mirror the numeric epoch-millis",
+                    String.valueOf(key.longValue()), bucket.getKeyAsString());
+            totalDocs += bucket.getDocCount();
+        }
+        assertEquals("every indexed doc must fall into exactly one day bucket", TOTAL_DOCS, totalDocs);
+
+        Logger.info(this, "✅ getAggregations_dateHistogram_keyAsEpochMillis passed – buckets: "
+                + byDay.getBuckets().size());
+    }
+
+ // =========================================================================
+ // getFacets — legacy wrapper coverage (terms / histogram / plain)
+ // =========================================================================
+
+    /**
+     * Given scenario: a terms aggregation with non-empty buckets.
+     * Expected: getFacets wraps it as an {@link InternalWrapperStringTermsFacet} exposing name/type
+     * and term entries with term + count — covering the legacy string-terms facet POJO.
+     */
+    @Test
+    public void getFacets_termsAggregation_wrapsAsStringTermsFacet() throws Exception {
+        final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+        // Parameterized (not raw) so get(..) returns a Facet instead of Object.
+        final Map<String, Facet> facets = tool.getFacets(IDX, TERMS_AGG);
+        assertNotNull("Facets map must not be null", facets);
+
+        final Facet facet = facets.get("by_mime");
+        assertNotNull("'by_mime' facet must be present", facet);
+        assertEquals("facet name must round-trip", "by_mime", facet.getName());
+        assertNotNull("facet type must be reported", facet.getType());
+        assertTrue("non-empty terms aggregation must map to InternalWrapperStringTermsFacet",
+                facet instanceof InternalWrapperStringTermsFacet);
+
+        final InternalWrapperStringTermsFacet termsFacet = (InternalWrapperStringTermsFacet) facet;
+        assertEquals("there must be one entry per bucket", 2, termsFacet.entries().size());
+
+        // -1 marks "html entry not seen", so a missing entry fails the count assertion below.
+        long htmlCount = -1;
+        for (final var entry : termsFacet.entries()) {
+            assertTrue("entry term must be a known mimeType", EXPECTED_MIMES.contains(entry.getTerm()));
+            assertTrue("entry count must be positive", entry.getCount() > 0);
+            if (MIME_HTML.equals(entry.getTerm())) {
+                htmlCount = entry.getCount();
+            }
+        }
+        assertEquals("html term entry must count the html docs", HTML_DOCS, htmlCount);
+
+        Logger.info(this, "✅ getFacets_termsAggregation_wrapsAsStringTermsFacet passed");
+    }
+
+    /**
+     * Given scenario: a numeric histogram aggregation.
+     * Expected: getFacets wraps it as an {@link InternalWrapperCountDateHistogramFacet} exposing
+     * CountEntry rows with time (the numeric key) and count — covering the legacy histogram facet
+     * POJO and the {@code isHistogram} branch.
+     */
+    @Test
+    public void getFacets_histogramAggregation_wrapsAsCountHistogramFacet() throws Exception {
+        final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+        // Parameterized (not raw) so get(..) returns a Facet instead of Object.
+        final Map<String, Facet> facets = tool.getFacets(IDX, HISTO_AGG);
+        final Facet facet = facets.get("by_len");
+        assertNotNull("'by_len' facet must be present", facet);
+        assertTrue("a histogram aggregation must map to InternalWrapperCountDateHistogramFacet",
+                facet instanceof InternalWrapperCountDateHistogramFacet);
+
+        final InternalWrapperCountDateHistogramFacet histoFacet =
+                (InternalWrapperCountDateHistogramFacet) facet;
+        assertFalse("histogram facet must expose count entries", histoFacet.entries().isEmpty());
+
+        // Entries may have a zero count, so only require that at least one is populated.
+        boolean sawPopulatedEntry = false;
+        for (final var entry : histoFacet.entries()) {
+            assertTrue("entry time (numeric key) must be non-negative", entry.getTime() >= 0);
+            if (entry.getCount() > 0) {
+                sawPopulatedEntry = true;
+            }
+        }
+        assertTrue("at least one histogram entry must carry a count", sawPopulatedEntry);
+
+        Logger.info(this, "✅ getFacets_histogramAggregation_wrapsAsCountHistogramFacet passed");
+    }
+
+    /**
+     * Given scenario: a {@code date_histogram} on the {@code modified} field.
+     * Expected: getFacets wraps it as an {@link InternalWrapperCountDateHistogramFacet} whose
+     * CountEntry rows carry the day's epoch-millis as {@code time} — proving the legacy date-facet
+     * path (which reads {@code getKeyAsNumber().longValue()}) surfaces a real timestamp rather than
+     * the {@code 0L} fallback used when the key fails to parse as a number.
+     */
+    @Test
+    public void getFacets_dateHistogram_exposesEpochMillisTime() throws Exception {
+        final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+        // Parameterized (not raw) so get(..) returns a Facet instead of Object.
+        final Map<String, Facet> facets = tool.getFacets(IDX, DATE_HISTO_AGG);
+        final Facet facet = facets.get("by_day");
+        assertNotNull("'by_day' facet must be present", facet);
+        assertTrue("a date histogram must map to InternalWrapperCountDateHistogramFacet",
+                facet instanceof InternalWrapperCountDateHistogramFacet);
+
+        final InternalWrapperCountDateHistogramFacet histoFacet =
+                (InternalWrapperCountDateHistogramFacet) facet;
+        assertFalse("date histogram facet must expose count entries", histoFacet.entries().isEmpty());
+
+        // Sum the per-day counts so every indexed doc is accounted for exactly once.
+        long totalCount = 0;
+        for (final var entry : histoFacet.entries()) {
+            assertTrue("each entry time must be a real epoch-millis (>= the base UTC day), not the "
+                    + "0L parse-failure fallback", entry.getTime() >= BASE_MODIFIED);
+            totalCount += entry.getCount();
+        }
+        assertEquals("every indexed doc must be counted across the day entries", TOTAL_DOCS, totalCount);
+
+        Logger.info(this, "✅ getFacets_dateHistogram_exposesEpochMillisTime passed");
+    }
+
+    /**
+     * Given scenario: a terms aggregation whose query matches no document (empty buckets).
+     * Expected: getFacets falls back to a plain {@link Facet} (neither wrapper), still exposing
+     * name and type — covering the empty-bucket branch.
+     */
+    @Test
+    public void getFacets_emptyBuckets_fallsBackToPlainFacet() throws Exception {
+        final SiteSearchWebAPI tool = siteSearchWebAPI();
+
+        // Parameterized (not raw) so get(..) returns a Facet instead of Object.
+        final Map<String, Facet> facets = tool.getFacets(IDX, EMPTY_AGG);
+        final Facet facet = facets.get("empty");
+        assertNotNull("'empty' facet must be present", facet);
+        assertEquals("facet name must round-trip", "empty", facet.getName());
+        assertNotNull("facet type must be reported", facet.getType());
+        assertFalse("an empty terms aggregation must NOT be a string-terms wrapper",
+                facet instanceof InternalWrapperStringTermsFacet);
+        assertFalse("an empty terms aggregation must NOT be a histogram wrapper",
+                facet instanceof InternalWrapperCountDateHistogramFacet);
+
+        Logger.info(this, "✅ getFacets_emptyBuckets_fallsBackToPlainFacet passed");
+    }
+}