1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-07 03:42:39 +02:00

Compare commits

...

4 Commits

Author SHA1 Message Date
Viktor Lofgren
c67a635103 (search, experimental) Add a few debugging tracks to the search UI 2025-01-10 15:44:44 +01:00
Viktor Lofgren
20b24133fb (search, experimental) Add a few debugging tracks to the search UI 2025-01-10 15:34:48 +01:00
Viktor Lofgren
f2567677e8 (index-client) Clean up index client code
Improve error handling.  This should be a relatively rare case, but we don't want one bad index partition to blow up the entire query.
2025-01-10 15:17:07 +01:00
Viktor Lofgren
bc2c2061f2 (index-client) Clean up index client code
This should have the rpc stream reception be performed in parallel in separate threads, rather than blocking sequentially in the main thread, hopefully giving a slight performance boost.
2025-01-10 15:14:42 +01:00
7 changed files with 178 additions and 57 deletions

View File

@@ -16,20 +16,19 @@ import org.slf4j.LoggerFactory;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Comparator; import java.util.Comparator;
import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.concurrent.CompletableFuture; import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors; import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicInteger;
import static java.lang.Math.clamp; import java.util.function.Consumer;
@Singleton @Singleton
public class IndexClient { public class IndexClient {
private static final Logger logger = LoggerFactory.getLogger(IndexClient.class); private static final Logger logger = LoggerFactory.getLogger(IndexClient.class);
private final GrpcMultiNodeChannelPool<IndexApiGrpc.IndexApiBlockingStub> channelPool; private final GrpcMultiNodeChannelPool<IndexApiGrpc.IndexApiBlockingStub> channelPool;
private final DomainBlacklistImpl blacklist; private final DomainBlacklistImpl blacklist;
private static final ExecutorService executor = Executors.newVirtualThreadPerTaskExecutor(); private static final ExecutorService executor = Executors.newCachedThreadPool();
@Inject @Inject
public IndexClient(GrpcChannelPoolFactory channelPoolFactory, DomainBlacklistImpl blacklist) { public IndexClient(GrpcChannelPoolFactory channelPoolFactory, DomainBlacklistImpl blacklist) {
@@ -51,40 +50,37 @@ public class IndexClient {
/** Execute a query on the index partitions and return the combined results. */ /** Execute a query on the index partitions and return the combined results. */
public AggregateQueryResponse executeQueries(RpcIndexQuery indexRequest, Pagination pagination) { public AggregateQueryResponse executeQueries(RpcIndexQuery indexRequest, Pagination pagination) {
List<CompletableFuture<Iterator<RpcDecoratedResultItem>>> futures =
channelPool.call(IndexApiGrpc.IndexApiBlockingStub::query)
.async(executor)
.runEach(indexRequest);
final int requestedMaxResults = indexRequest.getQueryLimits().getResultsTotal(); final int requestedMaxResults = indexRequest.getQueryLimits().getResultsTotal();
final int resultsUpperBound = requestedMaxResults * channelPool.getNumNodes();
List<RpcDecoratedResultItem> results = new ArrayList<>(resultsUpperBound); AtomicInteger totalNumResults = new AtomicInteger(0);
for (var future : futures) { List<RpcDecoratedResultItem> results =
try { channelPool.call(IndexApiGrpc.IndexApiBlockingStub::query)
future.get().forEachRemaining(results::add); .async(executor)
} .runEach(indexRequest)
catch (Exception e) { .stream()
logger.error("Downstream exception", e); .map(future -> future.thenApply(iterator -> {
} List<RpcDecoratedResultItem> ret = new ArrayList<>(requestedMaxResults);
} iterator.forEachRemaining(ret::add);
totalNumResults.addAndGet(ret.size());
return ret;
}))
.mapMulti((CompletableFuture<List<RpcDecoratedResultItem>> fut, Consumer<List<RpcDecoratedResultItem>> c) ->{
try {
c.accept(fut.join());
} catch (Exception e) {
logger.error("Error while fetching results", e);
}
})
.flatMap(List::stream)
.filter(item -> !isBlacklisted(item))
.sorted(comparator)
.skip(Math.max(0, (pagination.page - 1) * pagination.pageSize))
.limit(pagination.pageSize)
.toList();
// Sort the results by ranking score and remove blacklisted domains return new AggregateQueryResponse(results, pagination.page(), totalNumResults.get());
results.sort(comparator);
results.removeIf(this::isBlacklisted);
int numReceivedResults = results.size();
// pagination is typically 1-indexed, so we need to adjust the start and end indices
int indexStart = (pagination.page - 1) * pagination.pageSize;
int indexEnd = (pagination.page) * pagination.pageSize;
results = results.subList(
clamp(indexStart, 0, Math.max(0, results.size() - 1)), // from is inclusive, so subtract 1 from size()
clamp(indexEnd, 0, results.size()));
return new AggregateQueryResponse(results, pagination.page(), numReceivedResults);
} }
private boolean isBlacklisted(RpcDecoratedResultItem item) { private boolean isBlacklisted(RpcDecoratedResultItem item) {

View File

@@ -294,4 +294,105 @@ public class SearchOperator {
} }
} }
/**
 * Debug search track 1: runs a regular search, clusters the results with the
 * strategy chosen from the query response, resolves the focus domain id and
 * builds the pagination links.
 * <p>
 * The returned model always carries an empty problem list and an empty
 * eval result.
 *
 * @param userParams the search parameters supplied by the user
 * @return the decorated results ready for rendering
 */
public DecoratedSearchResults doSearchFastTrack1(SearchParameters userParams) {
    final var regularParams = paramFactory.forRegularSearch(userParams);
    final QueryResponse response = queryClient.search(regularParams);
    final var details = getResultsFromQuery(response).results;

    // The response decides which clustering strategy applies; 25 is passed as the total
    final var strategy = SearchResultClusterer.selectStrategy(response);
    final List<ClusteredUrlDetails> clustered = strategy.clusterResults(details, 25);

    // -1 signals "no focus domain"; a focus domain whose id lookup is empty falls back to 0
    final String focusDomain = response.domain();
    final int focusDomainId;
    if (focusDomain != null && !focusDomain.isBlank()) {
        focusDomainId = domainQueries.tryGetDomainId(new EdgeDomain(focusDomain)).orElse(0);
    } else {
        focusDomainId = -1;
    }

    // One navigation entry per result page, numbered from 1, flagging the page currently shown
    final List<ResultsPage> pages = IntStream.rangeClosed(1, response.totalPages())
            .mapToObj(pageNo -> {
                boolean isCurrent = (pageNo == userParams.page());
                var pageUrl = userParams.withPage(pageNo).renderUrl();
                return new ResultsPage(pageNo, isCurrent, pageUrl);
            })
            .toList();

    // Assemble the model handed back to the caller
    var builder = DecoratedSearchResults.builder();
    builder = builder.params(userParams);
    builder = builder.problems(List.of());
    builder = builder.evalResult("");
    builder = builder.results(clustered);
    builder = builder.filters(new SearchFilters(userParams));
    builder = builder.focusDomain(focusDomain);
    builder = builder.focusDomainId(focusDomainId);
    builder = builder.resultPages(pages);
    return builder.build();
}
/**
 * Debug search track 2: runs a regular search and passes the results through
 * the no-op clustering (no strategy selection), while still resolving the
 * focus domain id and building the pagination links.
 * <p>
 * The returned model always carries an empty problem list and an empty
 * eval result.
 *
 * @param userParams the search parameters supplied by the user
 * @return the decorated results ready for rendering
 */
public DecoratedSearchResults doSearchFastTrack2(SearchParameters userParams) {
    final var regularParams = paramFactory.forRegularSearch(userParams);
    final QueryResponse response = queryClient.search(regularParams);
    final var details = getResultsFromQuery(response).results;

    // No strategy selection here: results go through the no-op clustering, total = result count
    final List<ClusteredUrlDetails> clustered =
            SearchResultClusterer.noOpClustering(details, details.size());

    // -1 signals "no focus domain"; a focus domain whose id lookup is empty falls back to 0
    final String focusDomain = response.domain();
    final int focusDomainId;
    if (focusDomain != null && !focusDomain.isBlank()) {
        focusDomainId = domainQueries.tryGetDomainId(new EdgeDomain(focusDomain)).orElse(0);
    } else {
        focusDomainId = -1;
    }

    // One navigation entry per result page, numbered from 1, flagging the page currently shown
    final List<ResultsPage> pages = IntStream.rangeClosed(1, response.totalPages())
            .mapToObj(pageNo -> {
                boolean isCurrent = (pageNo == userParams.page());
                var pageUrl = userParams.withPage(pageNo).renderUrl();
                return new ResultsPage(pageNo, isCurrent, pageUrl);
            })
            .toList();

    // Assemble the model handed back to the caller
    var builder = DecoratedSearchResults.builder();
    builder = builder.params(userParams);
    builder = builder.problems(List.of());
    builder = builder.evalResult("");
    builder = builder.results(clustered);
    builder = builder.filters(new SearchFilters(userParams));
    builder = builder.focusDomain(focusDomain);
    builder = builder.focusDomainId(focusDomainId);
    builder = builder.resultPages(pages);
    return builder.build();
}
/**
 * Debug search track 3: runs a regular search, passes the results through
 * the no-op clustering and builds the pagination links, without performing
 * any focus domain lookup.
 * <p>
 * The returned model always carries an empty problem list, an empty eval
 * result, a {@code null} focus domain and a focus domain id of -1.
 *
 * @param userParams the search parameters supplied by the user
 * @return the decorated results ready for rendering
 */
public DecoratedSearchResults doSearchFastTrack3(SearchParameters userParams) {
    final var regularParams = paramFactory.forRegularSearch(userParams);
    final QueryResponse response = queryClient.search(regularParams);
    final var details = getResultsFromQuery(response).results;

    // No strategy selection here: results go through the no-op clustering, total = result count
    final List<ClusteredUrlDetails> clustered =
            SearchResultClusterer.noOpClustering(details, details.size());

    // One navigation entry per result page, numbered from 1, flagging the page currently shown
    final List<ResultsPage> pages = IntStream.rangeClosed(1, response.totalPages())
            .mapToObj(pageNo -> {
                boolean isCurrent = (pageNo == userParams.page());
                var pageUrl = userParams.withPage(pageNo).renderUrl();
                return new ResultsPage(pageNo, isCurrent, pageUrl);
            })
            .toList();

    // Assemble the model handed back to the caller; the focus domain is deliberately left unset
    var builder = DecoratedSearchResults.builder();
    builder = builder.params(userParams);
    builder = builder.problems(List.of());
    builder = builder.evalResult("");
    builder = builder.results(clustered);
    builder = builder.filters(new SearchFilters(userParams));
    builder = builder.focusDomain(null);
    builder = builder.focusDomainId(-1);
    builder = builder.resultPages(pages);
    return builder.build();
}
} }

View File

@@ -17,13 +17,13 @@ public class SearchResultClusterer {
public static SearchResultClusterStrategy selectStrategy(QueryResponse response) { public static SearchResultClusterStrategy selectStrategy(QueryResponse response) {
if (response.domain() != null && !response.domain().isBlank()) if (response.domain() != null && !response.domain().isBlank())
return SearchResultClusterer::noOp; return SearchResultClusterer::noOpClustering;
return SearchResultClusterer::byDomain; return SearchResultClusterer::byDomain;
} }
/** No clustering, just return the results as is */ /** No clustering, just return the results as is */
private static List<ClusteredUrlDetails> noOp(List<UrlDetails> results, int total) { public static List<ClusteredUrlDetails> noOpClustering(List<UrlDetails> results, int total) {
if (results.isEmpty()) if (results.isEmpty())
return List.of(); return List.of();

View File

@@ -21,7 +21,8 @@ public record SearchParameters(WebsiteUrl url,
SearchTitleParameter searchTitle, SearchTitleParameter searchTitle,
SearchAdtechParameter adtech, SearchAdtechParameter adtech,
boolean newFilter, boolean newFilter,
int page int page,
int debug
) { ) {
public static SearchParameters defaultsForQuery(WebsiteUrl url, String query, int page) { public static SearchParameters defaultsForQuery(WebsiteUrl url, String query, int page) {
@@ -34,7 +35,8 @@ public record SearchParameters(WebsiteUrl url,
SearchTitleParameter.DEFAULT, SearchTitleParameter.DEFAULT,
SearchAdtechParameter.DEFAULT, SearchAdtechParameter.DEFAULT,
false, false,
page); page,
0);
} }
public String profileStr() { public String profileStr() {
@@ -42,30 +44,30 @@ public record SearchParameters(WebsiteUrl url,
} }
public SearchParameters withProfile(SearchProfile profile) { public SearchParameters withProfile(SearchProfile profile) {
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page); return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page, debug);
} }
public SearchParameters withJs(SearchJsParameter js) { public SearchParameters withJs(SearchJsParameter js) {
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page); return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page, debug);
} }
public SearchParameters withAdtech(SearchAdtechParameter adtech) { public SearchParameters withAdtech(SearchAdtechParameter adtech) {
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page); return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page, debug);
} }
public SearchParameters withRecent(SearchRecentParameter recent) { public SearchParameters withRecent(SearchRecentParameter recent) {
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page); return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page, debug);
} }
public SearchParameters withTitle(SearchTitleParameter title) { public SearchParameters withTitle(SearchTitleParameter title) {
return new SearchParameters(url, query, profile, js, recent, title, adtech, true, page); return new SearchParameters(url, query, profile, js, recent, title, adtech, true, page, debug);
} }
public SearchParameters withPage(int page) { public SearchParameters withPage(int page) {
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, false, page); return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, false, page, debug);
} }
public SearchParameters withQuery(String query) { public SearchParameters withQuery(String query) {
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, false, page); return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, false, page, debug);
} }
public String renderUrlWithoutSiteFocus() { public String renderUrlWithoutSiteFocus() {

View File

@@ -3,34 +3,52 @@ package nu.marginalia.search.command.commands;
import com.google.inject.Inject; import com.google.inject.Inject;
import io.jooby.MapModelAndView; import io.jooby.MapModelAndView;
import io.jooby.ModelAndView; import io.jooby.ModelAndView;
import nu.marginalia.search.JteRenderer;
import nu.marginalia.search.SearchOperator; import nu.marginalia.search.SearchOperator;
import nu.marginalia.search.command.SearchCommandInterface; import nu.marginalia.search.command.SearchCommandInterface;
import nu.marginalia.search.command.SearchParameters; import nu.marginalia.search.command.SearchParameters;
import nu.marginalia.search.model.DecoratedSearchResults; import nu.marginalia.search.model.DecoratedSearchResults;
import nu.marginalia.search.model.NavbarModel; import nu.marginalia.search.model.NavbarModel;
import java.io.IOException;
import java.util.Map; import java.util.Map;
import java.util.Optional; import java.util.Optional;
public class SearchCommand implements SearchCommandInterface { public class SearchCommand implements SearchCommandInterface {
private final SearchOperator searchOperator; private final SearchOperator searchOperator;
private final JteRenderer jteRenderer;
@Inject @Inject
public SearchCommand(SearchOperator searchOperator, public SearchCommand(SearchOperator searchOperator) {
JteRenderer jteRenderer) throws IOException {
this.searchOperator = searchOperator; this.searchOperator = searchOperator;
this.jteRenderer = jteRenderer;
} }
@Override @Override
public Optional<ModelAndView<?>> process(SearchParameters parameters) throws InterruptedException { public Optional<ModelAndView<?>> process(SearchParameters parameters) throws InterruptedException {
DecoratedSearchResults results = searchOperator.doSearch(parameters); if (parameters.debug() == 0) {
return Optional.of(new MapModelAndView("serp/main.jte", DecoratedSearchResults results = searchOperator.doSearch(parameters);
Map.of("results", results, "navbar", NavbarModel.SEARCH) return Optional.of(new MapModelAndView("serp/main.jte",
)); Map.of("results", results, "navbar", NavbarModel.SEARCH)
));
}
else if (parameters.debug() == 1) {
DecoratedSearchResults results = searchOperator.doSearchFastTrack1(parameters);
return Optional.of(new MapModelAndView("serp/main.jte",
Map.of("results", results, "navbar", NavbarModel.SEARCH)
));
}
else if (parameters.debug() == 2) {
DecoratedSearchResults results = searchOperator.doSearchFastTrack2(parameters);
return Optional.of(new MapModelAndView("serp/main.jte",
Map.of("results", results, "navbar", NavbarModel.SEARCH)
));
}
else if (parameters.debug() == 3) {
DecoratedSearchResults results = searchOperator.doSearchFastTrack3(parameters);
return Optional.of(new MapModelAndView("serp/main.jte",
Map.of("results", results, "navbar", NavbarModel.SEARCH)
));
}
else {
return Optional.empty();
}
} }
} }

View File

@@ -60,7 +60,8 @@ public class SearchFilters {
SearchTitleParameter.DEFAULT, SearchTitleParameter.DEFAULT,
SearchAdtechParameter.DEFAULT, SearchAdtechParameter.DEFAULT,
false, false,
1)); 1,
0));
} }
public SearchFilters(SearchParameters parameters) { public SearchFilters(SearchParameters parameters) {

View File

@@ -39,7 +39,8 @@ public class SearchQueryService {
@QueryParam String recent, @QueryParam String recent,
@QueryParam String searchTitle, @QueryParam String searchTitle,
@QueryParam String adtech, @QueryParam String adtech,
@QueryParam Integer page @QueryParam Integer page,
@QueryParam Integer debug
) { ) {
try { try {
SearchParameters parameters = new SearchParameters(websiteUrl, SearchParameters parameters = new SearchParameters(websiteUrl,
@@ -50,7 +51,9 @@ public class SearchQueryService {
SearchTitleParameter.parse(searchTitle), SearchTitleParameter.parse(searchTitle),
SearchAdtechParameter.parse(adtech), SearchAdtechParameter.parse(adtech),
false, false,
Objects.requireNonNullElse(page,1)); Objects.requireNonNullElse(page,1),
Objects.requireNonNullElse(debug,0)
);
return searchCommandEvaulator.eval(parameters); return searchCommandEvaulator.eval(parameters);
} }