1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-06 07:32:38 +02:00

Compare commits

...

4 Commits

Author SHA1 Message Date
Viktor Lofgren
c67a635103 (search, experimental) Add a few debugging tracks to the search UI 2025-01-10 15:44:44 +01:00
Viktor Lofgren
20b24133fb (search, experimental) Add a few debugging tracks to the search UI 2025-01-10 15:34:48 +01:00
Viktor Lofgren
f2567677e8 (index-client) Clean up index client code
Improve error handling.  This should be a relatively rare case, but we don't want one bad index partition to blow up the entire query.
2025-01-10 15:17:07 +01:00
Viktor Lofgren
bc2c2061f2 (index-client) Clean up index client code
This should have the RPC stream reception performed in parallel in separate threads, rather than blocking sequentially in the main thread, hopefully giving a slight performance boost.
2025-01-10 15:14:42 +01:00
7 changed files with 178 additions and 57 deletions

View File

@@ -16,20 +16,19 @@ import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import static java.lang.Math.clamp;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Consumer;
@Singleton
public class IndexClient {
private static final Logger logger = LoggerFactory.getLogger(IndexClient.class);
private final GrpcMultiNodeChannelPool<IndexApiGrpc.IndexApiBlockingStub> channelPool;
private final DomainBlacklistImpl blacklist;
private static final ExecutorService executor = Executors.newVirtualThreadPerTaskExecutor();
private static final ExecutorService executor = Executors.newCachedThreadPool();
@Inject
public IndexClient(GrpcChannelPoolFactory channelPoolFactory, DomainBlacklistImpl blacklist) {
@@ -51,40 +50,37 @@ public class IndexClient {
/** Execute a query on the index partitions and return the combined results. */
public AggregateQueryResponse executeQueries(RpcIndexQuery indexRequest, Pagination pagination) {
List<CompletableFuture<Iterator<RpcDecoratedResultItem>>> futures =
channelPool.call(IndexApiGrpc.IndexApiBlockingStub::query)
.async(executor)
.runEach(indexRequest);
final int requestedMaxResults = indexRequest.getQueryLimits().getResultsTotal();
final int resultsUpperBound = requestedMaxResults * channelPool.getNumNodes();
List<RpcDecoratedResultItem> results = new ArrayList<>(resultsUpperBound);
AtomicInteger totalNumResults = new AtomicInteger(0);
for (var future : futures) {
try {
future.get().forEachRemaining(results::add);
}
catch (Exception e) {
logger.error("Downstream exception", e);
}
}
List<RpcDecoratedResultItem> results =
channelPool.call(IndexApiGrpc.IndexApiBlockingStub::query)
.async(executor)
.runEach(indexRequest)
.stream()
.map(future -> future.thenApply(iterator -> {
List<RpcDecoratedResultItem> ret = new ArrayList<>(requestedMaxResults);
iterator.forEachRemaining(ret::add);
totalNumResults.addAndGet(ret.size());
return ret;
}))
.mapMulti((CompletableFuture<List<RpcDecoratedResultItem>> fut, Consumer<List<RpcDecoratedResultItem>> c) ->{
try {
c.accept(fut.join());
} catch (Exception e) {
logger.error("Error while fetching results", e);
}
})
.flatMap(List::stream)
.filter(item -> !isBlacklisted(item))
.sorted(comparator)
.skip(Math.max(0, (pagination.page - 1) * pagination.pageSize))
.limit(pagination.pageSize)
.toList();
// Sort the results by ranking score and remove blacklisted domains
results.sort(comparator);
results.removeIf(this::isBlacklisted);
int numReceivedResults = results.size();
// pagination is typically 1-indexed, so we need to adjust the start and end indices
int indexStart = (pagination.page - 1) * pagination.pageSize;
int indexEnd = (pagination.page) * pagination.pageSize;
results = results.subList(
clamp(indexStart, 0, Math.max(0, results.size() - 1)), // from is inclusive, so subtract 1 from size()
clamp(indexEnd, 0, results.size()));
return new AggregateQueryResponse(results, pagination.page(), numReceivedResults);
return new AggregateQueryResponse(results, pagination.page(), totalNumResults.get());
}
private boolean isBlacklisted(RpcDecoratedResultItem item) {

View File

@@ -294,4 +294,105 @@ public class SearchOperator {
}
}
/**
 * Experimental debugging track #1 for the search UI.
 *
 * <p>Runs a regular search for {@code userParams}, clusters the hits with the
 * strategy chosen by {@link SearchResultClusterer#selectStrategy} (asking for
 * a total of 25), resolves the focus domain, and builds the pagination links.
 * The returned model always carries an empty problem list and an empty
 * evaluation result.
 *
 * @param userParams the search parameters supplied by the user
 * @return the decorated results ready for rendering
 */
public DecoratedSearchResults doSearchFastTrack1(SearchParameters userParams) {
    // Execute the query exactly as a regular search would
    QueryResponse response = queryClient.search(paramFactory.forRegularSearch(userParams));
    var urlDetails = getResultsFromQuery(response).results;

    // Let the response decide how the hits are grouped
    var clusterStrategy = SearchResultClusterer.selectStrategy(response);
    List<ClusteredUrlDetails> clusters = clusterStrategy.clusterResults(urlDetails, 25);

    // -1 means "no focus domain"; 0 is used when the domain has no known id
    String focusDomain = response.domain();
    int focusDomainId;
    if (focusDomain != null && !focusDomain.isBlank()) {
        focusDomainId = domainQueries.tryGetDomainId(new EdgeDomain(focusDomain)).orElse(0);
    } else {
        focusDomainId = -1;
    }

    // One navigation entry per result page, flagging the page being viewed
    List<ResultsPage> pages = IntStream
            .rangeClosed(1, response.totalPages())
            .mapToObj(pageNo -> new ResultsPage(
                    pageNo,
                    pageNo == userParams.page(),
                    userParams.withPage(pageNo).renderUrl()))
            .toList();

    // Assemble the model handed to the template
    return DecoratedSearchResults.builder()
            .params(userParams)
            .problems(List.of())
            .evalResult("")
            .results(clusters)
            .filters(new SearchFilters(userParams))
            .focusDomain(focusDomain)
            .focusDomainId(focusDomainId)
            .resultPages(pages)
            .build();
}
/**
 * Experimental debugging track #2 for the search UI.
 *
 * <p>Runs a regular search for {@code userParams} but bypasses strategy
 * selection: every hit is passed through
 * {@link SearchResultClusterer#noOpClustering} with the full result count as
 * the total. The focus domain is still resolved and pagination links are still
 * built. The returned model always carries an empty problem list and an empty
 * evaluation result.
 *
 * @param userParams the search parameters supplied by the user
 * @return the decorated results ready for rendering
 */
public DecoratedSearchResults doSearchFastTrack2(SearchParameters userParams) {
    // Execute the query exactly as a regular search would
    QueryResponse response = queryClient.search(paramFactory.forRegularSearch(userParams));
    var urlDetails = getResultsFromQuery(response).results;

    // No strategy selection here: always use the no-op clustering
    List<ClusteredUrlDetails> clusters =
            SearchResultClusterer.noOpClustering(urlDetails, urlDetails.size());

    // -1 means "no focus domain"; 0 is used when the domain has no known id
    String focusDomain = response.domain();
    int focusDomainId;
    if (focusDomain != null && !focusDomain.isBlank()) {
        focusDomainId = domainQueries.tryGetDomainId(new EdgeDomain(focusDomain)).orElse(0);
    } else {
        focusDomainId = -1;
    }

    // One navigation entry per result page, flagging the page being viewed
    List<ResultsPage> pages = IntStream
            .rangeClosed(1, response.totalPages())
            .mapToObj(pageNo -> new ResultsPage(
                    pageNo,
                    pageNo == userParams.page(),
                    userParams.withPage(pageNo).renderUrl()))
            .toList();

    // Assemble the model handed to the template
    return DecoratedSearchResults.builder()
            .params(userParams)
            .problems(List.of())
            .evalResult("")
            .results(clusters)
            .filters(new SearchFilters(userParams))
            .focusDomain(focusDomain)
            .focusDomainId(focusDomainId)
            .resultPages(pages)
            .build();
}
/**
 * Experimental debugging track #3 for the search UI.
 *
 * <p>Runs a regular search for {@code userParams}, passes every hit through
 * {@link SearchResultClusterer#noOpClustering} with the full result count as
 * the total, and skips the focus-domain lookup entirely: the returned model
 * always has a {@code null} focus domain and a focus domain id of -1.
 * Pagination links are still built, and the model carries an empty problem
 * list and an empty evaluation result.
 *
 * @param userParams the search parameters supplied by the user
 * @return the decorated results ready for rendering
 */
public DecoratedSearchResults doSearchFastTrack3(SearchParameters userParams) {
    // Execute the query exactly as a regular search would
    QueryResponse response = queryClient.search(paramFactory.forRegularSearch(userParams));
    var urlDetails = getResultsFromQuery(response).results;

    // No strategy selection here: always use the no-op clustering
    List<ClusteredUrlDetails> clusters =
            SearchResultClusterer.noOpClustering(urlDetails, urlDetails.size());

    // One navigation entry per result page, flagging the page being viewed
    List<ResultsPage> pages = IntStream
            .rangeClosed(1, response.totalPages())
            .mapToObj(pageNo -> new ResultsPage(
                    pageNo,
                    pageNo == userParams.page(),
                    userParams.withPage(pageNo).renderUrl()))
            .toList();

    // Assemble the model; the focus domain is deliberately left unset
    return DecoratedSearchResults.builder()
            .params(userParams)
            .problems(List.of())
            .evalResult("")
            .results(clusters)
            .filters(new SearchFilters(userParams))
            .focusDomain(null)
            .focusDomainId(-1)
            .resultPages(pages)
            .build();
}
}

View File

@@ -17,13 +17,13 @@ public class SearchResultClusterer {
public static SearchResultClusterStrategy selectStrategy(QueryResponse response) {
if (response.domain() != null && !response.domain().isBlank())
return SearchResultClusterer::noOp;
return SearchResultClusterer::noOpClustering;
return SearchResultClusterer::byDomain;
}
/** No clustering, just return the results as is */
private static List<ClusteredUrlDetails> noOp(List<UrlDetails> results, int total) {
public static List<ClusteredUrlDetails> noOpClustering(List<UrlDetails> results, int total) {
if (results.isEmpty())
return List.of();

View File

@@ -21,7 +21,8 @@ public record SearchParameters(WebsiteUrl url,
SearchTitleParameter searchTitle,
SearchAdtechParameter adtech,
boolean newFilter,
int page
int page,
int debug
) {
public static SearchParameters defaultsForQuery(WebsiteUrl url, String query, int page) {
@@ -34,7 +35,8 @@ public record SearchParameters(WebsiteUrl url,
SearchTitleParameter.DEFAULT,
SearchAdtechParameter.DEFAULT,
false,
page);
page,
0);
}
public String profileStr() {
@@ -42,30 +44,30 @@ public record SearchParameters(WebsiteUrl url,
}
public SearchParameters withProfile(SearchProfile profile) {
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page);
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page, debug);
}
public SearchParameters withJs(SearchJsParameter js) {
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page);
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page, debug);
}
public SearchParameters withAdtech(SearchAdtechParameter adtech) {
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page);
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page, debug);
}
public SearchParameters withRecent(SearchRecentParameter recent) {
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page);
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page, debug);
}
public SearchParameters withTitle(SearchTitleParameter title) {
return new SearchParameters(url, query, profile, js, recent, title, adtech, true, page);
return new SearchParameters(url, query, profile, js, recent, title, adtech, true, page, debug);
}
public SearchParameters withPage(int page) {
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, false, page);
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, false, page, debug);
}
public SearchParameters withQuery(String query) {
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, false, page);
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, false, page, debug);
}
public String renderUrlWithoutSiteFocus() {

View File

@@ -3,34 +3,52 @@ package nu.marginalia.search.command.commands;
import com.google.inject.Inject;
import io.jooby.MapModelAndView;
import io.jooby.ModelAndView;
import nu.marginalia.search.JteRenderer;
import nu.marginalia.search.SearchOperator;
import nu.marginalia.search.command.SearchCommandInterface;
import nu.marginalia.search.command.SearchParameters;
import nu.marginalia.search.model.DecoratedSearchResults;
import nu.marginalia.search.model.NavbarModel;
import java.io.IOException;
import java.util.Map;
import java.util.Optional;
public class SearchCommand implements SearchCommandInterface {
private final SearchOperator searchOperator;
private final JteRenderer jteRenderer;
@Inject
public SearchCommand(SearchOperator searchOperator,
JteRenderer jteRenderer) throws IOException {
public SearchCommand(SearchOperator searchOperator) {
this.searchOperator = searchOperator;
this.jteRenderer = jteRenderer;
}
@Override
public Optional<ModelAndView<?>> process(SearchParameters parameters) throws InterruptedException {
DecoratedSearchResults results = searchOperator.doSearch(parameters);
return Optional.of(new MapModelAndView("serp/main.jte",
Map.of("results", results, "navbar", NavbarModel.SEARCH)
));
if (parameters.debug() == 0) {
DecoratedSearchResults results = searchOperator.doSearch(parameters);
return Optional.of(new MapModelAndView("serp/main.jte",
Map.of("results", results, "navbar", NavbarModel.SEARCH)
));
}
else if (parameters.debug() == 1) {
DecoratedSearchResults results = searchOperator.doSearchFastTrack1(parameters);
return Optional.of(new MapModelAndView("serp/main.jte",
Map.of("results", results, "navbar", NavbarModel.SEARCH)
));
}
else if (parameters.debug() == 2) {
DecoratedSearchResults results = searchOperator.doSearchFastTrack2(parameters);
return Optional.of(new MapModelAndView("serp/main.jte",
Map.of("results", results, "navbar", NavbarModel.SEARCH)
));
}
else if (parameters.debug() == 3) {
DecoratedSearchResults results = searchOperator.doSearchFastTrack3(parameters);
return Optional.of(new MapModelAndView("serp/main.jte",
Map.of("results", results, "navbar", NavbarModel.SEARCH)
));
}
else {
return Optional.empty();
}
}
}

View File

@@ -60,7 +60,8 @@ public class SearchFilters {
SearchTitleParameter.DEFAULT,
SearchAdtechParameter.DEFAULT,
false,
1));
1,
0));
}
public SearchFilters(SearchParameters parameters) {

View File

@@ -39,7 +39,8 @@ public class SearchQueryService {
@QueryParam String recent,
@QueryParam String searchTitle,
@QueryParam String adtech,
@QueryParam Integer page
@QueryParam Integer page,
@QueryParam Integer debug
) {
try {
SearchParameters parameters = new SearchParameters(websiteUrl,
@@ -50,7 +51,9 @@ public class SearchQueryService {
SearchTitleParameter.parse(searchTitle),
SearchAdtechParameter.parse(adtech),
false,
Objects.requireNonNullElse(page,1));
Objects.requireNonNullElse(page,1),
Objects.requireNonNullElse(debug,0)
);
return searchCommandEvaulator.eval(parameters);
}