mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-06 07:32:38 +02:00
Compare commits
4 Commits
deploy-005
...
deploy-005
Author | SHA1 | Date | |
---|---|---|---|
|
c67a635103 | ||
|
20b24133fb | ||
|
f2567677e8 | ||
|
bc2c2061f2 |
@@ -16,20 +16,19 @@ import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
|
||||
import static java.lang.Math.clamp;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
@Singleton
|
||||
public class IndexClient {
|
||||
private static final Logger logger = LoggerFactory.getLogger(IndexClient.class);
|
||||
private final GrpcMultiNodeChannelPool<IndexApiGrpc.IndexApiBlockingStub> channelPool;
|
||||
private final DomainBlacklistImpl blacklist;
|
||||
private static final ExecutorService executor = Executors.newVirtualThreadPerTaskExecutor();
|
||||
private static final ExecutorService executor = Executors.newCachedThreadPool();
|
||||
|
||||
@Inject
|
||||
public IndexClient(GrpcChannelPoolFactory channelPoolFactory, DomainBlacklistImpl blacklist) {
|
||||
@@ -51,40 +50,37 @@ public class IndexClient {
|
||||
|
||||
/** Execute a query on the index partitions and return the combined results. */
|
||||
public AggregateQueryResponse executeQueries(RpcIndexQuery indexRequest, Pagination pagination) {
|
||||
List<CompletableFuture<Iterator<RpcDecoratedResultItem>>> futures =
|
||||
channelPool.call(IndexApiGrpc.IndexApiBlockingStub::query)
|
||||
.async(executor)
|
||||
.runEach(indexRequest);
|
||||
|
||||
final int requestedMaxResults = indexRequest.getQueryLimits().getResultsTotal();
|
||||
final int resultsUpperBound = requestedMaxResults * channelPool.getNumNodes();
|
||||
|
||||
List<RpcDecoratedResultItem> results = new ArrayList<>(resultsUpperBound);
|
||||
AtomicInteger totalNumResults = new AtomicInteger(0);
|
||||
|
||||
for (var future : futures) {
|
||||
try {
|
||||
future.get().forEachRemaining(results::add);
|
||||
}
|
||||
catch (Exception e) {
|
||||
logger.error("Downstream exception", e);
|
||||
}
|
||||
}
|
||||
List<RpcDecoratedResultItem> results =
|
||||
channelPool.call(IndexApiGrpc.IndexApiBlockingStub::query)
|
||||
.async(executor)
|
||||
.runEach(indexRequest)
|
||||
.stream()
|
||||
.map(future -> future.thenApply(iterator -> {
|
||||
List<RpcDecoratedResultItem> ret = new ArrayList<>(requestedMaxResults);
|
||||
iterator.forEachRemaining(ret::add);
|
||||
totalNumResults.addAndGet(ret.size());
|
||||
return ret;
|
||||
}))
|
||||
.mapMulti((CompletableFuture<List<RpcDecoratedResultItem>> fut, Consumer<List<RpcDecoratedResultItem>> c) ->{
|
||||
try {
|
||||
c.accept(fut.join());
|
||||
} catch (Exception e) {
|
||||
logger.error("Error while fetching results", e);
|
||||
}
|
||||
})
|
||||
.flatMap(List::stream)
|
||||
.filter(item -> !isBlacklisted(item))
|
||||
.sorted(comparator)
|
||||
.skip(Math.max(0, (pagination.page - 1) * pagination.pageSize))
|
||||
.limit(pagination.pageSize)
|
||||
.toList();
|
||||
|
||||
// Sort the results by ranking score and remove blacklisted domains
|
||||
results.sort(comparator);
|
||||
results.removeIf(this::isBlacklisted);
|
||||
|
||||
int numReceivedResults = results.size();
|
||||
|
||||
// pagination is typically 1-indexed, so we need to adjust the start and end indices
|
||||
int indexStart = (pagination.page - 1) * pagination.pageSize;
|
||||
int indexEnd = (pagination.page) * pagination.pageSize;
|
||||
|
||||
results = results.subList(
|
||||
clamp(indexStart, 0, Math.max(0, results.size() - 1)), // from is inclusive, so subtract 1 from size()
|
||||
clamp(indexEnd, 0, results.size()));
|
||||
|
||||
return new AggregateQueryResponse(results, pagination.page(), numReceivedResults);
|
||||
return new AggregateQueryResponse(results, pagination.page(), totalNumResults.get());
|
||||
}
|
||||
|
||||
private boolean isBlacklisted(RpcDecoratedResultItem item) {
|
||||
|
@@ -294,4 +294,105 @@ public class SearchOperator {
|
||||
}
|
||||
}
|
||||
|
||||
public DecoratedSearchResults doSearchFastTrack1(SearchParameters userParams) {
|
||||
var queryParams = paramFactory.forRegularSearch(userParams);
|
||||
QueryResponse queryResponse = queryClient.search(queryParams);
|
||||
var queryResults = getResultsFromQuery(queryResponse).results;
|
||||
|
||||
// Cluster the results based on the query response
|
||||
List<ClusteredUrlDetails> clusteredResults = SearchResultClusterer
|
||||
.selectStrategy(queryResponse)
|
||||
.clusterResults(queryResults, 25);
|
||||
|
||||
String focusDomain = queryResponse.domain();
|
||||
int focusDomainId = (focusDomain == null || focusDomain.isBlank())
|
||||
? -1
|
||||
: domainQueries.tryGetDomainId(new EdgeDomain(focusDomain)).orElse(0);
|
||||
|
||||
List<ResultsPage> resultPages = IntStream.rangeClosed(1, queryResponse.totalPages())
|
||||
.mapToObj(number -> new ResultsPage(
|
||||
number,
|
||||
number == userParams.page(),
|
||||
userParams.withPage(number).renderUrl()
|
||||
))
|
||||
.toList();
|
||||
|
||||
// Return the results to the user
|
||||
return DecoratedSearchResults.builder()
|
||||
.params(userParams)
|
||||
.problems(List.of())
|
||||
.evalResult("")
|
||||
.results(clusteredResults)
|
||||
.filters(new SearchFilters(userParams))
|
||||
.focusDomain(focusDomain)
|
||||
.focusDomainId(focusDomainId)
|
||||
.resultPages(resultPages)
|
||||
.build();
|
||||
}
|
||||
|
||||
public DecoratedSearchResults doSearchFastTrack2(SearchParameters userParams) {
|
||||
var queryParams = paramFactory.forRegularSearch(userParams);
|
||||
QueryResponse queryResponse = queryClient.search(queryParams);
|
||||
var queryResults = getResultsFromQuery(queryResponse).results;
|
||||
|
||||
// Cluster the results based on the query response
|
||||
List<ClusteredUrlDetails> clusteredResults = SearchResultClusterer
|
||||
.noOpClustering(queryResults, queryResults.size());
|
||||
|
||||
String focusDomain = queryResponse.domain();
|
||||
int focusDomainId = (focusDomain == null || focusDomain.isBlank())
|
||||
? -1
|
||||
: domainQueries.tryGetDomainId(new EdgeDomain(focusDomain)).orElse(0);
|
||||
|
||||
List<ResultsPage> resultPages = IntStream.rangeClosed(1, queryResponse.totalPages())
|
||||
.mapToObj(number -> new ResultsPage(
|
||||
number,
|
||||
number == userParams.page(),
|
||||
userParams.withPage(number).renderUrl()
|
||||
))
|
||||
.toList();
|
||||
|
||||
// Return the results to the user
|
||||
return DecoratedSearchResults.builder()
|
||||
.params(userParams)
|
||||
.problems(List.of())
|
||||
.evalResult("")
|
||||
.results(clusteredResults)
|
||||
.filters(new SearchFilters(userParams))
|
||||
.focusDomain(focusDomain)
|
||||
.focusDomainId(focusDomainId)
|
||||
.resultPages(resultPages)
|
||||
.build();
|
||||
}
|
||||
|
||||
|
||||
public DecoratedSearchResults doSearchFastTrack3(SearchParameters userParams) {
|
||||
var queryParams = paramFactory.forRegularSearch(userParams);
|
||||
QueryResponse queryResponse = queryClient.search(queryParams);
|
||||
var queryResults = getResultsFromQuery(queryResponse).results;
|
||||
|
||||
// Cluster the results based on the query response
|
||||
List<ClusteredUrlDetails> clusteredResults = SearchResultClusterer
|
||||
.noOpClustering(queryResults, queryResults.size());
|
||||
|
||||
List<ResultsPage> resultPages = IntStream.rangeClosed(1, queryResponse.totalPages())
|
||||
.mapToObj(number -> new ResultsPage(
|
||||
number,
|
||||
number == userParams.page(),
|
||||
userParams.withPage(number).renderUrl()
|
||||
))
|
||||
.toList();
|
||||
|
||||
// Return the results to the user
|
||||
return DecoratedSearchResults.builder()
|
||||
.params(userParams)
|
||||
.problems(List.of())
|
||||
.evalResult("")
|
||||
.results(clusteredResults)
|
||||
.filters(new SearchFilters(userParams))
|
||||
.focusDomain(null)
|
||||
.focusDomainId(-1)
|
||||
.resultPages(resultPages)
|
||||
.build();
|
||||
}
|
||||
}
|
||||
|
@@ -17,13 +17,13 @@ public class SearchResultClusterer {
|
||||
|
||||
public static SearchResultClusterStrategy selectStrategy(QueryResponse response) {
|
||||
if (response.domain() != null && !response.domain().isBlank())
|
||||
return SearchResultClusterer::noOp;
|
||||
return SearchResultClusterer::noOpClustering;
|
||||
|
||||
return SearchResultClusterer::byDomain;
|
||||
}
|
||||
|
||||
/** No clustering, just return the results as is */
|
||||
private static List<ClusteredUrlDetails> noOp(List<UrlDetails> results, int total) {
|
||||
public static List<ClusteredUrlDetails> noOpClustering(List<UrlDetails> results, int total) {
|
||||
if (results.isEmpty())
|
||||
return List.of();
|
||||
|
||||
|
@@ -21,7 +21,8 @@ public record SearchParameters(WebsiteUrl url,
|
||||
SearchTitleParameter searchTitle,
|
||||
SearchAdtechParameter adtech,
|
||||
boolean newFilter,
|
||||
int page
|
||||
int page,
|
||||
int debug
|
||||
) {
|
||||
|
||||
public static SearchParameters defaultsForQuery(WebsiteUrl url, String query, int page) {
|
||||
@@ -34,7 +35,8 @@ public record SearchParameters(WebsiteUrl url,
|
||||
SearchTitleParameter.DEFAULT,
|
||||
SearchAdtechParameter.DEFAULT,
|
||||
false,
|
||||
page);
|
||||
page,
|
||||
0);
|
||||
}
|
||||
|
||||
public String profileStr() {
|
||||
@@ -42,30 +44,30 @@ public record SearchParameters(WebsiteUrl url,
|
||||
}
|
||||
|
||||
public SearchParameters withProfile(SearchProfile profile) {
|
||||
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page);
|
||||
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page, debug);
|
||||
}
|
||||
|
||||
public SearchParameters withJs(SearchJsParameter js) {
|
||||
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page);
|
||||
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page, debug);
|
||||
}
|
||||
public SearchParameters withAdtech(SearchAdtechParameter adtech) {
|
||||
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page);
|
||||
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page, debug);
|
||||
}
|
||||
|
||||
public SearchParameters withRecent(SearchRecentParameter recent) {
|
||||
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page);
|
||||
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page, debug);
|
||||
}
|
||||
|
||||
public SearchParameters withTitle(SearchTitleParameter title) {
|
||||
return new SearchParameters(url, query, profile, js, recent, title, adtech, true, page);
|
||||
return new SearchParameters(url, query, profile, js, recent, title, adtech, true, page, debug);
|
||||
}
|
||||
|
||||
public SearchParameters withPage(int page) {
|
||||
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, false, page);
|
||||
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, false, page, debug);
|
||||
}
|
||||
|
||||
public SearchParameters withQuery(String query) {
|
||||
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, false, page);
|
||||
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, false, page, debug);
|
||||
}
|
||||
|
||||
public String renderUrlWithoutSiteFocus() {
|
||||
|
@@ -3,34 +3,52 @@ package nu.marginalia.search.command.commands;
|
||||
import com.google.inject.Inject;
|
||||
import io.jooby.MapModelAndView;
|
||||
import io.jooby.ModelAndView;
|
||||
import nu.marginalia.search.JteRenderer;
|
||||
import nu.marginalia.search.SearchOperator;
|
||||
import nu.marginalia.search.command.SearchCommandInterface;
|
||||
import nu.marginalia.search.command.SearchParameters;
|
||||
import nu.marginalia.search.model.DecoratedSearchResults;
|
||||
import nu.marginalia.search.model.NavbarModel;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
|
||||
public class SearchCommand implements SearchCommandInterface {
|
||||
private final SearchOperator searchOperator;
|
||||
private final JteRenderer jteRenderer;
|
||||
|
||||
|
||||
@Inject
|
||||
public SearchCommand(SearchOperator searchOperator,
|
||||
JteRenderer jteRenderer) throws IOException {
|
||||
public SearchCommand(SearchOperator searchOperator) {
|
||||
this.searchOperator = searchOperator;
|
||||
this.jteRenderer = jteRenderer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Optional<ModelAndView<?>> process(SearchParameters parameters) throws InterruptedException {
|
||||
DecoratedSearchResults results = searchOperator.doSearch(parameters);
|
||||
return Optional.of(new MapModelAndView("serp/main.jte",
|
||||
Map.of("results", results, "navbar", NavbarModel.SEARCH)
|
||||
));
|
||||
if (parameters.debug() == 0) {
|
||||
DecoratedSearchResults results = searchOperator.doSearch(parameters);
|
||||
return Optional.of(new MapModelAndView("serp/main.jte",
|
||||
Map.of("results", results, "navbar", NavbarModel.SEARCH)
|
||||
));
|
||||
}
|
||||
else if (parameters.debug() == 1) {
|
||||
DecoratedSearchResults results = searchOperator.doSearchFastTrack1(parameters);
|
||||
return Optional.of(new MapModelAndView("serp/main.jte",
|
||||
Map.of("results", results, "navbar", NavbarModel.SEARCH)
|
||||
));
|
||||
}
|
||||
else if (parameters.debug() == 2) {
|
||||
DecoratedSearchResults results = searchOperator.doSearchFastTrack2(parameters);
|
||||
return Optional.of(new MapModelAndView("serp/main.jte",
|
||||
Map.of("results", results, "navbar", NavbarModel.SEARCH)
|
||||
));
|
||||
}
|
||||
else if (parameters.debug() == 3) {
|
||||
DecoratedSearchResults results = searchOperator.doSearchFastTrack3(parameters);
|
||||
return Optional.of(new MapModelAndView("serp/main.jte",
|
||||
Map.of("results", results, "navbar", NavbarModel.SEARCH)
|
||||
));
|
||||
}
|
||||
else {
|
||||
return Optional.empty();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -60,7 +60,8 @@ public class SearchFilters {
|
||||
SearchTitleParameter.DEFAULT,
|
||||
SearchAdtechParameter.DEFAULT,
|
||||
false,
|
||||
1));
|
||||
1,
|
||||
0));
|
||||
}
|
||||
|
||||
public SearchFilters(SearchParameters parameters) {
|
||||
|
@@ -39,7 +39,8 @@ public class SearchQueryService {
|
||||
@QueryParam String recent,
|
||||
@QueryParam String searchTitle,
|
||||
@QueryParam String adtech,
|
||||
@QueryParam Integer page
|
||||
@QueryParam Integer page,
|
||||
@QueryParam Integer debug
|
||||
) {
|
||||
try {
|
||||
SearchParameters parameters = new SearchParameters(websiteUrl,
|
||||
@@ -50,7 +51,9 @@ public class SearchQueryService {
|
||||
SearchTitleParameter.parse(searchTitle),
|
||||
SearchAdtechParameter.parse(adtech),
|
||||
false,
|
||||
Objects.requireNonNullElse(page,1));
|
||||
Objects.requireNonNullElse(page,1),
|
||||
Objects.requireNonNullElse(debug,0)
|
||||
);
|
||||
|
||||
return searchCommandEvaulator.eval(parameters);
|
||||
}
|
||||
|
Reference in New Issue
Block a user