mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-05 21:22:39 +02:00
Compare commits
4 Commits
deploy-018
...
deploy-019
Author | SHA1 | Date | |
---|---|---|---|
|
97a6780ea3 | ||
|
eb634beec8 | ||
|
269ebd1654 | ||
|
39ce40bfeb |
@@ -112,6 +112,13 @@ public class SearchOperator {
|
||||
.selectStrategy(queryResponse)
|
||||
.clusterResults(queryResults, 25);
|
||||
|
||||
if (queryParams.humanQuery().equals("slackware linux")) {
|
||||
logger.info("Query response: {}", queryResponse.results().subList(0, 5));
|
||||
logger.info("Query results: {}", queryResults.subList(0, 5));
|
||||
logger.info("Clustered results: {}", clusteredResults.subList(0, 5));
|
||||
}
|
||||
|
||||
|
||||
// Log the query and results
|
||||
|
||||
logger.info(queryMarker, "Human terms: {}", Strings.join(queryResponse.searchTermsHuman(), ','));
|
||||
|
@@ -25,13 +25,28 @@ public class UrlDeduplicator {
|
||||
}
|
||||
|
||||
public boolean shouldRemove(DecoratedSearchResultItem details) {
|
||||
if (!deduplicateOnSuperficialHash(details))
|
||||
return true;
|
||||
if (!deduplicateOnLSH(details))
|
||||
return true;
|
||||
if (!limitResultsPerDomain(details))
|
||||
return true;
|
||||
|
||||
if (details.url.domain.topDomain.equals("slackware.com")) {
|
||||
if (!deduplicateOnSuperficialHash(details)) {
|
||||
logger.info("Rejecting on superficial hash " + details.url);
|
||||
return true;
|
||||
}
|
||||
if (!deduplicateOnLSH(details)) {
|
||||
logger.info("Rejecting on LSH for " + details.url);
|
||||
return true;
|
||||
}
|
||||
if (!limitResultsPerDomain(details)) {
|
||||
logger.info("Rejecting on limitResultsPerDomain for " + details.url);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (!deduplicateOnSuperficialHash(details))
|
||||
return true;
|
||||
if (!deduplicateOnLSH(details))
|
||||
return true;
|
||||
if (!limitResultsPerDomain(details))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user