1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-06 07:32:38 +02:00

Compare commits

...

12 Commits

Author SHA1 Message Date
Viktor Lofgren
f59a9eb025 (legacy-search) Soften domain limit constraints in URL deduplication 2025-05-17 00:04:27 +02:00
Viktor Lofgren
599534806b (search) Soften domain limit constraints in URL deduplication 2025-05-17 00:00:42 +02:00
Viktor Lofgren
7e8253dac7 (search) Clean up debug logging 2025-05-17 00:00:28 +02:00
Viktor Lofgren
97a6780ea3 (search) Add debug logging for specific query 2025-05-16 23:41:35 +02:00
Viktor Lofgren
eb634beec8 (search) Add debug logging for specific query 2025-05-16 23:34:03 +02:00
Viktor Lofgren
269ebd1654 Revert "(query) Add debug logging for specific query"
This reverts commit 39ce40bfeb.
2025-05-16 23:29:06 +02:00
Viktor Lofgren
39ce40bfeb (query) Add debug logging for specific query 2025-05-16 23:23:53 +02:00
Viktor Lofgren
c187b2e1c1 (search) Re-enable clustering 2025-05-16 23:20:16 +02:00
Viktor Lofgren
42eaa4588b (search) Disable clustering for a moment 2025-05-16 23:17:01 +02:00
Viktor Lofgren
4f40a5fbeb (search) Reduce log spam 2025-05-16 23:15:07 +02:00
Viktor Lofgren
3f3d42bc01 (search) Re-enable deduplication 2025-05-16 23:14:54 +02:00
Viktor Lofgren
61c8d53e1b (search) Disable deduplication for a moment 2025-05-16 23:10:32 +02:00
6 changed files with 4 additions and 20 deletions

View File

@@ -112,14 +112,6 @@ public class EdgeDomain implements Serializable {
return topDomain; return topDomain;
} }
public String getDomainKey() {
int cutPoint = topDomain.indexOf('.');
if (cutPoint < 0) {
return topDomain;
}
return topDomain.substring(0, cutPoint).toLowerCase();
}
/** If possible, try to provide an alias domain, /** If possible, try to provide an alias domain,
* i.e. a domain name that is very likely to link to this one * i.e. a domain name that is very likely to link to this one
* */ * */

View File

@@ -8,14 +8,6 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
class EdgeDomainTest { class EdgeDomainTest {
@Test
public void testSkepdic() throws URISyntaxException {
var domain = new EdgeUrl("http://www.skepdic.com/astrology.html");
assertEquals("skepdic", domain.getDomain().getDomainKey());
var domain2 = new EdgeUrl("http://skepdic.com/astrology.html");
assertEquals("skepdic", domain2.getDomain().getDomainKey());
}
@Test @Test
public void testHkDomain() throws URISyntaxException { public void testHkDomain() throws URISyntaxException {
var domain = new EdgeUrl("http://l7072i3.l7c.net"); var domain = new EdgeUrl("http://l7072i3.l7c.net");

View File

@@ -61,7 +61,7 @@ public class UrlDeduplicator {
private boolean limitResultsPerDomain(DecoratedSearchResultItem details) { private boolean limitResultsPerDomain(DecoratedSearchResultItem details) {
final var domain = details.getUrl().getDomain(); final var domain = details.getUrl().getDomain();
final String key = domain.getDomainKey(); final String key = domain.toString();
return keyCount.adjustOrPutValue(key, 1, 1) <= resultsPerKey; return keyCount.adjustOrPutValue(key, 1, 1) <= resultsPerKey;
} }

View File

@@ -23,7 +23,7 @@ public class SearchResultClusterer {
} }
/** No clustering, just return the results as is */ /** No clustering, just return the results as is */
private static List<ClusteredUrlDetails> noOp(List<UrlDetails> results, int total) { public static List<ClusteredUrlDetails> noOp(List<UrlDetails> results, int total) {
if (results.isEmpty()) if (results.isEmpty())
return List.of(); return List.of();

View File

@@ -85,7 +85,6 @@ public class SearchService extends JoobyService {
String emptySvg = "<svg xmlns=\"http://www.w3.org/2000/svg\"></svg>"; String emptySvg = "<svg xmlns=\"http://www.w3.org/2000/svg\"></svg>";
jooby.get("/site/{domain}/favicon", ctx -> { jooby.get("/site/{domain}/favicon", ctx -> {
String domain = ctx.path("domain").value(); String domain = ctx.path("domain").value();
logger.info("Finding icon for domain {}", domain);
try { try {
DbDomainQueries.DomainIdWithNode domainIdWithNode = domainQueries.getDomainIdWithNode(new EdgeDomain(domain)); DbDomainQueries.DomainIdWithNode domainIdWithNode = domainQueries.getDomainIdWithNode(new EdgeDomain(domain));
var faviconMaybe = faviconClient.getFavicon(domain, domainIdWithNode.nodeAffinity()); var faviconMaybe = faviconClient.getFavicon(domain, domainIdWithNode.nodeAffinity());

View File

@@ -25,6 +25,7 @@ public class UrlDeduplicator {
} }
public boolean shouldRemove(DecoratedSearchResultItem details) { public boolean shouldRemove(DecoratedSearchResultItem details) {
if (!deduplicateOnSuperficialHash(details)) if (!deduplicateOnSuperficialHash(details))
return true; return true;
if (!deduplicateOnLSH(details)) if (!deduplicateOnLSH(details))
@@ -61,7 +62,7 @@ public class UrlDeduplicator {
private boolean limitResultsPerDomain(DecoratedSearchResultItem details) { private boolean limitResultsPerDomain(DecoratedSearchResultItem details) {
final var domain = details.getUrl().getDomain(); final var domain = details.getUrl().getDomain();
final String key = domain.getDomainKey(); final String key = domain.toString();
return keyCount.adjustOrPutValue(key, 1, 1) <= resultsPerKey; return keyCount.adjustOrPutValue(key, 1, 1) <= resultsPerKey;
} }