1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-05 21:22:39 +02:00

Compare commits

...

9 Commits

Author SHA1 Message Date
Viktor Lofgren
f59a9eb025 (legacy-search) Soften domain limit constraints in URL deduplication 2025-05-17 00:04:27 +02:00
Viktor Lofgren
599534806b (search) Soften domain limit constraints in URL deduplication 2025-05-17 00:00:42 +02:00
Viktor Lofgren
7e8253dac7 (search) Clean up debug logging 2025-05-17 00:00:28 +02:00
Viktor Lofgren
97a6780ea3 (search) Add debug logging for specific query 2025-05-16 23:41:35 +02:00
Viktor Lofgren
eb634beec8 (search) Add debug logging for specific query 2025-05-16 23:34:03 +02:00
Viktor Lofgren
269ebd1654 Revert "(query) Add debug logging for specific query"
This reverts commit 39ce40bfeb.
2025-05-16 23:29:06 +02:00
Viktor Lofgren
39ce40bfeb (query) Add debug logging for specific query 2025-05-16 23:23:53 +02:00
Viktor Lofgren
c187b2e1c1 (search) Re-enable clustering 2025-05-16 23:20:16 +02:00
Viktor Lofgren
42eaa4588b (search) Disable clustering for a moment 2025-05-16 23:17:01 +02:00
5 changed files with 4 additions and 19 deletions

View File

@@ -112,14 +112,6 @@ public class EdgeDomain implements Serializable {
return topDomain;
}
/**
 * Returns the part of {@code topDomain} that precedes its first {@code '.'},
 * lower-cased; when {@code topDomain} contains no dot, the whole value is used.
 *
 * <p>Lower-casing uses {@link java.util.Locale#ROOT} so the key does not depend
 * on the JVM's default locale (e.g. the Turkish dotless-i mapping), and it is
 * applied on both paths so a dotless top domain is normalized the same way as
 * a dotted one.
 *
 * @return the lower-cased leading label of {@code topDomain}
 */
public String getDomainKey() {
    final int cutPoint = topDomain.indexOf('.');
    // No dot: the whole top domain is the key
    final String label = (cutPoint < 0) ? topDomain : topDomain.substring(0, cutPoint);
    return label.toLowerCase(java.util.Locale.ROOT);
}
/** If possible, try to provide an alias domain,
* i.e. a domain name that is very likely to link to this one
* */

View File

@@ -8,14 +8,6 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
class EdgeDomainTest {
/** The www-prefixed and the bare skepdic.com URL must both yield the domain key "skepdic". */
@Test
public void testSkepdic() throws URISyntaxException {
    final String[] addresses = {
            "http://www.skepdic.com/astrology.html",
            "http://skepdic.com/astrology.html"
    };

    // Checked in the listed order, one URL at a time
    for (String address : addresses) {
        var url = new EdgeUrl(address);
        assertEquals("skepdic", url.getDomain().getDomainKey());
    }
}
@Test
public void testHkDomain() throws URISyntaxException {
var domain = new EdgeUrl("http://l7072i3.l7c.net");

View File

@@ -61,7 +61,7 @@ public class UrlDeduplicator {
/**
 * Checks the counter kept in {@code keyCount} for the result's domain against
 * {@code resultsPerKey}.
 *
 * @param details the search result; its URL's domain supplies the counter key
 * @return {@code true} when the value returned by {@code keyCount.adjustOrPutValue}
 *         for the key is at most {@code resultsPerKey}
 */
private boolean limitResultsPerDomain(DecoratedSearchResultItem details) {
final var domain = details.getUrl().getDomain();
// NOTE(review): this diff hunk lists two declarations of `key` back to back and the
// +/- markers are missing; presumably the getDomainKey() line is the removed one and
// the toString() line is its replacement -- confirm against the commit.
final String key = domain.getDomainKey();
final String key = domain.toString();
// Presumably adjustOrPutValue adds 1 to the count for key (or stores 1 when absent) and
// returns the updated count -- verify against the declared type of keyCount.
return keyCount.adjustOrPutValue(key, 1, 1) <= resultsPerKey;
}

View File

@@ -23,7 +23,7 @@ public class SearchResultClusterer {
}
/** No clustering, just return the results as is */
private static List<ClusteredUrlDetails> noOp(List<UrlDetails> results, int total) {
public static List<ClusteredUrlDetails> noOp(List<UrlDetails> results, int total) {
if (results.isEmpty())
return List.of();

View File

@@ -25,6 +25,7 @@ public class UrlDeduplicator {
}
public boolean shouldRemove(DecoratedSearchResultItem details) {
if (!deduplicateOnSuperficialHash(details))
return true;
if (!deduplicateOnLSH(details))
@@ -61,7 +62,7 @@ public class UrlDeduplicator {
/**
 * Checks the counter kept in {@code keyCount} for the result's domain against
 * {@code resultsPerKey}.
 *
 * @param details the search result; its URL's domain supplies the counter key
 * @return {@code true} when the value returned by {@code keyCount.adjustOrPutValue}
 *         for the key is at most {@code resultsPerKey}
 */
private boolean limitResultsPerDomain(DecoratedSearchResultItem details) {
final var domain = details.getUrl().getDomain();
// NOTE(review): this diff hunk lists two declarations of `key` back to back and the
// +/- markers are missing; presumably the getDomainKey() line is the removed one and
// the toString() line is its replacement -- confirm against the commit.
final String key = domain.getDomainKey();
final String key = domain.toString();
// Presumably adjustOrPutValue adds 1 to the count for key (or stores 1 when absent) and
// returns the updated count -- verify against the declared type of keyCount.
return keyCount.adjustOrPutValue(key, 1, 1) <= resultsPerKey;
}