1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-07 13:52:39 +02:00

Compare commits

...

1 Commits

Author SHA1 Message Date
Viktor Lofgren
5387e2bd80 (crawler) Adjust crawl order to get a better mixture of domains 2025-03-27 11:12:48 +01:00

View File

@@ -319,7 +319,7 @@ public class CrawlerMain extends ProcessMainClass {
 randomOrder.put(spec.domain, r.nextInt());
 }
-return Comparator.comparing((CrawlSpecRecord spec) -> topDomainCounts.getOrDefault(EdgeDomain.getTopDomain(spec.domain), 0))
+return Comparator.comparing((CrawlSpecRecord spec) -> topDomainCounts.getOrDefault(EdgeDomain.getTopDomain(spec.domain), 0) >= 8)
 .reversed()
 .thenComparing(spec -> randomOrder.get(spec.domain))
 .thenComparing(Record::hashCode); // non-deterministic tie-breaker to