1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-06 07:32:38 +02:00

Compare commits

...

5 Commits

Author SHA1 Message Date
Viktor Lofgren
58e6f141ce (crawler) Reduce congestion throttle go-rate 2025-04-17 20:36:58 +02:00
Viktor Lofgren
500f63e921 (crawler) Lower max conn per route 2025-04-17 18:36:16 +02:00
Viktor Lofgren
6dfbedda1e (crawler) Increase max conn per route and connection timeout 2025-04-17 18:31:46 +02:00
Viktor Lofgren
9715ddb105 (crawler) Increase max pool size to a large value 2025-04-17 18:22:58 +02:00
Viktor Lofgren
1fc6313a77 (crawler) Remove log noise when retrying a bad URL 2025-04-17 17:10:46 +02:00
2 changed files with 4 additions and 4 deletions

View File

@@ -80,11 +80,12 @@ public class HttpFetcherImpl implements HttpFetcher, HttpRequestRetryStrategy {
private CloseableHttpClient createClient() throws NoSuchAlgorithmException {
final ConnectionConfig connectionConfig = ConnectionConfig.custom()
.setSocketTimeout(10, TimeUnit.SECONDS)
-.setConnectTimeout(10, TimeUnit.SECONDS)
+.setConnectTimeout(30, TimeUnit.SECONDS)
.build();
final PoolingHttpClientConnectionManager connectionManager = PoolingHttpClientConnectionManagerBuilder.create()
-.setMaxConnPerRoute(4)
+.setMaxConnPerRoute(2)
+.setMaxConnTotal(5000)
.setDefaultConnectionConfig(connectionConfig)
.setTlsSocketStrategy(new DefaultClientTlsStrategy(SSLContext.getDefault()))
.build();
@@ -613,7 +614,6 @@ public class HttpFetcherImpl implements HttpFetcher, HttpRequestRetryStrategy {
@Override
public TimeValue getRetryInterval(HttpRequest request, IOException exception, int executionCount, HttpContext context) {
-logger.info("Error", exception);
return TimeValue.ofSeconds(1);
}

View File

@@ -53,7 +53,7 @@ public class CrawlerRetreiver implements AutoCloseable {
private final CrawlerRevisitor crawlerRevisitor;
private static final CrawlerConnectionThrottle connectionThrottle = new CrawlerConnectionThrottle(
-Duration.ofSeconds(1) // pace the connections to avoid network congestion by waiting 1 second between establishing them
+Duration.ofMillis(50) // pace the connections to avoid network congestion at startup
);
int errorCount = 0;