1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-06 07:32:38 +02:00

Compare commits

...

2 Commits

Author SHA1 Message Date
Viktor Lofgren
e84d5c497a (crawler) Reduce the likelihood of crawler tasks locking on domains before they are ready
Change to a bounded queue and adding a sleep to reduce the amount of effectively busy looping threads.
2025-04-21 00:39:26 +02:00
Viktor Lofgren
2d2d3e2466 (crawler) Reduce the likelihood of crawler tasks locking on domains before they are ready
Change to a bounded queue and adding a sleep to reduce the amount of effectively busy looping threads.
2025-04-21 00:36:48 +02:00

View File

@@ -42,8 +42,8 @@ import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.security.Security;
import java.util.*;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
@@ -66,7 +66,7 @@ public class CrawlerMain extends ProcessMainClass {
private final DomainLocks domainLocks = new DomainLocks();
private final Map<String, CrawlTask> pendingCrawlTasks = new ConcurrentHashMap<>();
private final LinkedBlockingQueue<CrawlTask> retryQueue = new LinkedBlockingQueue<>();
private final ArrayBlockingQueue<CrawlTask> retryQueue = new ArrayBlockingQueue<>(64);
private final AtomicInteger tasksDone = new AtomicInteger(0);
private final HttpFetcherImpl fetcher;
@@ -445,7 +445,13 @@ public class CrawlerMain extends ProcessMainClass {
// We don't have a lock, so we can't run this task
// we return to avoid blocking the pool for too long
if (lock.isEmpty()) {
retryQueue.add(this);
if (retryQueue.remainingCapacity() > 0) {
// Sleep a moment to avoid busy looping via the retry queue
// in the case when few tasks remain
Thread.sleep(10);
}
retryQueue.put(this);
return;
}
DomainLocks.DomainLock domainLock = lock.get();