1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-06 07:32:38 +02:00

Compare commits

...

7 Commits

8 changed files with 220 additions and 41 deletions

View File

@@ -12,7 +12,7 @@ public enum ExecutorActor {
RECRAWL(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED), RECRAWL(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
RECRAWL_SINGLE_DOMAIN(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED), RECRAWL_SINGLE_DOMAIN(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
PROC_CRAWLER_SPAWNER(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED), PROC_CRAWLER_SPAWNER(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
PROC_PING_SPAWNER(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED, NodeProfile.SIDELOAD), PROC_PING_SPAWNER(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED, NodeProfile.REALTIME),
PROC_EXPORT_TASKS_SPAWNER(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED), PROC_EXPORT_TASKS_SPAWNER(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
ADJACENCY_CALCULATION(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED), ADJACENCY_CALCULATION(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
EXPORT_DATA(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED), EXPORT_DATA(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),

View File

@@ -3,24 +3,184 @@ package nu.marginalia.actor.proc;
import com.google.gson.Gson; import com.google.gson.Gson;
import com.google.inject.Inject; import com.google.inject.Inject;
import com.google.inject.Singleton; import com.google.inject.Singleton;
import nu.marginalia.actor.monitor.AbstractProcessSpawnerActor; import nu.marginalia.actor.prototype.RecordActorPrototype;
import nu.marginalia.actor.state.ActorResumeBehavior;
import nu.marginalia.actor.state.ActorStep;
import nu.marginalia.actor.state.Resume;
import nu.marginalia.actor.state.Terminal;
import nu.marginalia.mq.MqMessageState;
import nu.marginalia.mq.persistence.MqMessageHandlerRegistry;
import nu.marginalia.mq.persistence.MqPersistence; import nu.marginalia.mq.persistence.MqPersistence;
import nu.marginalia.mqapi.ProcessInboxNames; import nu.marginalia.mqapi.ProcessInboxNames;
import nu.marginalia.mqapi.ping.PingRequest;
import nu.marginalia.nodecfg.NodeConfigurationService;
import nu.marginalia.nodecfg.model.NodeProfile;
import nu.marginalia.process.ProcessService; import nu.marginalia.process.ProcessService;
import nu.marginalia.service.module.ServiceConfiguration; import nu.marginalia.service.module.ServiceConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.sql.SQLException;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
@Singleton @Singleton
public class PingMonitorActor extends AbstractProcessSpawnerActor { public class PingMonitorActor extends RecordActorPrototype {
private final MqPersistence persistence;
private final ProcessService processService;
private final Logger logger = LoggerFactory.getLogger(getClass());
public static final int MAX_ATTEMPTS = 3;
private final String inboxName;
private final ProcessService.ProcessId processId;
private final ExecutorService executorService = Executors.newSingleThreadExecutor();
private final int node;
private final boolean isPrimaryNode;
private final Gson gson;
public record Initial() implements ActorStep {}
@Resume(behavior = ActorResumeBehavior.RETRY)
public record Monitor(int errorAttempts) implements ActorStep {}
@Resume(behavior = ActorResumeBehavior.RESTART)
public record Run(int attempts) implements ActorStep {}
@Terminal
public record Aborted() implements ActorStep {}
@Override
public ActorStep transition(ActorStep self) throws Exception {
return switch (self) {
case Initial i -> {
PingRequest request = new PingRequest(isPrimaryNode ? "primary": "secondary");
persistence.sendNewMessage(inboxName, null, null,
"PingRequest",
gson.toJson(request),
null);
yield new Monitor(0);
}
case Monitor(int errorAttempts) -> {
for (;;) {
var messages = persistence.eavesdrop(inboxName, 1);
if (messages.isEmpty() && !processService.isRunning(processId)) {
synchronized (processId) {
processId.wait(5000);
}
if (errorAttempts > 0) { // Reset the error counter if there is silence in the inbox
yield new Monitor(0);
}
// else continue
} else {
// Special: Associate this thread with the message so that we can get tracking
MqMessageHandlerRegistry.register(messages.getFirst().msgId());
yield new Run(0);
}
}
}
case Run(int attempts) -> {
try {
long startTime = System.currentTimeMillis();
var exec = new TaskExecution();
long endTime = System.currentTimeMillis();
if (exec.isError()) {
if (attempts < MAX_ATTEMPTS)
yield new Run(attempts + 1);
else
yield new Error();
}
else if (endTime - startTime < TimeUnit.SECONDS.toMillis(1)) {
// To avoid boot loops, we transition to error if the process
// didn't run for longer than 1 seconds. This might happen if
// the process crashes before it can reach the heartbeat and inbox
// stages of execution. In this case it would not report having acted
// on its message, and the process would be restarted forever without
// the attempts counter incrementing.
yield new Error("Process terminated within 1 seconds of starting");
}
}
catch (InterruptedException ex) {
// We get this exception when the process is cancelled by the user
processService.kill(processId);
setCurrentMessageToDead();
yield new Aborted();
}
yield new Monitor(attempts);
}
default -> new Error();
};
}
public String describe() {
return "Spawns a(n) " + processId + " process and monitors its inbox for messages";
}
@Inject @Inject
public PingMonitorActor(Gson gson, ServiceConfiguration configuration, MqPersistence persistence, ProcessService processService) { public PingMonitorActor(Gson gson,
super(gson, NodeConfigurationService nodeConfigurationService,
configuration, ServiceConfiguration configuration,
persistence, MqPersistence persistence,
processService, ProcessService processService) throws SQLException {
ProcessInboxNames.PING_INBOX, super(gson);
ProcessService.ProcessId.PING); this.gson = gson;
this.node = configuration.node();
this.persistence = persistence;
this.processService = processService;
this.inboxName = ProcessInboxNames.PING_INBOX + ":" + node;
this.processId = ProcessService.ProcessId.PING;
this.isPrimaryNode = Set.of(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED)
.contains(nodeConfigurationService.get(node).profile());
} }
/** Sets the message to dead in the database to avoid
* the service respawning on the same task when we
* re-enable this actor */
private void setCurrentMessageToDead() {
try {
var messages = persistence.eavesdrop(inboxName, 1);
if (messages.isEmpty()) // Possibly a race condition where the task is already finished
return;
var theMessage = messages.iterator().next();
persistence.updateMessageState(theMessage.msgId(), MqMessageState.DEAD);
}
catch (SQLException ex) {
logger.error("Tried but failed to set the message for " + processId + " to dead", ex);
}
}
/** Encapsulates the execution of the process in a separate thread so that
* we can interrupt the thread if the process is cancelled */
private class TaskExecution {
private final AtomicBoolean error = new AtomicBoolean(false);
public TaskExecution() throws ExecutionException, InterruptedException {
// Run this call in a separate thread so that this thread can be interrupted waiting for it
executorService.submit(() -> {
try {
processService.trigger(processId);
} catch (Exception e) {
logger.warn("Error in triggering process", e);
error.set(true);
}
}).get(); // Wait for the process to start
}
public boolean isError() {
return error.get();
}
}
} }

View File

@@ -27,10 +27,12 @@ public class DbBrowseDomainsRandom {
public List<BrowseResult> getRandomDomains(int count, DomainBlacklist blacklist, int set) { public List<BrowseResult> getRandomDomains(int count, DomainBlacklist blacklist, int set) {
final String q = """ final String q = """
SELECT DOMAIN_ID, DOMAIN_NAME, INDEXED SELECT EC_RANDOM_DOMAINS.DOMAIN_ID, DOMAIN_NAME, INDEXED
FROM EC_RANDOM_DOMAINS FROM EC_RANDOM_DOMAINS
INNER JOIN EC_DOMAIN ON EC_DOMAIN.ID=DOMAIN_ID INNER JOIN EC_DOMAIN ON EC_DOMAIN.ID=DOMAIN_ID
LEFT JOIN DOMAIN_AVAILABILITY_INFORMATION DAI ON DAI.DOMAIN_ID=EC_RANDOM_DOMAINS.DOMAIN_ID
WHERE STATE<2 WHERE STATE<2
AND SERVER_AVAILABLE
AND DOMAIN_SET=? AND DOMAIN_SET=?
AND DOMAIN_ALIAS IS NULL AND DOMAIN_ALIAS IS NULL
ORDER BY RAND() ORDER BY RAND()

View File

@@ -137,7 +137,7 @@ public class PingDao {
public HistoricalAvailabilityData getHistoricalAvailabilityData(long domainId) throws SQLException { public HistoricalAvailabilityData getHistoricalAvailabilityData(long domainId) throws SQLException {
var query = """ var query = """
SELECT EC_DOMAIN.ID, EC_DOMAIN.DOMAIN_NAME, DOMAIN_AVAILABILITY_INFORMATION.*, DOMAIN_SECURITY_INFORMATION.* SELECT EC_DOMAIN.ID, EC_DOMAIN.DOMAIN_NAME, EC_DOMAIN.NODE_AFFINITY, DOMAIN_AVAILABILITY_INFORMATION.*, DOMAIN_SECURITY_INFORMATION.*
FROM EC_DOMAIN FROM EC_DOMAIN
LEFT JOIN DOMAIN_SECURITY_INFORMATION ON DOMAIN_SECURITY_INFORMATION.DOMAIN_ID = EC_DOMAIN.ID LEFT JOIN DOMAIN_SECURITY_INFORMATION ON DOMAIN_SECURITY_INFORMATION.DOMAIN_ID = EC_DOMAIN.ID
LEFT JOIN DOMAIN_AVAILABILITY_INFORMATION ON DOMAIN_AVAILABILITY_INFORMATION.DOMAIN_ID = EC_DOMAIN.ID LEFT JOIN DOMAIN_AVAILABILITY_INFORMATION ON DOMAIN_AVAILABILITY_INFORMATION.DOMAIN_ID = EC_DOMAIN.ID
@@ -168,7 +168,7 @@ public class PingDao {
if (dar == null) { if (dar == null) {
return new HistoricalAvailabilityData.JustDomainReference(new DomainReference( return new HistoricalAvailabilityData.JustDomainReference(new DomainReference(
rs.getInt("EC_DOMAIN.ID"), rs.getInt("EC_DOMAIN.ID"),
rs.getInt("EC_DOMAIN.NODE_ID"), rs.getInt("EC_DOMAIN.NODE_AFFINITY"),
domainName.toLowerCase() domainName.toLowerCase()
)); ));
} }

View File

@@ -7,6 +7,7 @@ import nu.marginalia.ping.svc.HttpPingService;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
import java.time.Duration; import java.time.Duration;
import java.time.Instant; import java.time.Instant;
import java.util.ArrayList; import java.util.ArrayList;
@@ -85,6 +86,8 @@ public class PingJobScheduler {
} }
public void pause(int nodeId) { public void pause(int nodeId) {
logger.info("Pausing PingJobScheduler for nodeId: {}", nodeId);
if (this.nodeId != null && this.nodeId != nodeId) { if (this.nodeId != null && this.nodeId != nodeId) {
logger.warn("Attempted to pause PingJobScheduler with mismatched nodeId: expected {}, got {}", this.nodeId, nodeId); logger.warn("Attempted to pause PingJobScheduler with mismatched nodeId: expected {}, got {}", this.nodeId, nodeId);
return; return;
@@ -98,6 +101,7 @@ public class PingJobScheduler {
} }
public synchronized void resume(int nodeId) { public synchronized void resume(int nodeId) {
logger.info("Resuming PingJobScheduler for nodeId: {}", nodeId);
if (this.nodeId != null) { if (this.nodeId != null) {
logger.warn("Attempted to resume PingJobScheduler with mismatched nodeId: expected {}, got {}", this.nodeId, nodeId); logger.warn("Attempted to resume PingJobScheduler with mismatched nodeId: expected {}, got {}", this.nodeId, nodeId);
return; return;
@@ -139,24 +143,14 @@ public class PingJobScheduler {
try { try {
List<WritableModel> objects = switch (data) { List<WritableModel> objects = switch (data) {
case HistoricalAvailabilityData.JustDomainReference(DomainReference reference) -> { case HistoricalAvailabilityData.JustDomainReference(DomainReference reference)
logger.info("Processing availability job for domain: {}", reference.domainName()); -> httpPingService.pingDomain(reference, null, null);
yield httpPingService.pingDomain(reference, null, null); case HistoricalAvailabilityData.JustAvailability(String domain, DomainAvailabilityRecord record)
} -> httpPingService.pingDomain(
case HistoricalAvailabilityData.JustAvailability(String domain, DomainAvailabilityRecord record) -> { new DomainReference(record.domainId(), record.nodeId(), domain), record, null);
logger.info("Availability check with no security info: {}", domain); case HistoricalAvailabilityData.AvailabilityAndSecurity(String domain, DomainAvailabilityRecord availability, DomainSecurityRecord security)
yield httpPingService.pingDomain( -> httpPingService.pingDomain(
new DomainReference(record.domainId(), record.nodeId(), domain), new DomainReference(availability.domainId(), availability.nodeId(), domain), availability, security);
record,
null);
}
case HistoricalAvailabilityData.AvailabilityAndSecurity(String domain, DomainAvailabilityRecord availability, DomainSecurityRecord security) -> {
logger.info("Availability check with full historical data: {}", domain);
yield httpPingService.pingDomain(
new DomainReference(availability.domainId(), availability.nodeId(), domain),
availability,
security);
}
}; };
pingDao.write(objects); pingDao.write(objects);
@@ -201,8 +195,8 @@ public class PingJobScheduler {
yield dnsPingService.pingDomain(oldRecord.rootDomainName(), oldRecord); yield dnsPingService.pingDomain(oldRecord.rootDomainName(), oldRecord);
} }
case RootDomainReference.ByName(String name) -> { case RootDomainReference.ByName(String name) -> {
var oldRecord = pingDao.getDomainDnsRecord(name); @Nullable var oldRecord = pingDao.getDomainDnsRecord(name);
yield dnsPingService.pingDomain(oldRecord.rootDomainName(), oldRecord); yield dnsPingService.pingDomain(name, oldRecord);
} }
}; };

View File

@@ -83,7 +83,7 @@ public class PingHttpFetcher {
} catch (SocketTimeoutException ex) { } catch (SocketTimeoutException ex) {
return new TimeoutResponse(ex.getMessage()); return new TimeoutResponse(ex.getMessage());
} catch (IOException e) { } catch (IOException e) {
return new ConnectionError(e.getMessage()); return new ConnectionError(e.getClass().getSimpleName());
} }
} }

View File

@@ -48,7 +48,6 @@ public class DnsPingService {
switch (changes) { switch (changes) {
case DnsRecordChange.None _ -> {} case DnsRecordChange.None _ -> {}
case DnsRecordChange.Changed changed -> { case DnsRecordChange.Changed changed -> {
logger.info("DNS record for {} changed: {}", newRecord.dnsRootDomainId(), changed);
generatedRecords.add(DomainDnsEvent.builder() generatedRecords.add(DomainDnsEvent.builder()
.rootDomainId(newRecord.dnsRootDomainId()) .rootDomainId(newRecord.dnsRootDomainId())
.nodeId(newRecord.nodeAffinity()) .nodeId(newRecord.nodeAffinity())

View File

@@ -18,6 +18,7 @@ import java.net.InetAddress;
import java.net.UnknownHostException; import java.net.UnknownHostException;
import java.security.cert.X509Certificate; import java.security.cert.X509Certificate;
import java.sql.SQLException; import java.sql.SQLException;
import java.time.Duration;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
@@ -75,8 +76,8 @@ public class HttpPingService {
result = pingHttpFetcher.fetchUrl(url, Method.HEAD, null, null); result = pingHttpFetcher.fetchUrl(url, Method.HEAD, null, null);
if (result instanceof HttpsResponse response && response.httpStatus() == 405) { if (result instanceof HttpsResponse response && shouldTryGET(response.httpStatus())) {
// If we get a 405, we try the GET method instead as not all servers support HEAD requests sleep(Duration.ofSeconds(2));
result = pingHttpFetcher.fetchUrl(url, Method.GET, null, null); result = pingHttpFetcher.fetchUrl(url, Method.GET, null, null);
} }
else if (result instanceof ConnectionError) { else if (result instanceof ConnectionError) {
@@ -84,8 +85,8 @@ public class HttpPingService {
if (!(result2 instanceof ConnectionError)) { if (!(result2 instanceof ConnectionError)) {
result = result2; result = result2;
} }
if (result instanceof HttpResponse response && response.httpStatus() == 405) { if (result instanceof HttpResponse response && shouldTryGET(response.httpStatus())) {
// If we get a 405, we try the GET method instead as not all servers support HEAD requests sleep(Duration.ofSeconds(2));
result = pingHttpFetcher.fetchUrl(alternateUrl, Method.GET, null, null); result = pingHttpFetcher.fetchUrl(alternateUrl, Method.GET, null, null);
} }
} }
@@ -116,7 +117,7 @@ public class HttpPingService {
domainReference.nodeId(), domainReference.nodeId(),
oldPingStatus, oldPingStatus,
ErrorClassification.CONNECTION_ERROR, ErrorClassification.CONNECTION_ERROR,
null); rsp.errorMessage());
newSecurityInformation = null; newSecurityInformation = null;
} }
case TimeoutResponse rsp -> { case TimeoutResponse rsp -> {
@@ -190,6 +191,29 @@ public class HttpPingService {
return generatedRecords; return generatedRecords;
} }
private boolean shouldTryGET(int statusCode) {
if (statusCode < 400) {
return false;
}
if (statusCode == 429) { // Too many requests, we should not retry with GET
return false;
}
// For all other status codes, we can try a GET request, as many severs do not
// cope with HEAD requests properly.
return statusCode < 600;
}
private void sleep(Duration duration) {
try {
Thread.sleep(duration.toMillis());
} catch (InterruptedException e) {
Thread.currentThread().interrupt(); // Restore the interrupted status
logger.warn("Sleep interrupted", e);
}
}
private void comparePingStatuses(List<WritableModel> generatedRecords, private void comparePingStatuses(List<WritableModel> generatedRecords,
DomainAvailabilityRecord oldPingStatus, DomainAvailabilityRecord oldPingStatus,
DomainAvailabilityRecord newPingStatus) { DomainAvailabilityRecord newPingStatus) {