1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-06 07:32:38 +02:00

Compare commits

...

7 Commits

11 changed files with 92 additions and 35 deletions

View File

@@ -0,0 +1,6 @@
-- Add additional summary columns to the DOMAIN_SECURITY_EVENTS table
-- to make it easier to make sense of certificate changes.
--
-- Both columns are declared NOT NULL DEFAULT FALSE, so rows that existed
-- before this migration read as "not changed" for the new flags.

-- Flag: the certificate serial number changed between observations.
ALTER TABLE DOMAIN_SECURITY_EVENTS ADD COLUMN CHANGE_CERTIFICATE_SERIAL_NUMBER BOOLEAN NOT NULL DEFAULT FALSE;
-- Flag: the certificate issuer changed between observations.
ALTER TABLE DOMAIN_SECURITY_EVENTS ADD COLUMN CHANGE_CERTIFICATE_ISSUER BOOLEAN NOT NULL DEFAULT FALSE;
-- NOTE(review): presumably run to rebuild/compact the table after the two
-- schema changes above -- confirm it is required on the target database.
OPTIMIZE TABLE DOMAIN_SECURITY_EVENTS;

View File

@@ -12,7 +12,7 @@ public enum ExecutorActor {
RECRAWL(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED), RECRAWL(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
RECRAWL_SINGLE_DOMAIN(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED), RECRAWL_SINGLE_DOMAIN(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
PROC_CRAWLER_SPAWNER(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED), PROC_CRAWLER_SPAWNER(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
PROC_PING_SPAWNER(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED, NodeProfile.SIDELOAD), PROC_PING_SPAWNER(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED, NodeProfile.REALTIME),
PROC_EXPORT_TASKS_SPAWNER(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED), PROC_EXPORT_TASKS_SPAWNER(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
ADJACENCY_CALCULATION(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED), ADJACENCY_CALCULATION(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
EXPORT_DATA(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED), EXPORT_DATA(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),

View File

@@ -27,10 +27,12 @@ public class DbBrowseDomainsRandom {
public List<BrowseResult> getRandomDomains(int count, DomainBlacklist blacklist, int set) { public List<BrowseResult> getRandomDomains(int count, DomainBlacklist blacklist, int set) {
final String q = """ final String q = """
SELECT DOMAIN_ID, DOMAIN_NAME, INDEXED SELECT EC_RANDOM_DOMAINS.DOMAIN_ID, DOMAIN_NAME, INDEXED
FROM EC_RANDOM_DOMAINS FROM EC_RANDOM_DOMAINS
INNER JOIN EC_DOMAIN ON EC_DOMAIN.ID=DOMAIN_ID INNER JOIN EC_DOMAIN ON EC_DOMAIN.ID=DOMAIN_ID
LEFT JOIN DOMAIN_AVAILABILITY_INFORMATION DAI ON DAI.DOMAIN_ID=EC_RANDOM_DOMAINS.DOMAIN_ID
WHERE STATE<2 WHERE STATE<2
AND SERVER_AVAILABLE
AND DOMAIN_SET=? AND DOMAIN_SET=?
AND DOMAIN_ALIAS IS NULL AND DOMAIN_ALIAS IS NULL
ORDER BY RAND() ORDER BY RAND()

View File

@@ -7,6 +7,7 @@ import nu.marginalia.ping.svc.HttpPingService;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
import java.time.Duration; import java.time.Duration;
import java.time.Instant; import java.time.Instant;
import java.util.ArrayList; import java.util.ArrayList;
@@ -85,6 +86,8 @@ public class PingJobScheduler {
} }
public void pause(int nodeId) { public void pause(int nodeId) {
logger.info("Pausing PingJobScheduler for nodeId: {}", nodeId);
if (this.nodeId != null && this.nodeId != nodeId) { if (this.nodeId != null && this.nodeId != nodeId) {
logger.warn("Attempted to pause PingJobScheduler with mismatched nodeId: expected {}, got {}", this.nodeId, nodeId); logger.warn("Attempted to pause PingJobScheduler with mismatched nodeId: expected {}, got {}", this.nodeId, nodeId);
return; return;
@@ -98,6 +101,7 @@ public class PingJobScheduler {
} }
public synchronized void resume(int nodeId) { public synchronized void resume(int nodeId) {
logger.info("Resuming PingJobScheduler for nodeId: {}", nodeId);
if (this.nodeId != null) { if (this.nodeId != null) {
logger.warn("Attempted to resume PingJobScheduler with mismatched nodeId: expected {}, got {}", this.nodeId, nodeId); logger.warn("Attempted to resume PingJobScheduler with mismatched nodeId: expected {}, got {}", this.nodeId, nodeId);
return; return;
@@ -139,24 +143,14 @@ public class PingJobScheduler {
try { try {
List<WritableModel> objects = switch (data) { List<WritableModel> objects = switch (data) {
case HistoricalAvailabilityData.JustDomainReference(DomainReference reference) -> { case HistoricalAvailabilityData.JustDomainReference(DomainReference reference)
logger.info("Processing availability job for domain: {}", reference.domainName()); -> httpPingService.pingDomain(reference, null, null);
yield httpPingService.pingDomain(reference, null, null); case HistoricalAvailabilityData.JustAvailability(String domain, DomainAvailabilityRecord record)
} -> httpPingService.pingDomain(
case HistoricalAvailabilityData.JustAvailability(String domain, DomainAvailabilityRecord record) -> { new DomainReference(record.domainId(), record.nodeId(), domain), record, null);
logger.info("Availability check with no security info: {}", domain); case HistoricalAvailabilityData.AvailabilityAndSecurity(String domain, DomainAvailabilityRecord availability, DomainSecurityRecord security)
yield httpPingService.pingDomain( -> httpPingService.pingDomain(
new DomainReference(record.domainId(), record.nodeId(), domain), new DomainReference(availability.domainId(), availability.nodeId(), domain), availability, security);
record,
null);
}
case HistoricalAvailabilityData.AvailabilityAndSecurity(String domain, DomainAvailabilityRecord availability, DomainSecurityRecord security) -> {
logger.info("Availability check with full historical data: {}", domain);
yield httpPingService.pingDomain(
new DomainReference(availability.domainId(), availability.nodeId(), domain),
availability,
security);
}
}; };
pingDao.write(objects); pingDao.write(objects);
@@ -201,8 +195,8 @@ public class PingJobScheduler {
yield dnsPingService.pingDomain(oldRecord.rootDomainName(), oldRecord); yield dnsPingService.pingDomain(oldRecord.rootDomainName(), oldRecord);
} }
case RootDomainReference.ByName(String name) -> { case RootDomainReference.ByName(String name) -> {
var oldRecord = pingDao.getDomainDnsRecord(name); @Nullable var oldRecord = pingDao.getDomainDnsRecord(name);
yield dnsPingService.pingDomain(oldRecord.rootDomainName(), oldRecord); yield dnsPingService.pingDomain(name, oldRecord);
} }
}; };

View File

@@ -83,7 +83,7 @@ public class PingHttpFetcher {
} catch (SocketTimeoutException ex) { } catch (SocketTimeoutException ex) {
return new TimeoutResponse(ex.getMessage()); return new TimeoutResponse(ex.getMessage());
} catch (IOException e) { } catch (IOException e) {
return new ConnectionError(e.getMessage()); return new ConnectionError(e.getClass().getSimpleName());
} }
} }

View File

@@ -16,6 +16,8 @@ public record DomainSecurityEvent(
boolean certificateProfileChanged, boolean certificateProfileChanged,
boolean certificateSanChanged, boolean certificateSanChanged,
boolean certificatePublicKeyChanged, boolean certificatePublicKeyChanged,
boolean certificateSerialNumberChanged,
boolean certificateIssuerChanged,
Duration oldCertificateTimeToExpiry, Duration oldCertificateTimeToExpiry,
boolean securityHeadersChanged, boolean securityHeadersChanged,
boolean ipChanged, boolean ipChanged,
@@ -41,8 +43,10 @@ public record DomainSecurityEvent(
change_software, change_software,
old_cert_time_to_expiry, old_cert_time_to_expiry,
security_signature_before, security_signature_before,
security_signature_after security_signature_after,
) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?) change_certificate_serial_number,
change_certificate_issuer
) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
""")) """))
{ {
@@ -75,6 +79,9 @@ public record DomainSecurityEvent(
ps.setBytes(14, securitySignatureAfter().compressed()); ps.setBytes(14, securitySignatureAfter().compressed());
} }
ps.setBoolean(15, certificateSerialNumberChanged());
ps.setBoolean(16, certificateIssuerChanged());
ps.executeUpdate(); ps.executeUpdate();
} }
} }

View File

@@ -15,6 +15,8 @@ public record SecurityInformationChange(
boolean isCertificateProfileChanged, boolean isCertificateProfileChanged,
boolean isCertificateSanChanged, boolean isCertificateSanChanged,
boolean isCertificatePublicKeyChanged, boolean isCertificatePublicKeyChanged,
boolean isCertificateSerialNumberChanged,
boolean isCertificateIssuerChanged,
Duration oldCertificateTimeToExpiry, Duration oldCertificateTimeToExpiry,
boolean isSecurityHeadersChanged, boolean isSecurityHeadersChanged,
boolean isIpAddressChanged, boolean isIpAddressChanged,
@@ -30,8 +32,10 @@ public record SecurityInformationChange(
boolean certificateFingerprintChanged = 0 != Arrays.compare(before.sslCertFingerprintSha256(), after.sslCertFingerprintSha256()); boolean certificateFingerprintChanged = 0 != Arrays.compare(before.sslCertFingerprintSha256(), after.sslCertFingerprintSha256());
boolean certificateProfileChanged = before.certificateProfileHash() != after.certificateProfileHash(); boolean certificateProfileChanged = before.certificateProfileHash() != after.certificateProfileHash();
boolean certificateSerialNumberChanged = !Objects.equals(before.sslCertSerialNumber(), after.sslCertSerialNumber());
boolean certificatePublicKeyChanged = 0 != Arrays.compare(before.sslCertPublicKeyHash(), after.sslCertPublicKeyHash()); boolean certificatePublicKeyChanged = 0 != Arrays.compare(before.sslCertPublicKeyHash(), after.sslCertPublicKeyHash());
boolean certificateSanChanged = !Objects.equals(before.sslCertSan(), after.sslCertSan()); boolean certificateSanChanged = !Objects.equals(before.sslCertSan(), after.sslCertSan());
boolean certificateIssuerChanged = !Objects.equals(before.sslCertIssuer(), after.sslCertIssuer());
Duration oldCertificateTimeToExpiry = before.sslCertNotAfter() == null ? null : Duration.between( Duration oldCertificateTimeToExpiry = before.sslCertNotAfter() == null ? null : Duration.between(
Instant.now(), Instant.now(),
@@ -50,6 +54,7 @@ public record SecurityInformationChange(
boolean isChanged = asnChanged boolean isChanged = asnChanged
|| certificateFingerprintChanged || certificateFingerprintChanged
|| securityHeadersChanged || securityHeadersChanged
|| certificateProfileChanged
|| softwareChanged; || softwareChanged;
return new SecurityInformationChange( return new SecurityInformationChange(
@@ -59,6 +64,8 @@ public record SecurityInformationChange(
certificateProfileChanged, certificateProfileChanged,
certificateSanChanged, certificateSanChanged,
certificatePublicKeyChanged, certificatePublicKeyChanged,
certificateSerialNumberChanged,
certificateIssuerChanged,
oldCertificateTimeToExpiry, oldCertificateTimeToExpiry,
securityHeadersChanged, securityHeadersChanged,
ipChanged, ipChanged,

View File

@@ -48,7 +48,6 @@ public class DnsPingService {
switch (changes) { switch (changes) {
case DnsRecordChange.None _ -> {} case DnsRecordChange.None _ -> {}
case DnsRecordChange.Changed changed -> { case DnsRecordChange.Changed changed -> {
logger.info("DNS record for {} changed: {}", newRecord.dnsRootDomainId(), changed);
generatedRecords.add(DomainDnsEvent.builder() generatedRecords.add(DomainDnsEvent.builder()
.rootDomainId(newRecord.dnsRootDomainId()) .rootDomainId(newRecord.dnsRootDomainId())
.nodeId(newRecord.nodeAffinity()) .nodeId(newRecord.nodeAffinity())

View File

@@ -8,6 +8,7 @@ import nu.marginalia.ping.ssl.PKIXValidationResult;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
import java.security.MessageDigest; import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException; import java.security.NoSuchAlgorithmException;
import java.security.cert.CertificateEncodingException; import java.security.cert.CertificateEncodingException;
@@ -21,13 +22,17 @@ public class DomainSecurityInformationFactory {
private static final Logger logger = LoggerFactory.getLogger(DomainSecurityInformationFactory.class); private static final Logger logger = LoggerFactory.getLogger(DomainSecurityInformationFactory.class);
// Vanilla HTTP (not HTTPS) response does not have SSL session information, so we return null // Vanilla HTTP (not HTTPS) response does not have SSL session information, so we return null
public DomainSecurityRecord createHttpSecurityInformation(HttpResponse httpResponse, int domainId, int nodeId) { public DomainSecurityRecord createHttpSecurityInformation(HttpResponse httpResponse,
int domainId, int nodeId,
@Nullable Integer asn
) {
var headers = httpResponse.headers(); var headers = httpResponse.headers();
return DomainSecurityRecord.builder() return DomainSecurityRecord.builder()
.domainId(domainId) .domainId(domainId)
.nodeId(nodeId) .nodeId(nodeId)
.asn(asn)
.httpSchema(HttpSchema.HTTP) .httpSchema(HttpSchema.HTTP)
.httpVersion(httpResponse.version()) .httpVersion(httpResponse.version())
.headerServer(headers.getFirst("Server")) .headerServer(headers.getFirst("Server"))
@@ -47,7 +52,13 @@ public class DomainSecurityInformationFactory {
} }
// HTTPS response // HTTPS response
public DomainSecurityRecord createHttpsSecurityInformation(HttpsResponse httpResponse, PKIXValidationResult validationResult, int domainId, int nodeId) { public DomainSecurityRecord createHttpsSecurityInformation(
HttpsResponse httpResponse,
PKIXValidationResult validationResult,
int domainId,
int nodeId,
@Nullable Integer asn
) {
var headers = httpResponse.headers(); var headers = httpResponse.headers();
@@ -86,6 +97,7 @@ public class DomainSecurityInformationFactory {
return DomainSecurityRecord.builder() return DomainSecurityRecord.builder()
.domainId(domainId) .domainId(domainId)
.nodeId(nodeId) .nodeId(nodeId)
.asn(asn)
.httpSchema(HttpSchema.HTTPS) .httpSchema(HttpSchema.HTTPS)
.headerServer(headers.getFirst("Server")) .headerServer(headers.getFirst("Server"))
.headerCorsAllowOrigin(headers.getFirst("Access-Control-Allow-Origin")) .headerCorsAllowOrigin(headers.getFirst("Access-Control-Allow-Origin"))

View File

@@ -18,6 +18,7 @@ import java.net.InetAddress;
import java.net.UnknownHostException; import java.net.UnknownHostException;
import java.security.cert.X509Certificate; import java.security.cert.X509Certificate;
import java.sql.SQLException; import java.sql.SQLException;
import java.time.Duration;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
@@ -75,8 +76,8 @@ public class HttpPingService {
result = pingHttpFetcher.fetchUrl(url, Method.HEAD, null, null); result = pingHttpFetcher.fetchUrl(url, Method.HEAD, null, null);
if (result instanceof HttpsResponse response && response.httpStatus() == 405) { if (result instanceof HttpsResponse response && shouldTryGET(response.httpStatus())) {
// If we get a 405, we try the GET method instead as not all servers support HEAD requests sleep(Duration.ofSeconds(2));
result = pingHttpFetcher.fetchUrl(url, Method.GET, null, null); result = pingHttpFetcher.fetchUrl(url, Method.GET, null, null);
} }
else if (result instanceof ConnectionError) { else if (result instanceof ConnectionError) {
@@ -84,8 +85,8 @@ public class HttpPingService {
if (!(result2 instanceof ConnectionError)) { if (!(result2 instanceof ConnectionError)) {
result = result2; result = result2;
} }
if (result instanceof HttpResponse response && response.httpStatus() == 405) { if (result instanceof HttpResponse response && shouldTryGET(response.httpStatus())) {
// If we get a 405, we try the GET method instead as not all servers support HEAD requests sleep(Duration.ofSeconds(2));
result = pingHttpFetcher.fetchUrl(alternateUrl, Method.GET, null, null); result = pingHttpFetcher.fetchUrl(alternateUrl, Method.GET, null, null);
} }
} }
@@ -116,7 +117,7 @@ public class HttpPingService {
domainReference.nodeId(), domainReference.nodeId(),
oldPingStatus, oldPingStatus,
ErrorClassification.CONNECTION_ERROR, ErrorClassification.CONNECTION_ERROR,
null); rsp.errorMessage());
newSecurityInformation = null; newSecurityInformation = null;
} }
case TimeoutResponse rsp -> { case TimeoutResponse rsp -> {
@@ -148,7 +149,8 @@ public class HttpPingService {
newSecurityInformation = domainSecurityInformationFactory.createHttpSecurityInformation( newSecurityInformation = domainSecurityInformationFactory.createHttpSecurityInformation(
httpResponse, httpResponse,
domainReference.domainId(), domainReference.domainId(),
domainReference.nodeId() domainReference.nodeId(),
newPingStatus.asn()
); );
} }
case HttpsResponse httpsResponse -> { case HttpsResponse httpsResponse -> {
@@ -166,7 +168,8 @@ public class HttpPingService {
httpsResponse, httpsResponse,
validationResult, validationResult,
domainReference.domainId(), domainReference.domainId(),
domainReference.nodeId() domainReference.nodeId(),
newPingStatus.asn()
); );
} }
} }
@@ -190,6 +193,29 @@ public class HttpPingService {
return generatedRecords; return generatedRecords;
} }
/**
 * Decides whether a request that produced the given HTTP status code
 * should be retried using the GET method.
 *
 * @param statusCode the HTTP status code of the earlier response
 * @return {@code true} for every status in the range 400-599 except 429;
 *         {@code false} for anything below 400, for 429, and for 600 and above
 */
private boolean shouldTryGET(int statusCode) {
    // 429 Too Many Requests: a follow-up GET would only add to the load,
    // so this status never triggers a retry.
    final boolean tooManyRequests = statusCode == 429;

    // Any other 4xx/5xx status is worth a GET attempt, as many servers do not
    // cope with HEAD requests properly.
    final boolean errorStatus = statusCode >= 400 && statusCode < 600;

    return errorStatus && !tooManyRequests;
}
/**
 * Blocks the calling thread for the given duration, at millisecond
 * granularity.
 *
 * <p>If the thread is interrupted while sleeping, the method returns early:
 * the interrupt flag is set again so callers can observe it, and a warning
 * is logged. No exception is propagated.
 *
 * @param duration how long to pause; converted with {@link Duration#toMillis()}
 */
private void sleep(Duration duration) {
    final long pauseMillis = duration.toMillis();

    try {
        Thread.sleep(pauseMillis);
    } catch (InterruptedException interrupted) {
        // Catching InterruptedException clears the flag; set it again so the
        // interruption stays visible to code further up the stack.
        Thread.currentThread().interrupt();
        logger.warn("Sleep interrupted", interrupted);
    }
}
private void comparePingStatuses(List<WritableModel> generatedRecords, private void comparePingStatuses(List<WritableModel> generatedRecords,
DomainAvailabilityRecord oldPingStatus, DomainAvailabilityRecord oldPingStatus,
DomainAvailabilityRecord newPingStatus) { DomainAvailabilityRecord newPingStatus) {
@@ -258,6 +284,8 @@ public class HttpPingService {
change.isCertificateProfileChanged(), change.isCertificateProfileChanged(),
change.isCertificateSanChanged(), change.isCertificateSanChanged(),
change.isCertificatePublicKeyChanged(), change.isCertificatePublicKeyChanged(),
change.isCertificateSerialNumberChanged(),
change.isCertificateIssuerChanged(),
change.oldCertificateTimeToExpiry(), change.oldCertificateTimeToExpiry(),
change.isSecurityHeadersChanged(), change.isSecurityHeadersChanged(),
change.isIpAddressChanged(), change.isIpAddressChanged(),

View File

@@ -318,6 +318,8 @@ class PingDaoTest {
true, true,
false, false,
true, true,
true,
false,
Duration.ofDays(30), Duration.ofDays(30),
false, false,
false, false,