1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-06 07:32:38 +02:00

Compare commits

...

11 Commits

Author SHA1 Message Date
Viktor Lofgren
fd83a9d0b8 (ping) Handle null case for Subject Alternative Names in SSL certificates 2025-06-16 00:27:37 +02:00
Viktor Lofgren
d556f8ae3a (ping) Ping server should not validate certificates 2025-06-16 00:08:30 +02:00
Viktor Lofgren
e37559837b (crawler) Crawler should validate certificates 2025-06-16 00:06:57 +02:00
Viktor Lofgren
3564c4aaee (ping) Route SSLHandshakeException to ConnectionError as well
This will mean we re-try these as an unencrypted Http connection
2025-06-15 20:31:33 +02:00
Viktor Lofgren
92c54563ab (ping) Reduce retry count on connection errors 2025-06-15 18:39:54 +02:00
Viktor Lofgren
d7a5d90b07 (ping) Store redirect location in availability record 2025-06-15 18:39:33 +02:00
Viktor Lofgren
0a0e88fd6e (ping) Fix schema drift between prod and flyway migrations 2025-06-15 17:20:21 +02:00
Viktor Lofgren
b4fc0c4368 (ping) Fix schema drift between prod and flyway migrations 2025-06-15 17:17:11 +02:00
Viktor Lofgren
87ee8765b8 (ping) Ensure ProtocolError->HTTP_CLIENT_ERROR retains its error message information 2025-06-15 16:54:27 +02:00
Viktor Lofgren
1adf4835fa (ping) Add schema change information to domain security events
Particularly the HTTPS->HTTP-change event appears to be a strong indicator of domain parking.
2025-06-15 16:47:49 +02:00
Viktor Lofgren
b7b5d0bf46 (ping) More accurately detect connection errors 2025-06-15 16:47:07 +02:00
13 changed files with 121 additions and 52 deletions

View File

@@ -0,0 +1,5 @@
-- Add additional summary columns to DOMAIN_SECURITY_EVENTS table
-- to make it easier to make sense of certificate changes
ALTER TABLE DOMAIN_SECURITY_EVENTS ADD COLUMN CHANGE_SCHEMA ENUM('NONE', 'HTTP_TO_HTTPS', 'HTTPS_TO_HTTP', 'UNKNOWN') NOT NULL DEFAULT 'UNKNOWN';
OPTIMIZE TABLE DOMAIN_SECURITY_EVENTS;

View File

@@ -36,7 +36,6 @@ import org.apache.hc.core5.http.io.support.ClassicRequestBuilder;
import org.apache.hc.core5.http.message.MessageSupport; import org.apache.hc.core5.http.message.MessageSupport;
import org.apache.hc.core5.http.protocol.HttpContext; import org.apache.hc.core5.http.protocol.HttpContext;
import org.apache.hc.core5.pool.PoolStats; import org.apache.hc.core5.pool.PoolStats;
import org.apache.hc.core5.ssl.SSLContextBuilder;
import org.apache.hc.core5.util.TimeValue; import org.apache.hc.core5.util.TimeValue;
import org.apache.hc.core5.util.Timeout; import org.apache.hc.core5.util.Timeout;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
@@ -49,15 +48,12 @@ import org.slf4j.MarkerFactory;
import javax.net.ssl.SSLContext; import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLException; import javax.net.ssl.SSLException;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import java.io.IOException; import java.io.IOException;
import java.net.SocketTimeoutException; import java.net.SocketTimeoutException;
import java.net.URISyntaxException; import java.net.URISyntaxException;
import java.net.UnknownHostException; import java.net.UnknownHostException;
import java.security.KeyManagementException; import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException; import java.security.NoSuchAlgorithmException;
import java.security.cert.X509Certificate;
import java.time.Duration; import java.time.Duration;
import java.time.Instant; import java.time.Instant;
import java.util.*; import java.util.*;
@@ -99,42 +95,12 @@ public class HttpFetcherImpl implements HttpFetcher, HttpRequestRetryStrategy {
.setValidateAfterInactivity(TimeValue.ofSeconds(5)) .setValidateAfterInactivity(TimeValue.ofSeconds(5))
.build(); .build();
// No-op up front validation of server certificates.
//
// We will validate certificates later, after the connection is established
// as we want to store the certificate chain and validation
// outcome to the database.
var trustMeBro = new X509TrustManager() {
private X509Certificate[] lastServerCertChain;
@Override
public void checkClientTrusted(X509Certificate[] chain, String authType) {
}
@Override
public void checkServerTrusted(X509Certificate[] chain, String authType) {
this.lastServerCertChain = chain.clone();
}
@Override
public X509Certificate[] getAcceptedIssuers() {
return new X509Certificate[0];
}
public X509Certificate[] getLastServerCertChain() {
return lastServerCertChain != null ? lastServerCertChain.clone() : null;
}
};
SSLContext sslContext = SSLContextBuilder.create().build();
sslContext.init(null, new TrustManager[]{trustMeBro}, null);
connectionManager = PoolingHttpClientConnectionManagerBuilder.create() connectionManager = PoolingHttpClientConnectionManagerBuilder.create()
.setMaxConnPerRoute(2) .setMaxConnPerRoute(2)
.setMaxConnTotal(5000) .setMaxConnTotal(5000)
.setDefaultConnectionConfig(connectionConfig) .setDefaultConnectionConfig(connectionConfig)
.setTlsSocketStrategy(new DefaultClientTlsStrategy(sslContext)) .setTlsSocketStrategy(new DefaultClientTlsStrategy(SSLContext.getDefault()))
.build(); .build();
connectionManager.setDefaultSocketConfig(SocketConfig.custom() connectionManager.setDefaultSocketConfig(SocketConfig.custom()

View File

@@ -4,12 +4,14 @@ import com.google.inject.Inject;
import nu.marginalia.UserAgent; import nu.marginalia.UserAgent;
import nu.marginalia.WmsaHome; import nu.marginalia.WmsaHome;
import nu.marginalia.ping.fetcher.response.*; import nu.marginalia.ping.fetcher.response.*;
import org.apache.hc.client5.http.HttpHostConnectException;
import org.apache.hc.client5.http.classic.HttpClient; import org.apache.hc.client5.http.classic.HttpClient;
import org.apache.hc.client5.http.protocol.HttpClientContext; import org.apache.hc.client5.http.protocol.HttpClientContext;
import org.apache.hc.core5.http.Header; import org.apache.hc.core5.http.Header;
import org.apache.hc.core5.http.io.entity.EntityUtils; import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.io.support.ClassicRequestBuilder; import org.apache.hc.core5.http.io.support.ClassicRequestBuilder;
import javax.net.ssl.SSLHandshakeException;
import java.io.IOException; import java.io.IOException;
import java.net.SocketTimeoutException; import java.net.SocketTimeoutException;
import java.time.Duration; import java.time.Duration;
@@ -82,9 +84,12 @@ public class PingHttpFetcher {
}); });
} catch (SocketTimeoutException ex) { } catch (SocketTimeoutException ex) {
return new TimeoutResponse(ex.getMessage()); return new TimeoutResponse(ex.getMessage());
} catch (IOException e) { } catch (HttpHostConnectException | SSLHandshakeException e) {
return new ConnectionError(e.getClass().getSimpleName()); return new ConnectionError(e.getClass().getSimpleName());
} catch (IOException e) {
return new ProtocolError(e.getClass().getSimpleName());
} }
} }
} }

View File

@@ -18,13 +18,18 @@ import org.apache.hc.core5.http.HttpResponse;
import org.apache.hc.core5.http.io.SocketConfig; import org.apache.hc.core5.http.io.SocketConfig;
import org.apache.hc.core5.http.message.MessageSupport; import org.apache.hc.core5.http.message.MessageSupport;
import org.apache.hc.core5.http.protocol.HttpContext; import org.apache.hc.core5.http.protocol.HttpContext;
import org.apache.hc.core5.ssl.SSLContextBuilder;
import org.apache.hc.core5.util.TimeValue; import org.apache.hc.core5.util.TimeValue;
import org.apache.hc.core5.util.Timeout; import org.apache.hc.core5.util.Timeout;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import javax.net.ssl.SSLContext; import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException; import java.security.NoSuchAlgorithmException;
import java.security.cert.X509Certificate;
import java.util.Iterator; import java.util.Iterator;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
@@ -37,24 +42,55 @@ public class HttpClientProvider implements Provider<HttpClient> {
static { static {
try { try {
client = createClient(); client = createClient();
} catch (NoSuchAlgorithmException e) { } catch (Exception e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
} }
private static CloseableHttpClient createClient() throws NoSuchAlgorithmException { private static CloseableHttpClient createClient() throws NoSuchAlgorithmException, KeyManagementException {
final ConnectionConfig connectionConfig = ConnectionConfig.custom() final ConnectionConfig connectionConfig = ConnectionConfig.custom()
.setSocketTimeout(15, TimeUnit.SECONDS) .setSocketTimeout(15, TimeUnit.SECONDS)
.setConnectTimeout(15, TimeUnit.SECONDS) .setConnectTimeout(15, TimeUnit.SECONDS)
.setValidateAfterInactivity(TimeValue.ofSeconds(5)) .setValidateAfterInactivity(TimeValue.ofSeconds(5))
.build(); .build();
// No-op up front validation of server certificates.
//
// We will validate certificates later, after the connection is established
// as we want to store the certificate chain and validation
// outcome to the database.
var trustMeBro = new X509TrustManager() {
private X509Certificate[] lastServerCertChain;
@Override
public void checkClientTrusted(X509Certificate[] chain, String authType) {
}
@Override
public void checkServerTrusted(X509Certificate[] chain, String authType) {
this.lastServerCertChain = chain.clone();
}
@Override
public X509Certificate[] getAcceptedIssuers() {
return new X509Certificate[0];
}
public X509Certificate[] getLastServerCertChain() {
return lastServerCertChain != null ? lastServerCertChain.clone() : null;
}
};
SSLContext sslContext = SSLContextBuilder.create().build();
sslContext.init(null, new TrustManager[]{trustMeBro}, null);
connectionManager = PoolingHttpClientConnectionManagerBuilder.create() connectionManager = PoolingHttpClientConnectionManagerBuilder.create()
.setMaxConnPerRoute(2) .setMaxConnPerRoute(2)
.setMaxConnTotal(50) .setMaxConnTotal(50)
.setDefaultConnectionConfig(connectionConfig) .setDefaultConnectionConfig(connectionConfig)
.setTlsSocketStrategy( .setTlsSocketStrategy(
new DefaultClientTlsStrategy(SSLContext.getDefault(), NoopHostnameVerifier.INSTANCE)) new DefaultClientTlsStrategy(sslContext, NoopHostnameVerifier.INSTANCE))
.build(); .build();
connectionManager.setDefaultSocketConfig(SocketConfig.custom() connectionManager.setDefaultSocketConfig(SocketConfig.custom()

View File

@@ -1,5 +1,6 @@
package nu.marginalia.ping.io; package nu.marginalia.ping.io;
import org.apache.hc.client5.http.HttpHostConnectException;
import org.apache.hc.client5.http.HttpRequestRetryStrategy; import org.apache.hc.client5.http.HttpRequestRetryStrategy;
import org.apache.hc.core5.http.HttpRequest; import org.apache.hc.core5.http.HttpRequest;
import org.apache.hc.core5.http.HttpResponse; import org.apache.hc.core5.http.HttpResponse;
@@ -22,6 +23,7 @@ public class RetryStrategy implements HttpRequestRetryStrategy {
case SocketTimeoutException ste -> false; case SocketTimeoutException ste -> false;
case SSLException ssle -> false; case SSLException ssle -> false;
case UnknownHostException uhe -> false; case UnknownHostException uhe -> false;
case HttpHostConnectException ex -> executionCount <= 2; // Only retry once for connection errors
default -> executionCount <= 3; default -> executionCount <= 3;
}; };
} }

View File

@@ -1,5 +1,7 @@
package nu.marginalia.ping.model; package nu.marginalia.ping.model;
import org.apache.commons.lang3.StringUtils;
import javax.annotation.Nullable; import javax.annotation.Nullable;
import java.sql.Connection; import java.sql.Connection;
import java.sql.ResultSet; import java.sql.ResultSet;
@@ -279,7 +281,7 @@ implements WritableModel
} }
public Builder httpLocation(String httpLocation) { public Builder httpLocation(String httpLocation) {
this.httpLocation = httpLocation; this.httpLocation = StringUtils.abbreviate(httpLocation, "...",255);
return this; return this;
} }

View File

@@ -18,6 +18,7 @@ public record DomainSecurityEvent(
boolean certificatePublicKeyChanged, boolean certificatePublicKeyChanged,
boolean certificateSerialNumberChanged, boolean certificateSerialNumberChanged,
boolean certificateIssuerChanged, boolean certificateIssuerChanged,
SchemaChange schemaChange,
Duration oldCertificateTimeToExpiry, Duration oldCertificateTimeToExpiry,
boolean securityHeadersChanged, boolean securityHeadersChanged,
boolean ipChanged, boolean ipChanged,
@@ -45,8 +46,9 @@ public record DomainSecurityEvent(
security_signature_before, security_signature_before,
security_signature_after, security_signature_after,
change_certificate_serial_number, change_certificate_serial_number,
change_certificate_issuer change_certificate_issuer,
) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?) change_schema
) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
""")) """))
{ {
@@ -81,6 +83,7 @@ public record DomainSecurityEvent(
ps.setBoolean(15, certificateSerialNumberChanged()); ps.setBoolean(15, certificateSerialNumberChanged());
ps.setBoolean(16, certificateIssuerChanged()); ps.setBoolean(16, certificateIssuerChanged());
ps.setString(17, schemaChange.name());
ps.executeUpdate(); ps.executeUpdate();
} }

View File

@@ -0,0 +1,12 @@
package nu.marginalia.ping.model;
public enum SchemaChange {
UNKNOWN,
NONE,
HTTP_TO_HTTPS,
HTTPS_TO_HTTP;
public boolean isSignificant() {
return this != NONE && this != UNKNOWN;
}
}

View File

@@ -2,6 +2,9 @@ package nu.marginalia.ping.model.comparison;
import nu.marginalia.ping.model.DomainAvailabilityRecord; import nu.marginalia.ping.model.DomainAvailabilityRecord;
import nu.marginalia.ping.model.DomainSecurityRecord; import nu.marginalia.ping.model.DomainSecurityRecord;
import nu.marginalia.ping.model.HttpSchema;
import nu.marginalia.ping.model.SchemaChange;
import org.jetbrains.annotations.NotNull;
import java.time.Duration; import java.time.Duration;
import java.time.Instant; import java.time.Instant;
@@ -20,7 +23,8 @@ public record SecurityInformationChange(
Duration oldCertificateTimeToExpiry, Duration oldCertificateTimeToExpiry,
boolean isSecurityHeadersChanged, boolean isSecurityHeadersChanged,
boolean isIpAddressChanged, boolean isIpAddressChanged,
boolean isSoftwareHeaderChanged boolean isSoftwareHeaderChanged,
SchemaChange schemaChange
) { ) {
public static SecurityInformationChange between( public static SecurityInformationChange between(
DomainSecurityRecord before, DomainAvailabilityRecord availabilityBefore, DomainSecurityRecord before, DomainAvailabilityRecord availabilityBefore,
@@ -43,9 +47,10 @@ public record SecurityInformationChange(
); );
boolean securityHeadersChanged = before.securityHeadersHash() != after.securityHeadersHash(); boolean securityHeadersChanged = before.securityHeadersHash() != after.securityHeadersHash();
boolean softwareChanged = !Objects.equals(before.headerServer(), after.headerServer()); boolean softwareChanged = !Objects.equals(before.headerServer(), after.headerServer());
SchemaChange schemaChange = getSchemaChange(before, after);
// Note we don't include IP address changes in the overall change status, // Note we don't include IP address changes in the overall change status,
// as this is not alone considered a change in security information; we may have // as this is not alone considered a change in security information; we may have
// multiple IP addresses for a domain, and the IP address may change frequently // multiple IP addresses for a domain, and the IP address may change frequently
@@ -55,7 +60,8 @@ public record SecurityInformationChange(
|| certificateFingerprintChanged || certificateFingerprintChanged
|| securityHeadersChanged || securityHeadersChanged
|| certificateProfileChanged || certificateProfileChanged
|| softwareChanged; || softwareChanged
|| schemaChange.isSignificant();
return new SecurityInformationChange( return new SecurityInformationChange(
isChanged, isChanged,
@@ -69,9 +75,36 @@ public record SecurityInformationChange(
oldCertificateTimeToExpiry, oldCertificateTimeToExpiry,
securityHeadersChanged, securityHeadersChanged,
ipChanged, ipChanged,
softwareChanged softwareChanged,
schemaChange
); );
} }
private static @NotNull SchemaChange getSchemaChange(DomainSecurityRecord before, DomainSecurityRecord after) {
if (before.httpSchema() == null || after.httpSchema() == null) {
return SchemaChange.UNKNOWN;
}
boolean beforeIsHttp = before.httpSchema() == HttpSchema.HTTP;
boolean afterIsHttp = after.httpSchema() == HttpSchema.HTTP;
boolean beforeIsHttps = before.httpSchema() == HttpSchema.HTTPS;
boolean afterIsHttps = after.httpSchema() == HttpSchema.HTTPS;
SchemaChange schemaChange;
if (beforeIsHttp && afterIsHttp) {
schemaChange = SchemaChange.NONE;
} else if (beforeIsHttps && afterIsHttps) {
schemaChange = SchemaChange.NONE;
} else if (beforeIsHttp && afterIsHttps) {
schemaChange = SchemaChange.HTTP_TO_HTTPS;
} else if (beforeIsHttps && afterIsHttp) {
schemaChange = SchemaChange.HTTPS_TO_HTTP;
} else {
schemaChange = SchemaChange.UNKNOWN;
}
return schemaChange;
}
} }

View File

@@ -96,6 +96,7 @@ public class DomainAvailabilityInformationFactory {
.serverIp(address != null ? address.getAddress() : null) .serverIp(address != null ? address.getAddress() : null)
.serverIpAsn(getAsn(address)) .serverIpAsn(getAsn(address))
.httpSchema(HttpSchema.HTTP) .httpSchema(HttpSchema.HTTP)
.httpLocation(rsp.headers().getFirst("Location"))
.httpStatus(rsp.httpStatus()) .httpStatus(rsp.httpStatus())
.errorClassification(errorClassification) .errorClassification(errorClassification)
.httpResponseTime(rsp.httpResponseTime()) .httpResponseTime(rsp.httpResponseTime())
@@ -164,6 +165,7 @@ public class DomainAvailabilityInformationFactory {
.serverIp(address != null ? address.getAddress() : null) .serverIp(address != null ? address.getAddress() : null)
.serverIpAsn(getAsn(address)) .serverIpAsn(getAsn(address))
.httpSchema(HttpSchema.HTTPS) .httpSchema(HttpSchema.HTTPS)
.httpLocation(rsp.headers().getFirst("Location"))
.httpStatus(rsp.httpStatus()) .httpStatus(rsp.httpStatus())
.errorClassification(errorClassification) .errorClassification(errorClassification)
.httpResponseTime(rsp.httpResponseTime()) // Placeholder, actual timing not implemented .httpResponseTime(rsp.httpResponseTime()) // Placeholder, actual timing not implemented

View File

@@ -14,9 +14,7 @@ import java.security.NoSuchAlgorithmException;
import java.security.cert.CertificateEncodingException; import java.security.cert.CertificateEncodingException;
import java.security.cert.X509Certificate; import java.security.cert.X509Certificate;
import java.time.Instant; import java.time.Instant;
import java.util.HashSet; import java.util.*;
import java.util.Set;
import java.util.StringJoiner;
public class DomainSecurityInformationFactory { public class DomainSecurityInformationFactory {
private static final Logger logger = LoggerFactory.getLogger(DomainSecurityInformationFactory.class); private static final Logger logger = LoggerFactory.getLogger(DomainSecurityInformationFactory.class);
@@ -69,8 +67,11 @@ public class DomainSecurityInformationFactory {
boolean isWildcard = false; boolean isWildcard = false;
try { try {
if (sslCertificates != null && sslCertificates.length > 0) { if (sslCertificates != null && sslCertificates.length > 0) {
for (var sanEntry : sslCertificates[0].getSubjectAlternativeNames()) { Collection<List<?>> sans = sslCertificates[0].getSubjectAlternativeNames();
if (sans == null) {
sans = Collections.emptyList();
}
for (var sanEntry : sans) {
if (sanEntry != null && sanEntry.size() >= 2) { if (sanEntry != null && sanEntry.size() >= 2) {
// Check if the SAN entry is a DNS or IP address // Check if the SAN entry is a DNS or IP address

View File

@@ -145,7 +145,7 @@ public class HttpPingService {
domainReference.nodeId(), domainReference.nodeId(),
oldPingStatus, oldPingStatus,
ErrorClassification.HTTP_CLIENT_ERROR, ErrorClassification.HTTP_CLIENT_ERROR,
null); rsp.errorMessage());
newSecurityInformation = null; newSecurityInformation = null;
} }
case HttpResponse httpResponse -> { case HttpResponse httpResponse -> {
@@ -296,6 +296,7 @@ public class HttpPingService {
change.isCertificatePublicKeyChanged(), change.isCertificatePublicKeyChanged(),
change.isCertificateSerialNumberChanged(), change.isCertificateSerialNumberChanged(),
change.isCertificateIssuerChanged(), change.isCertificateIssuerChanged(),
change.schemaChange(),
change.oldCertificateTimeToExpiry(), change.oldCertificateTimeToExpiry(),
change.isSecurityHeadersChanged(), change.isSecurityHeadersChanged(),
change.isIpAddressChanged(), change.isIpAddressChanged(),

View File

@@ -320,6 +320,7 @@ class PingDaoTest {
true, true,
true, true,
false, false,
SchemaChange.NONE,
Duration.ofDays(30), Duration.ofDays(30),
false, false,
false, false,