1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-06 07:32:38 +02:00

Compare commits

...

11 Commits

Author SHA1 Message Date
Viktor Lofgren
e2c56dc3ca (search) Clean up the rate limiting
We fail quietly to make life harder for the bot farmers
2025-06-18 11:26:30 +02:00
Viktor Lofgren
470b866008 (search) Clean up the rate limiting
We fail quietly to make life harder for the bot farmers
2025-06-18 11:22:26 +02:00
Viktor Lofgren
4895a2ac7a (search) Clean up the rate limiting
We fail quietly to make life harder for the bot farmers
2025-06-18 11:20:24 +02:00
Viktor Lofgren
fd32ae9fa7 (search) Add automatic rate limiting to /site
Fix typo
2025-06-18 11:10:08 +02:00
Viktor Lofgren
470651ea4c (search) Add automatic rate limiting to /site 2025-06-18 11:04:36 +02:00
Viktor Lofgren
8d4829e783 (ping) Change cookie specification to ignore cookies 2025-06-17 12:26:34 +02:00
Viktor Lofgren
1290bc15dc (ping) Reduce retries for SocketException and pals 2025-06-16 22:35:33 +02:00
Viktor Lofgren
e7fa558954 (ping) Disable some cert validation logic for now 2025-06-16 22:00:32 +02:00
Viktor Lofgren
720685bf3f (ping) Persist more detailed information about why a cert is invalid
The change also alters the validator to be less judgemental, and accept some invalid chains based on looking like we've simply not got access to a (valid) intermediate cert.
2025-06-16 19:44:22 +02:00
Viktor Lofgren
cbec63c7da (ping) Pull root certificates from cacerts.pem 2025-06-16 19:21:05 +02:00
Viktor Lofgren
b03ca75785 (ping) Correct test so that it does not spam an innocent webmaster with requests 2025-06-16 17:06:14 +02:00
13 changed files with 247 additions and 50 deletions

View File

@@ -0,0 +1,7 @@
-- Add additional summary columns to DOMAIN_SECURITY_INFORMATION table
-- to make it easier to get more information about the SSL certificate's validity
ALTER TABLE DOMAIN_SECURITY_INFORMATION ADD COLUMN SSL_CHAIN_VALID BOOLEAN DEFAULT NULL;
ALTER TABLE DOMAIN_SECURITY_INFORMATION ADD COLUMN SSL_HOST_VALID BOOLEAN DEFAULT NULL;
ALTER TABLE DOMAIN_SECURITY_INFORMATION ADD COLUMN SSL_DATE_VALID BOOLEAN DEFAULT NULL;
OPTIMIZE TABLE DOMAIN_SECURITY_INFORMATION;

View File

@@ -112,7 +112,7 @@ public class HttpClientProvider implements Provider<HttpClient> {
});
final RequestConfig defaultRequestConfig = RequestConfig.custom()
.setCookieSpec(StandardCookieSpec.RELAXED)
.setCookieSpec(StandardCookieSpec.IGNORE)
.setResponseTimeout(10, TimeUnit.SECONDS)
.setConnectionRequestTimeout(5, TimeUnit.MINUTES)
.build();

View File

@@ -11,6 +11,7 @@ import org.slf4j.LoggerFactory;
import javax.net.ssl.SSLException;
import java.io.IOException;
import java.net.SocketException;
import java.net.SocketTimeoutException;
import java.net.UnknownHostException;
@@ -23,7 +24,8 @@ public class RetryStrategy implements HttpRequestRetryStrategy {
case SocketTimeoutException ste -> false;
case SSLException ssle -> false;
case UnknownHostException uhe -> false;
case HttpHostConnectException ex -> executionCount <= 2; // Only retry once for connection errors
case HttpHostConnectException ex -> executionCount < 2;
case SocketException ex -> executionCount < 2;
default -> executionCount <= 3;
};
}

View File

@@ -43,7 +43,10 @@ public record DomainSecurityRecord(
@Nullable String headerXXssProtection,
@Nullable String headerServer,
@Nullable String headerXPoweredBy,
@Nullable Instant tsLastUpdate
@Nullable Instant tsLastUpdate,
@Nullable Boolean sslChainValid,
@Nullable Boolean sslHostValid,
@Nullable Boolean sslDateValid
)
implements WritableModel
{
@@ -103,7 +106,11 @@ public record DomainSecurityRecord(
rs.getString("DOMAIN_SECURITY_INFORMATION.HEADER_X_XSS_PROTECTION"),
rs.getString("DOMAIN_SECURITY_INFORMATION.HEADER_SERVER"),
rs.getString("DOMAIN_SECURITY_INFORMATION.HEADER_X_POWERED_BY"),
rs.getObject("DOMAIN_SECURITY_INFORMATION.TS_LAST_UPDATE", Instant.class));
rs.getObject("DOMAIN_SECURITY_INFORMATION.TS_LAST_UPDATE", Instant.class),
rs.getObject("SSL_CHAIN_VALID", Boolean.class),
rs.getObject("SSL_HOST_VALID", Boolean.class),
rs.getObject("SSL_DATE_VALID", Boolean.class)
);
}
private static HttpSchema httpSchemaFromString(@Nullable String schema) {
@@ -150,8 +157,11 @@ public record DomainSecurityRecord(
header_x_powered_by,
ssl_cert_public_key_hash,
asn,
ts_last_update)
VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
ts_last_update,
ssl_chain_valid,
ssl_host_valid,
ssl_date_valid)
VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
"""))
{
ps.setInt(1, domainId());
@@ -295,6 +305,25 @@ public record DomainSecurityRecord(
} else {
ps.setTimestamp(32, java.sql.Timestamp.from(tsLastUpdate()));
}
if (sslChainValid() == null) {
ps.setNull(33, java.sql.Types.BOOLEAN);
} else {
ps.setBoolean(33, sslChainValid());
}
if (sslHostValid() == null) {
ps.setNull(34, java.sql.Types.BOOLEAN);
} else {
ps.setBoolean(34, sslHostValid());
}
if (sslDateValid() == null) {
ps.setNull(35, java.sql.Types.BOOLEAN);
} else {
ps.setBoolean(35, sslDateValid());
}
ps.executeUpdate();
}
}
@@ -333,6 +362,11 @@ public record DomainSecurityRecord(
private String headerXPoweredBy;
private Instant tsLastUpdate;
private Boolean isCertChainValid;
private Boolean isCertHostValid;
private Boolean isCertDateValid;
private static Instant MAX_UNIX_TIMESTAMP = Instant.ofEpochSecond(Integer.MAX_VALUE);
public Builder() {
@@ -508,6 +542,21 @@ public record DomainSecurityRecord(
return this;
}
public Builder sslChainValid(@Nullable Boolean isCertChainValid) {
this.isCertChainValid = isCertChainValid;
return this;
}
public Builder sslHostValid(@Nullable Boolean isCertHostValid) {
this.isCertHostValid = isCertHostValid;
return this;
}
public Builder sslDateValid(@Nullable Boolean isCertDateValid) {
this.isCertDateValid = isCertDateValid;
return this;
}
public DomainSecurityRecord build() {
return new DomainSecurityRecord(
domainId,
@@ -541,7 +590,10 @@ public record DomainSecurityRecord(
headerXXssProtection,
headerServer,
headerXPoweredBy,
tsLastUpdate
tsLastUpdate,
isCertChainValid,
isCertHostValid,
isCertDateValid
);
}
}

View File

@@ -13,20 +13,25 @@ import org.bouncycastle.asn1.x509.*;
import org.bouncycastle.cert.X509CertificateHolder;
import org.bouncycastle.cert.jcajce.JcaX509CertificateConverter;
import org.bouncycastle.cms.CMSSignedData;
import org.bouncycastle.openssl.PEMParser;
import org.bouncycastle.util.Store;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.ByteArrayInputStream;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.security.cert.CertificateFactory;
import java.security.cert.TrustAnchor;
import java.security.cert.X509Certificate;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
public class AIACertificateFetcher {
public class CertificateFetcher {
private static final Logger logger = LoggerFactory.getLogger(AIACertificateFetcher.class);
private static final Logger logger = LoggerFactory.getLogger(CertificateFetcher.class);
private static HttpClient client = HttpClientBuilder.create()
.build();
@@ -107,6 +112,27 @@ public class AIACertificateFetcher {
}
}
private static List<X509Certificate> parseMultiplePEM(byte[] data) throws Exception {
List<X509Certificate> certificates = new ArrayList<>();
try (StringReader stringReader = new StringReader(new String(data, StandardCharsets.UTF_8));
PEMParser pemParser = new PEMParser(stringReader)) {
JcaX509CertificateConverter converter = new JcaX509CertificateConverter();
Object object;
while ((object = pemParser.readObject()) != null) {
if (object instanceof X509CertificateHolder) {
X509CertificateHolder certHolder = (X509CertificateHolder) object;
certificates.add(converter.getCertificate(certHolder));
} else if (object instanceof X509Certificate) {
certificates.add((X509Certificate) object);
}
}
}
return certificates;
}
private static X509Certificate parseX509(byte[] data) throws Exception {
CertificateFactory cf = CertificateFactory.getInstance("X.509");
return (X509Certificate) cf.generateCertificate(new ByteArrayInputStream(data));
@@ -215,4 +241,33 @@ public class AIACertificateFetcher {
return ocspUrls;
}
public static Set<TrustAnchor> getRootCerts(String bundleUrl) throws Exception {
ClassicHttpRequest request = ClassicRequestBuilder.create("GET")
.addHeader("User-Agent", WmsaHome.getUserAgent() + " (Certificate Fetcher)")
.setUri(bundleUrl)
.build();
byte[] data = client.execute(request, rsp -> {
var entity = rsp.getEntity();
if (entity == null) {
logger.warn("GET request returned no content for {}", bundleUrl);
return null;
}
return entity.getContent().readAllBytes();
});
List<TrustAnchor> anchors = new ArrayList<>();
for (var cert : parseMultiplePEM(data)) {
try {
anchors.add(new TrustAnchor(cert, null));
} catch (Exception e) {
logger.warn("Failed to create TrustAnchor for certificate: {}", e.getMessage());
}
}
logger.info("Loaded {} root certificates from {}", anchors.size(), bundleUrl);
return Set.copyOf(anchors);
}
}

View File

@@ -4,11 +4,7 @@ import org.bouncycastle.asn1.ASN1OctetString;
import org.bouncycastle.asn1.ASN1Primitive;
import org.bouncycastle.asn1.x509.*;
import javax.net.ssl.TrustManager;
import javax.net.ssl.TrustManagerFactory;
import javax.net.ssl.X509TrustManager;
import javax.security.auth.x500.X500Principal;
import java.security.KeyStore;
import java.security.cert.TrustAnchor;
import java.security.cert.X509Certificate;
import java.util.*;
@@ -16,16 +12,24 @@ import java.util.*;
/** Utility class for validating X.509 certificates.
* This class provides methods to validate certificate chains, check expiration,
* hostname validity, and revocation status.
* <p></p>
* This is extremely unsuitable for actual SSL/TLS validation,
* and is only to be used in analyzing certificates for fingerprinting
* and diagnosing servers!
*/
public class CertificateValidator {
// If true, will attempt to fetch missing intermediate certificates via AIA urls.
private static final boolean TRY_FETCH_MISSING_CERTS = false;
public static class ValidationResult {
public boolean chainValid = false;
public boolean certificateExpired = false;
public boolean certificateRevoked = false;
public boolean selfSigned = false;
public boolean hostnameValid = false;
public boolean isValid() {
return chainValid && !certificateExpired && !certificateRevoked && hostnameValid;
return !selfSigned && !certificateExpired && !certificateRevoked && hostnameValid;
}
public List<String> errors = new ArrayList<>();
@@ -40,6 +44,7 @@ public class CertificateValidator {
sb.append("Not Expired: ").append(!certificateExpired ? "" : "").append("\n");
sb.append("Not Revoked: ").append(!certificateRevoked ? "" : "").append("\n");
sb.append("Hostname Valid: ").append(hostnameValid ? "" : "").append("\n");
sb.append("Self-Signed: ").append(selfSigned ? "" : "").append("\n");
if (!errors.isEmpty()) {
sb.append("\nErrors:\n");
@@ -66,32 +71,6 @@ public class CertificateValidator {
}
}
private static final Set<TrustAnchor> trustAnchors = loadDefaultTrustAnchors();
private static Set<TrustAnchor> loadDefaultTrustAnchors() {
try {
Set<TrustAnchor> trustAnchors = new HashSet<>();
TrustManagerFactory tmf = TrustManagerFactory
.getInstance(TrustManagerFactory.getDefaultAlgorithm());
tmf.init((KeyStore) null);
for (TrustManager tm : tmf.getTrustManagers()) {
if (tm instanceof X509TrustManager x509tm) {
for (X509Certificate cert : x509tm.getAcceptedIssuers()) {
trustAnchors.add(new TrustAnchor(cert, null));
}
}
}
return trustAnchors;
}
catch (Exception e) {
throw new RuntimeException(e);
}
}
public static ValidationResult validateCertificate(X509Certificate[] certChain,
String hostname) {
return validateCertificate(certChain, hostname, false);
@@ -115,11 +94,15 @@ public class CertificateValidator {
// 2. Check hostname validity
result.hostnameValid = checkHostname(leafCert, hostname, result);
// 3. Check certificate chain validity (with AIA fetching)
result.chainValid = checkChainValidity(certChain, trustAnchors, result, autoTrustFetchedRoots);
// 3. Not really checking if it's self-signed, but if the chain is incomplete (and likely self-signed)
result.selfSigned = certChain.length <= 1;
// 4. Check revocation status
result.certificateRevoked = checkRevocation(leafCert, result);
// 4. Check certificate chain validity (optionally with AIA fetching)
result.chainValid = checkChainValidity(certChain, RootCerts.getTrustAnchors(), result, autoTrustFetchedRoots);
// 5. Check revocation status
result.certificateRevoked = false; // not implemented
// checkRevocation(leafCert, result);
return result;
}
@@ -199,8 +182,15 @@ public class CertificateValidator {
return true;
}
else if (!TRY_FETCH_MISSING_CERTS) {
result.errors.addAll(originalResult.issues);
result.details.put("chainLength", originalChain.length);
result.details.put("chainExtended", false);
return false;
}
try {
List<X509Certificate> repairedChain = AIACertificateFetcher.buildCompleteChain(originalChain[0]);
List<X509Certificate> repairedChain = CertificateFetcher.buildCompleteChain(originalChain[0]);
if (!repairedChain.isEmpty()) {
@@ -397,7 +387,7 @@ public class CertificateValidator {
private static boolean checkOCSP(X509Certificate cert, ValidationResult result) {
// For now, just extract OCSP URL and note that we found it
try {
List<String> ocspUrls = AIACertificateFetcher.getOCSPUrls(cert);
List<String> ocspUrls = CertificateFetcher.getOCSPUrls(cert);
if (!ocspUrls.isEmpty()) {
result.details.put("ocspUrls", ocspUrls);
result.warnings.add("OCSP checking not implemented - found OCSP URLs: " + ocspUrls);

View File

@@ -0,0 +1,57 @@
package nu.marginalia.ping.ssl;
import java.security.cert.TrustAnchor;
import java.time.Duration;
import java.util.Set;
public class RootCerts {
private static final String MOZILLA_CA_BUNDLE_URL = "https://curl.se/ca/cacert.pem";
volatile static boolean initialized = false;
volatile static Set<TrustAnchor> trustAnchors;
public static Set<TrustAnchor> getTrustAnchors() {
if (!initialized) {
try {
synchronized (RootCerts.class) {
while (!initialized) {
RootCerts.class.wait(100);
}
}
}
catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new RuntimeException("RootCerts initialization interrupted", e);
}
}
return trustAnchors;
}
static {
Thread.ofPlatform()
.name("RootCertsUpdater")
.daemon()
.unstarted(RootCerts::updateTrustAnchors)
.start();
}
private static void updateTrustAnchors() {
while (true) {
try {
trustAnchors = CertificateFetcher.getRootCerts(MOZILLA_CA_BUNDLE_URL);
synchronized (RootCerts.class) {
initialized = true;
RootCerts.class.notifyAll(); // Notify any waiting threads
}
Thread.sleep(Duration.ofHours(24));
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
break; // Exit if interrupted
} catch (Exception e) {
// Log the exception and continue to retry
System.err.println("Failed to update trust anchors: " + e.getMessage());
}
}
}
}

View File

@@ -126,6 +126,9 @@ public class DomainSecurityInformationFactory {
.sslCertWildcard(isWildcard)
.sslCertificateChainLength(sslCertificates.length)
.sslCertificateValid(validationResult.isValid())
.sslHostValid(validationResult.hostnameValid)
.sslChainValid(validationResult.chainValid)
.sslDateValid(!validationResult.certificateExpired)
.httpVersion(httpResponse.version())
.tsLastUpdate(Instant.now())
.build();

View File

@@ -243,6 +243,7 @@ class PingDaoTest {
.headerServer("Apache/2.4.41 (Ubuntu)")
.headerXPoweredBy("PHP/7.4.3")
.tsLastUpdate(Instant.now())
.sslHostValid(true)
.build();
var svc = new PingDao(dataSource);
svc.write(foo);

View File

@@ -60,6 +60,7 @@ class PingHttpServiceTest {
}
@Tag("flaky") // Do not run this test in CI
@Test
public void testGetSslInfo() throws Exception {
var provider = new HttpClientProvider();
@@ -71,7 +72,7 @@ class PingHttpServiceTest {
),
new DomainSecurityInformationFactory());
var output = pingService.pingDomain(new DomainReference(1, 1, "thecavalierclub.co.uk"), null, null);
var output = pingService.pingDomain(new DomainReference(1, 1, "www.marginalia.nu"), null, null);
for (var model : output) {
System.out.println(model);
}

View File

@@ -22,6 +22,7 @@ import nu.marginalia.search.model.NavbarModel;
import nu.marginalia.search.model.ResultsPage;
import nu.marginalia.search.model.UrlDetails;
import nu.marginalia.search.svc.SearchFlagSiteService.FlagSiteFormData;
import nu.marginalia.service.server.RateLimiter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -47,6 +48,8 @@ public class SearchSiteInfoService {
private final HikariDataSource dataSource;
private final SearchSiteSubscriptionService searchSiteSubscriptions;
private final RateLimiter rateLimiter = RateLimiter.custom(60);
@Inject
public SearchSiteInfoService(SearchOperator searchOperator,
DomainInfoClient domainInfoClient,
@@ -238,6 +241,7 @@ public class SearchSiteInfoService {
boolean hasScreenshot = screenshotService.hasScreenshot(domainId);
boolean isSubscribed = searchSiteSubscriptions.isSubscribed(context, domain);
boolean rateLimited = !rateLimiter.isAllowed();
if (domainId < 0) {
domainInfoFuture = CompletableFuture.failedFuture(new Exception("Unknown Domain ID"));
similarSetFuture = CompletableFuture.failedFuture(new Exception("Unknown Domain ID"));
@@ -250,6 +254,12 @@ public class SearchSiteInfoService {
linkingDomainsFuture = CompletableFuture.failedFuture(new Exception("Assistant Service Unavailable"));
feedItemsFuture = CompletableFuture.failedFuture(new Exception("Assistant Service Unavailable"));
}
else if (rateLimited) {
domainInfoFuture = domainInfoClient.domainInformation(domainId);
similarSetFuture = CompletableFuture.failedFuture(new Exception("Rate limit exceeded"));
linkingDomainsFuture = CompletableFuture.failedFuture(new Exception("Rate limit exceeded"));
feedItemsFuture = CompletableFuture.failedFuture(new Exception("Rate limit exceeded"));
}
else {
domainInfoFuture = domainInfoClient.domainInformation(domainId);
similarSetFuture = domainInfoClient.similarDomains(domainId, 25);
@@ -257,7 +267,14 @@ public class SearchSiteInfoService {
feedItemsFuture = feedsClient.getFeed(domainId);
}
List<UrlDetails> sampleResults = searchOperator.doSiteSearch(domainName, domainId,5, 1).results;
List<UrlDetails> sampleResults;
if (rateLimited) {
sampleResults = List.of();
}
else {
sampleResults = searchOperator.doSiteSearch(domainName, domainId, 5, 1).results;
}
if (!sampleResults.isEmpty()) {
url = sampleResults.getFirst().url.withPathAndParam("/", null).toString();
}
@@ -276,8 +293,9 @@ public class SearchSiteInfoService {
sampleResults
);
requestMissingScreenshots(result);
if (!rateLimited) {
requestMissingScreenshots(result);
}
return result;
}

View File

@@ -0,0 +1,11 @@
@param String message
<!DOCTYPE html>
<html lang="en">
<head><meta charset="UTF-8">
<title>Unavailable</title></head>
<body>
<h1>Service Overloaded</h1>
<p>${message}</p>
</body>
</html>