mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-06 17:32:39 +02:00
Compare commits
21 Commits
deploy-024
...
deploy-026
Author | SHA1 | Date | |
---|---|---|---|
|
e50d09cc01 | ||
|
bce3892ce0 | ||
|
36581b25c2 | ||
|
52ff7fb4dd | ||
|
a4e49e658a | ||
|
e2c56dc3ca | ||
|
470b866008 | ||
|
4895a2ac7a | ||
|
fd32ae9fa7 | ||
|
470651ea4c | ||
|
8d4829e783 | ||
|
1290bc15dc | ||
|
e7fa558954 | ||
|
720685bf3f | ||
|
cbec63c7da | ||
|
b03ca75785 | ||
|
184aedc071 | ||
|
0275bad281 | ||
|
fd83a9d0b8 | ||
|
d556f8ae3a | ||
|
e37559837b |
@@ -0,0 +1,7 @@
|
|||||||
|
-- Add additional summary columns to DOMAIN_SECURITY_INFORMATION table
|
||||||
|
-- to make it easier to get more information about the SSL certificate's validity
|
||||||
|
|
||||||
|
ALTER TABLE DOMAIN_SECURITY_INFORMATION ADD COLUMN SSL_CHAIN_VALID BOOLEAN DEFAULT NULL;
|
||||||
|
ALTER TABLE DOMAIN_SECURITY_INFORMATION ADD COLUMN SSL_HOST_VALID BOOLEAN DEFAULT NULL;
|
||||||
|
ALTER TABLE DOMAIN_SECURITY_INFORMATION ADD COLUMN SSL_DATE_VALID BOOLEAN DEFAULT NULL;
|
||||||
|
OPTIMIZE TABLE DOMAIN_SECURITY_INFORMATION;
|
@@ -0,0 +1,12 @@
|
|||||||
|
-- Table holding domains to be processed by the NDP in order to figure out whether to add them to
|
||||||
|
-- be crawled.
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS NDP_NEW_DOMAINS(
|
||||||
|
DOMAIN_ID INT NOT NULL PRIMARY KEY,
|
||||||
|
STATE ENUM ('NEW', 'ACCEPTED', 'REJECTED') NOT NULL DEFAULT 'NEW',
|
||||||
|
PRIORITY INT NOT NULL DEFAULT 0,
|
||||||
|
TS_CHANGE TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
|
||||||
|
CHECK_COUNT INT NOT NULL DEFAULT 0
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS NDP_NEW_DOMAINS__STATE_PRIORITY ON NDP_NEW_DOMAINS (STATE, PRIORITY DESC);
|
@@ -20,6 +20,7 @@ dependencies {
|
|||||||
implementation project(':code:processes:live-crawling-process')
|
implementation project(':code:processes:live-crawling-process')
|
||||||
implementation project(':code:processes:loading-process')
|
implementation project(':code:processes:loading-process')
|
||||||
implementation project(':code:processes:ping-process')
|
implementation project(':code:processes:ping-process')
|
||||||
|
implementation project(':code:processes:new-domain-process')
|
||||||
implementation project(':code:processes:converting-process')
|
implementation project(':code:processes:converting-process')
|
||||||
implementation project(':code:processes:index-constructor-process')
|
implementation project(':code:processes:index-constructor-process')
|
||||||
|
|
||||||
@@ -41,7 +42,6 @@ dependencies {
|
|||||||
implementation project(':code:functions:nsfw-domain-filter')
|
implementation project(':code:functions:nsfw-domain-filter')
|
||||||
implementation project(':code:execution:api')
|
implementation project(':code:execution:api')
|
||||||
|
|
||||||
implementation project(':code:processes:crawling-process:model')
|
|
||||||
implementation project(':code:processes:crawling-process:model')
|
implementation project(':code:processes:crawling-process:model')
|
||||||
implementation project(':code:processes:crawling-process:ft-link-parser')
|
implementation project(':code:processes:crawling-process:ft-link-parser')
|
||||||
implementation project(':code:index:index-journal')
|
implementation project(':code:index:index-journal')
|
||||||
|
@@ -14,6 +14,7 @@ public enum ExecutorActor {
|
|||||||
PROC_CRAWLER_SPAWNER(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
|
PROC_CRAWLER_SPAWNER(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
|
||||||
PROC_PING_SPAWNER(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED, NodeProfile.REALTIME),
|
PROC_PING_SPAWNER(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED, NodeProfile.REALTIME),
|
||||||
PROC_EXPORT_TASKS_SPAWNER(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
|
PROC_EXPORT_TASKS_SPAWNER(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
|
||||||
|
PROC_NDP_SPAWNER(NodeProfile.MIXED, NodeProfile.REALTIME),
|
||||||
ADJACENCY_CALCULATION(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
|
ADJACENCY_CALCULATION(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
|
||||||
EXPORT_DATA(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
|
EXPORT_DATA(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
|
||||||
EXPORT_SEGMENTATION_MODEL(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
|
EXPORT_SEGMENTATION_MODEL(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
|
||||||
|
@@ -49,6 +49,7 @@ public class ExecutorActorControlService {
|
|||||||
RecrawlSingleDomainActor recrawlSingleDomainActor,
|
RecrawlSingleDomainActor recrawlSingleDomainActor,
|
||||||
RestoreBackupActor restoreBackupActor,
|
RestoreBackupActor restoreBackupActor,
|
||||||
ConverterMonitorActor converterMonitorFSM,
|
ConverterMonitorActor converterMonitorFSM,
|
||||||
|
NdpMonitorActor ndpMonitorActor,
|
||||||
PingMonitorActor pingMonitorActor,
|
PingMonitorActor pingMonitorActor,
|
||||||
CrawlerMonitorActor crawlerMonitorActor,
|
CrawlerMonitorActor crawlerMonitorActor,
|
||||||
LiveCrawlerMonitorActor liveCrawlerMonitorActor,
|
LiveCrawlerMonitorActor liveCrawlerMonitorActor,
|
||||||
@@ -93,7 +94,7 @@ public class ExecutorActorControlService {
|
|||||||
register(ExecutorActor.PROC_PING_SPAWNER, pingMonitorActor);
|
register(ExecutorActor.PROC_PING_SPAWNER, pingMonitorActor);
|
||||||
register(ExecutorActor.PROC_LIVE_CRAWL_SPAWNER, liveCrawlerMonitorActor);
|
register(ExecutorActor.PROC_LIVE_CRAWL_SPAWNER, liveCrawlerMonitorActor);
|
||||||
register(ExecutorActor.PROC_EXPORT_TASKS_SPAWNER, exportTasksMonitorActor);
|
register(ExecutorActor.PROC_EXPORT_TASKS_SPAWNER, exportTasksMonitorActor);
|
||||||
|
register(ExecutorActor.PROC_NDP_SPAWNER, ndpMonitorActor);
|
||||||
register(ExecutorActor.MONITOR_PROCESS_LIVENESS, processMonitorFSM);
|
register(ExecutorActor.MONITOR_PROCESS_LIVENESS, processMonitorFSM);
|
||||||
register(ExecutorActor.MONITOR_FILE_STORAGE, fileStorageMonitorActor);
|
register(ExecutorActor.MONITOR_FILE_STORAGE, fileStorageMonitorActor);
|
||||||
|
|
||||||
|
@@ -0,0 +1,29 @@
|
|||||||
|
package nu.marginalia.actor.proc;
|
||||||
|
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.Singleton;
|
||||||
|
import nu.marginalia.actor.monitor.AbstractProcessSpawnerActor;
|
||||||
|
import nu.marginalia.mq.persistence.MqPersistence;
|
||||||
|
import nu.marginalia.mqapi.ProcessInboxNames;
|
||||||
|
import nu.marginalia.process.ProcessService;
|
||||||
|
import nu.marginalia.service.module.ServiceConfiguration;
|
||||||
|
|
||||||
|
@Singleton
|
||||||
|
public class NdpMonitorActor extends AbstractProcessSpawnerActor {
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public NdpMonitorActor(Gson gson,
|
||||||
|
ServiceConfiguration configuration,
|
||||||
|
MqPersistence persistence,
|
||||||
|
ProcessService processService) {
|
||||||
|
super(gson,
|
||||||
|
configuration,
|
||||||
|
persistence,
|
||||||
|
processService,
|
||||||
|
ProcessInboxNames.NDP_INBOX,
|
||||||
|
ProcessService.ProcessId.NDP);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
@@ -8,6 +8,7 @@ import nu.marginalia.crawl.CrawlerMain;
|
|||||||
import nu.marginalia.index.IndexConstructorMain;
|
import nu.marginalia.index.IndexConstructorMain;
|
||||||
import nu.marginalia.livecrawler.LiveCrawlerMain;
|
import nu.marginalia.livecrawler.LiveCrawlerMain;
|
||||||
import nu.marginalia.loading.LoaderMain;
|
import nu.marginalia.loading.LoaderMain;
|
||||||
|
import nu.marginalia.ndp.NdpMain;
|
||||||
import nu.marginalia.ping.PingMain;
|
import nu.marginalia.ping.PingMain;
|
||||||
import nu.marginalia.service.control.ServiceEventLog;
|
import nu.marginalia.service.control.ServiceEventLog;
|
||||||
import nu.marginalia.service.server.BaseServiceParams;
|
import nu.marginalia.service.server.BaseServiceParams;
|
||||||
@@ -57,6 +58,7 @@ public class ProcessService {
|
|||||||
CONVERTER(ConverterMain.class),
|
CONVERTER(ConverterMain.class),
|
||||||
LOADER(LoaderMain.class),
|
LOADER(LoaderMain.class),
|
||||||
INDEX_CONSTRUCTOR(IndexConstructorMain.class),
|
INDEX_CONSTRUCTOR(IndexConstructorMain.class),
|
||||||
|
NDP(NdpMain.class),
|
||||||
EXPORT_TASKS(ExportTasksMain.class),
|
EXPORT_TASKS(ExportTasksMain.class),
|
||||||
;
|
;
|
||||||
|
|
||||||
@@ -72,6 +74,7 @@ public class ProcessService {
|
|||||||
case CONVERTER -> "CONVERTER_PROCESS_OPTS";
|
case CONVERTER -> "CONVERTER_PROCESS_OPTS";
|
||||||
case LOADER -> "LOADER_PROCESS_OPTS";
|
case LOADER -> "LOADER_PROCESS_OPTS";
|
||||||
case PING -> "PING_PROCESS_OPTS";
|
case PING -> "PING_PROCESS_OPTS";
|
||||||
|
case NDP -> "NDP_PROCESS_OPTS";
|
||||||
case INDEX_CONSTRUCTOR -> "INDEX_CONSTRUCTION_PROCESS_OPTS";
|
case INDEX_CONSTRUCTOR -> "INDEX_CONSTRUCTION_PROCESS_OPTS";
|
||||||
case EXPORT_TASKS -> "EXPORT_TASKS_PROCESS_OPTS";
|
case EXPORT_TASKS -> "EXPORT_TASKS_PROCESS_OPTS";
|
||||||
};
|
};
|
||||||
|
@@ -36,7 +36,6 @@ import org.apache.hc.core5.http.io.support.ClassicRequestBuilder;
|
|||||||
import org.apache.hc.core5.http.message.MessageSupport;
|
import org.apache.hc.core5.http.message.MessageSupport;
|
||||||
import org.apache.hc.core5.http.protocol.HttpContext;
|
import org.apache.hc.core5.http.protocol.HttpContext;
|
||||||
import org.apache.hc.core5.pool.PoolStats;
|
import org.apache.hc.core5.pool.PoolStats;
|
||||||
import org.apache.hc.core5.ssl.SSLContextBuilder;
|
|
||||||
import org.apache.hc.core5.util.TimeValue;
|
import org.apache.hc.core5.util.TimeValue;
|
||||||
import org.apache.hc.core5.util.Timeout;
|
import org.apache.hc.core5.util.Timeout;
|
||||||
import org.jsoup.Jsoup;
|
import org.jsoup.Jsoup;
|
||||||
@@ -49,15 +48,12 @@ import org.slf4j.MarkerFactory;
|
|||||||
|
|
||||||
import javax.net.ssl.SSLContext;
|
import javax.net.ssl.SSLContext;
|
||||||
import javax.net.ssl.SSLException;
|
import javax.net.ssl.SSLException;
|
||||||
import javax.net.ssl.TrustManager;
|
|
||||||
import javax.net.ssl.X509TrustManager;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.SocketTimeoutException;
|
import java.net.SocketTimeoutException;
|
||||||
import java.net.URISyntaxException;
|
import java.net.URISyntaxException;
|
||||||
import java.net.UnknownHostException;
|
import java.net.UnknownHostException;
|
||||||
import java.security.KeyManagementException;
|
import java.security.KeyManagementException;
|
||||||
import java.security.NoSuchAlgorithmException;
|
import java.security.NoSuchAlgorithmException;
|
||||||
import java.security.cert.X509Certificate;
|
|
||||||
import java.time.Duration;
|
import java.time.Duration;
|
||||||
import java.time.Instant;
|
import java.time.Instant;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
@@ -99,42 +95,12 @@ public class HttpFetcherImpl implements HttpFetcher, HttpRequestRetryStrategy {
|
|||||||
.setValidateAfterInactivity(TimeValue.ofSeconds(5))
|
.setValidateAfterInactivity(TimeValue.ofSeconds(5))
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
// No-op up front validation of server certificates.
|
|
||||||
//
|
|
||||||
// We will validate certificates later, after the connection is established
|
|
||||||
// as we want to store the certificate chain and validation
|
|
||||||
// outcome to the database.
|
|
||||||
|
|
||||||
var trustMeBro = new X509TrustManager() {
|
|
||||||
private X509Certificate[] lastServerCertChain;
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void checkClientTrusted(X509Certificate[] chain, String authType) {
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void checkServerTrusted(X509Certificate[] chain, String authType) {
|
|
||||||
this.lastServerCertChain = chain.clone();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public X509Certificate[] getAcceptedIssuers() {
|
|
||||||
return new X509Certificate[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
public X509Certificate[] getLastServerCertChain() {
|
|
||||||
return lastServerCertChain != null ? lastServerCertChain.clone() : null;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
SSLContext sslContext = SSLContextBuilder.create().build();
|
|
||||||
sslContext.init(null, new TrustManager[]{trustMeBro}, null);
|
|
||||||
|
|
||||||
connectionManager = PoolingHttpClientConnectionManagerBuilder.create()
|
connectionManager = PoolingHttpClientConnectionManagerBuilder.create()
|
||||||
.setMaxConnPerRoute(2)
|
.setMaxConnPerRoute(2)
|
||||||
.setMaxConnTotal(5000)
|
.setMaxConnTotal(5000)
|
||||||
.setDefaultConnectionConfig(connectionConfig)
|
.setDefaultConnectionConfig(connectionConfig)
|
||||||
.setTlsSocketStrategy(new DefaultClientTlsStrategy(sslContext))
|
.setTlsSocketStrategy(new DefaultClientTlsStrategy(SSLContext.getDefault()))
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
connectionManager.setDefaultSocketConfig(SocketConfig.custom()
|
connectionManager.setDefaultSocketConfig(SocketConfig.custom()
|
||||||
|
@@ -115,9 +115,13 @@ public class CrawlerRetreiver implements AutoCloseable {
|
|||||||
final SimpleRobotRules robotsRules = fetcher.fetchRobotRules(probedUrl.domain, warcRecorder);
|
final SimpleRobotRules robotsRules = fetcher.fetchRobotRules(probedUrl.domain, warcRecorder);
|
||||||
final CrawlDelayTimer delayTimer = new CrawlDelayTimer(robotsRules.getCrawlDelay());
|
final CrawlDelayTimer delayTimer = new CrawlDelayTimer(robotsRules.getCrawlDelay());
|
||||||
|
|
||||||
|
if (!robotsRules.isAllowed(probedUrl.toString())) {
|
||||||
|
warcRecorder.flagAsRobotsTxtError(probedUrl);
|
||||||
|
yield 1; // Nothing we can do here, we aren't allowed to fetch the root URL
|
||||||
|
}
|
||||||
delayTimer.waitFetchDelay(0); // initial delay after robots.txt
|
delayTimer.waitFetchDelay(0); // initial delay after robots.txt
|
||||||
|
|
||||||
DomainStateDb.SummaryRecord summaryRecord = sniffRootDocument(probedUrl, delayTimer);
|
DomainStateDb.SummaryRecord summaryRecord = sniffRootDocument(probedUrl, robotsRules, delayTimer);
|
||||||
domainStateDb.save(summaryRecord);
|
domainStateDb.save(summaryRecord);
|
||||||
|
|
||||||
if (Thread.interrupted()) {
|
if (Thread.interrupted()) {
|
||||||
@@ -270,11 +274,11 @@ public class CrawlerRetreiver implements AutoCloseable {
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
private DomainStateDb.SummaryRecord sniffRootDocument(EdgeUrl rootUrl, CrawlDelayTimer timer) {
|
private DomainStateDb.SummaryRecord sniffRootDocument(EdgeUrl rootUrl, SimpleRobotRules robotsRules, CrawlDelayTimer timer) {
|
||||||
Optional<String> feedLink = Optional.empty();
|
Optional<String> feedLink = Optional.empty();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
var url = rootUrl.withPathAndParam("/", null);
|
EdgeUrl url = rootUrl.withPathAndParam("/", null);
|
||||||
|
|
||||||
HttpFetchResult result = fetcher.fetchContent(url, warcRecorder, cookies, timer, ContentTags.empty(), HttpFetcher.ProbeType.DISABLED);
|
HttpFetchResult result = fetcher.fetchContent(url, warcRecorder, cookies, timer, ContentTags.empty(), HttpFetcher.ProbeType.DISABLED);
|
||||||
timer.waitFetchDelay(0);
|
timer.waitFetchDelay(0);
|
||||||
@@ -331,7 +335,7 @@ public class CrawlerRetreiver implements AutoCloseable {
|
|||||||
|
|
||||||
|
|
||||||
if (feedLink.isEmpty()) {
|
if (feedLink.isEmpty()) {
|
||||||
feedLink = guessFeedUrl(timer);
|
feedLink = guessFeedUrl(timer, robotsRules);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Download the sitemap if available
|
// Download the sitemap if available
|
||||||
@@ -339,7 +343,10 @@ public class CrawlerRetreiver implements AutoCloseable {
|
|||||||
|
|
||||||
// Grab the favicon if it exists
|
// Grab the favicon if it exists
|
||||||
|
|
||||||
if (fetcher.fetchContent(faviconUrl, warcRecorder, cookies, timer, ContentTags.empty(), HttpFetcher.ProbeType.DISABLED) instanceof HttpFetchResult.ResultOk iconResult) {
|
if (robotsRules.isAllowed(faviconUrl.toString())) {
|
||||||
|
if (fetcher.fetchContent(faviconUrl, warcRecorder, cookies, timer, ContentTags.empty(), HttpFetcher.ProbeType.DISABLED)
|
||||||
|
instanceof HttpFetchResult.ResultOk iconResult)
|
||||||
|
{
|
||||||
String contentType = iconResult.header("Content-Type");
|
String contentType = iconResult.header("Content-Type");
|
||||||
byte[] iconData = iconResult.getBodyBytes();
|
byte[] iconData = iconResult.getBodyBytes();
|
||||||
|
|
||||||
@@ -348,6 +355,7 @@ public class CrawlerRetreiver implements AutoCloseable {
|
|||||||
new DomainStateDb.FaviconRecord(contentType, iconData)
|
new DomainStateDb.FaviconRecord(contentType, iconData)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
timer.waitFetchDelay(0);
|
timer.waitFetchDelay(0);
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -383,7 +391,7 @@ public class CrawlerRetreiver implements AutoCloseable {
|
|||||||
"blog/rss"
|
"blog/rss"
|
||||||
);
|
);
|
||||||
|
|
||||||
private Optional<String> guessFeedUrl(CrawlDelayTimer timer) throws InterruptedException {
|
private Optional<String> guessFeedUrl(CrawlDelayTimer timer, SimpleRobotRules robotsRules) throws InterruptedException {
|
||||||
var oldDomainStateRecord = domainStateDb.getSummary(domain);
|
var oldDomainStateRecord = domainStateDb.getSummary(domain);
|
||||||
|
|
||||||
// If we are already aware of an old feed URL, then we can just revalidate it
|
// If we are already aware of an old feed URL, then we can just revalidate it
|
||||||
@@ -396,6 +404,9 @@ public class CrawlerRetreiver implements AutoCloseable {
|
|||||||
|
|
||||||
for (String endpoint : likelyFeedEndpoints) {
|
for (String endpoint : likelyFeedEndpoints) {
|
||||||
String url = "https://" + domain + "/" + endpoint;
|
String url = "https://" + domain + "/" + endpoint;
|
||||||
|
if (!robotsRules.isAllowed(url)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if (validateFeedUrl(url, timer)) {
|
if (validateFeedUrl(url, timer)) {
|
||||||
return Optional.of(url);
|
return Optional.of(url);
|
||||||
}
|
}
|
||||||
|
73
code/processes/new-domain-process/build.gradle
Normal file
73
code/processes/new-domain-process/build.gradle
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
plugins {
|
||||||
|
id 'java'
|
||||||
|
|
||||||
|
id 'application'
|
||||||
|
id 'jvm-test-suite'
|
||||||
|
}
|
||||||
|
|
||||||
|
java {
|
||||||
|
toolchain {
|
||||||
|
languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
application {
|
||||||
|
mainClass = 'nu.marginalia.ping.PingMain'
|
||||||
|
applicationName = 'ping-process'
|
||||||
|
}
|
||||||
|
|
||||||
|
tasks.distZip.enabled = false
|
||||||
|
|
||||||
|
apply from: "$rootProject.projectDir/srcsets.gradle"
|
||||||
|
|
||||||
|
dependencies {
|
||||||
|
|
||||||
|
implementation project(':code:common:db')
|
||||||
|
implementation project(':code:common:model')
|
||||||
|
implementation project(':code:common:config')
|
||||||
|
implementation project(':code:common:service')
|
||||||
|
|
||||||
|
implementation project(':code:libraries:domain-lock')
|
||||||
|
implementation project(':code:libraries:geo-ip')
|
||||||
|
implementation project(':code:libraries:message-queue')
|
||||||
|
implementation project(':code:libraries:blocking-thread-pool')
|
||||||
|
|
||||||
|
implementation project(':code:processes:process-mq-api')
|
||||||
|
implementation project(':code:processes:crawling-process:ft-content-type')
|
||||||
|
implementation project(':code:processes:crawling-process:ft-link-parser')
|
||||||
|
|
||||||
|
|
||||||
|
implementation libs.bundles.slf4j
|
||||||
|
implementation libs.notnull
|
||||||
|
implementation libs.guava
|
||||||
|
|
||||||
|
implementation dependencies.create(libs.guice.get()) {
|
||||||
|
exclude group: 'com.google.guava'
|
||||||
|
}
|
||||||
|
implementation libs.gson
|
||||||
|
implementation libs.zstd
|
||||||
|
implementation libs.bucket4j
|
||||||
|
implementation libs.crawlercommons
|
||||||
|
implementation libs.jsoup
|
||||||
|
implementation libs.fastutil
|
||||||
|
implementation libs.bundles.curator
|
||||||
|
implementation libs.bundles.mariadb
|
||||||
|
implementation libs.bundles.httpcomponents
|
||||||
|
implementation libs.commons.lang3
|
||||||
|
|
||||||
|
testImplementation libs.bundles.slf4j.test
|
||||||
|
testImplementation libs.bundles.junit
|
||||||
|
testImplementation libs.mockito
|
||||||
|
|
||||||
|
testImplementation libs.wiremock
|
||||||
|
|
||||||
|
|
||||||
|
testImplementation platform('org.testcontainers:testcontainers-bom:1.17.4')
|
||||||
|
testImplementation libs.commons.codec
|
||||||
|
testImplementation 'org.testcontainers:mariadb:1.17.4'
|
||||||
|
testImplementation 'org.testcontainers:junit-jupiter:1.17.4'
|
||||||
|
testImplementation project(':code:libraries:test-helpers')
|
||||||
|
|
||||||
|
testImplementation project(':code:processes:test-data')
|
||||||
|
}
|
||||||
|
|
@@ -0,0 +1,146 @@
|
|||||||
|
package nu.marginalia.ndp;
|
||||||
|
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.Singleton;
|
||||||
|
import nu.marginalia.WmsaHome;
|
||||||
|
import nu.marginalia.contenttype.ContentType;
|
||||||
|
import nu.marginalia.contenttype.DocumentBodyToString;
|
||||||
|
import nu.marginalia.coordination.DomainCoordinator;
|
||||||
|
import nu.marginalia.link_parser.LinkParser;
|
||||||
|
import nu.marginalia.model.EdgeDomain;
|
||||||
|
import nu.marginalia.model.EdgeUrl;
|
||||||
|
import nu.marginalia.ndp.io.HttpClientProvider;
|
||||||
|
import org.apache.hc.client5.http.classic.HttpClient;
|
||||||
|
import org.apache.hc.core5.http.io.entity.EntityUtils;
|
||||||
|
import org.apache.hc.core5.http.io.support.ClassicRequestBuilder;
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
|
import org.jsoup.nodes.Document;
|
||||||
|
import org.jsoup.nodes.Element;
|
||||||
|
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.security.KeyManagementException;
|
||||||
|
import java.security.NoSuchAlgorithmException;
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.Optional;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
/** Evaluates a domain to determine if it is worth indexing.
|
||||||
|
* This class fetches the root document, checks the response code, content type,
|
||||||
|
* and parses the HTML to ensure it smells alright.
|
||||||
|
*/
|
||||||
|
@Singleton
|
||||||
|
public class DomainEvaluator {
|
||||||
|
private final HttpClient client;
|
||||||
|
private final String userAgentString = WmsaHome.getUserAgent().uaString();
|
||||||
|
|
||||||
|
private final LinkParser linkParser = new LinkParser();
|
||||||
|
private final DomainCoordinator domainCoordinator;
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public DomainEvaluator(DomainCoordinator domainCoordinator) throws NoSuchAlgorithmException, KeyManagementException {
|
||||||
|
this.domainCoordinator = domainCoordinator;
|
||||||
|
client = HttpClientProvider.createClient();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean evaluateDomain(String domainName) {
|
||||||
|
var edgeDomain = new EdgeDomain(domainName);
|
||||||
|
|
||||||
|
// Grab a lock on the domain to prevent concurrent evaluations between processes
|
||||||
|
try (var lock = domainCoordinator.lockDomain(edgeDomain)) {
|
||||||
|
var rootUrl = edgeDomain.toRootUrlHttps();
|
||||||
|
|
||||||
|
var request = ClassicRequestBuilder.get(rootUrl.asURI())
|
||||||
|
.addHeader("User-Agent", userAgentString)
|
||||||
|
.addHeader("Accept-Encoding", "gzip")
|
||||||
|
.addHeader("Accept", "text/html,application/xhtml+xml;q=0.9")
|
||||||
|
.build();
|
||||||
|
|
||||||
|
return client.execute(request, (rsp) -> {
|
||||||
|
if (rsp.getEntity() == null)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Check if the response code indicates a successful fetch
|
||||||
|
if (200 != rsp.getCode()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
byte[] content;
|
||||||
|
// Read the content from the response entity
|
||||||
|
try (InputStream contentStream = rsp.getEntity().getContent()) {
|
||||||
|
content = contentStream.readNBytes(8192);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse the content (if it's valid)
|
||||||
|
ContentType contentType = ContentType.parse(rsp.getEntity().getContentType());
|
||||||
|
|
||||||
|
// Validate the content type
|
||||||
|
if (!contentType.contentType().startsWith("text/html") && !contentType.contentType().startsWith("application/xhtml+xml"))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Parse the document body to a Jsoup Document
|
||||||
|
final Document document = Jsoup.parse(DocumentBodyToString.getStringData(contentType, content));
|
||||||
|
final String text = document.body().text();
|
||||||
|
|
||||||
|
if (text.length() < 100)
|
||||||
|
return false;
|
||||||
|
if (text.contains("404 Not Found") || text.contains("Page not found"))
|
||||||
|
return false;
|
||||||
|
if (hasMetaRefresh(document))
|
||||||
|
return false; // This almost always indicates a parked domain
|
||||||
|
if (!hasInternalLink(document, edgeDomain, rootUrl))
|
||||||
|
return false; // No internal links means it's not worth indexing
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
finally {
|
||||||
|
// May or may not be necessary, but let's ensure we clean up the response entity
|
||||||
|
// to avoid resource leaks
|
||||||
|
EntityUtils.consumeQuietly(rsp.getEntity());
|
||||||
|
|
||||||
|
// Sleep for a while before yielding the lock, to avoid immediately hammering the domain
|
||||||
|
// from another process
|
||||||
|
sleepQuietly(Duration.ofSeconds(1));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
catch (Exception ex) {
|
||||||
|
return false; // If we fail to fetch or parse the domain, we consider it invalid
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean hasInternalLink(Document document, EdgeDomain currentDomain, EdgeUrl rootUrl) {
|
||||||
|
for (Element atag : document.select("a")) {
|
||||||
|
Optional<EdgeDomain> destDomain = linkParser
|
||||||
|
.parseLink(rootUrl, atag)
|
||||||
|
.map(EdgeUrl::getDomain);
|
||||||
|
|
||||||
|
if (destDomain.isPresent() && Objects.equals(currentDomain, destDomain.get()))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean hasMetaRefresh(Document document) {
|
||||||
|
for (Element metaTag : document.select("meta")) {
|
||||||
|
if ("refresh".equalsIgnoreCase(metaTag.attr("http-equiv")))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void sleepQuietly(Duration duration) {
|
||||||
|
try {
|
||||||
|
TimeUnit.MILLISECONDS.sleep(duration.toMillis());
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
@@ -0,0 +1,131 @@
|
|||||||
|
package nu.marginalia.ndp;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
|
import nu.marginalia.nodecfg.NodeConfigurationService;
|
||||||
|
import org.jetbrains.annotations.NotNull;
|
||||||
|
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.PriorityQueue;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/** DomainAllocator is responsible for assigning domains to partitions/nodes.
|
||||||
|
* This is ensured to make sure that domains are evenly distributed across the nodes.
|
||||||
|
*/
|
||||||
|
public class DomainNodeAllocator {
|
||||||
|
|
||||||
|
private final NodeConfigurationService nodeConfigurationService;
|
||||||
|
private final HikariDataSource dataSource;
|
||||||
|
private final PriorityQueue<NodeCount> countPerNode = new PriorityQueue<>();
|
||||||
|
|
||||||
|
private volatile boolean initialized = false;
|
||||||
|
|
||||||
|
private record NodeCount(int nodeId, int count)
|
||||||
|
implements Comparable<NodeCount>
|
||||||
|
{
|
||||||
|
public NodeCount incrementCount() {
|
||||||
|
return new NodeCount(nodeId, count + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compareTo(@NotNull DomainNodeAllocator.NodeCount o) {
|
||||||
|
return Integer.compare(this.count, o.count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public DomainNodeAllocator(NodeConfigurationService nodeConfigurationService, HikariDataSource dataSource) {
|
||||||
|
this.nodeConfigurationService = nodeConfigurationService;
|
||||||
|
this.dataSource = dataSource;
|
||||||
|
|
||||||
|
Thread.ofPlatform()
|
||||||
|
.name("DomainNodeAllocator::initialize()")
|
||||||
|
.start(this::initialize);
|
||||||
|
}
|
||||||
|
|
||||||
|
public synchronized int totalCount() {
|
||||||
|
ensureInitialized();
|
||||||
|
return countPerNode.stream().mapToInt(NodeCount::count).sum();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the next node ID to assign a domain to.
|
||||||
|
* This method is synchronized to ensure thread safety when multiple threads are allocating domains.
|
||||||
|
* The node ID returned is guaranteed to be one of the viable nodes configured in the system.
|
||||||
|
*/
|
||||||
|
public synchronized int nextNodeId() {
|
||||||
|
ensureInitialized();
|
||||||
|
|
||||||
|
// Synchronized is fine here as this is not a hot path
|
||||||
|
// (and PriorityBlockingQueue won't help since we're re-adding the same element with a new count all the time)
|
||||||
|
|
||||||
|
NodeCount allocation = countPerNode.remove();
|
||||||
|
countPerNode.add(allocation.incrementCount());
|
||||||
|
return allocation.nodeId();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private void ensureInitialized() {
|
||||||
|
if (initialized) return;
|
||||||
|
|
||||||
|
synchronized (this) {
|
||||||
|
while (!initialized) {
|
||||||
|
try {
|
||||||
|
// Wait until the initialization is complete
|
||||||
|
this.wait(1000);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
throw new RuntimeException("DomainAllocator initialization interrupted", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void initialize() {
|
||||||
|
if (initialized) return;
|
||||||
|
|
||||||
|
Set<Integer> viableNodes = new HashSet<>();
|
||||||
|
|
||||||
|
// Find all viable nodes that can handle batch crawls
|
||||||
|
for (var node : nodeConfigurationService.getAll()) {
|
||||||
|
if (node.disabled())
|
||||||
|
continue;
|
||||||
|
if (node.profile().permitBatchCrawl())
|
||||||
|
viableNodes.add(node.node());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch the current counts of domains per node from the database
|
||||||
|
try (var conn = dataSource.getConnection();
|
||||||
|
var stmt = conn.prepareStatement("""
|
||||||
|
SELECT COUNT(*) AS CNT, NODE_AFFINITY
|
||||||
|
FROM EC_DOMAIN
|
||||||
|
WHERE NODE_AFFINITY>0
|
||||||
|
GROUP BY NODE_AFFINITY
|
||||||
|
"""))
|
||||||
|
{
|
||||||
|
|
||||||
|
var rs = stmt.executeQuery();
|
||||||
|
while (rs.next()) {
|
||||||
|
|
||||||
|
int nodeId = rs.getInt("NODE_AFFINITY");
|
||||||
|
int count = rs.getInt("CNT");
|
||||||
|
|
||||||
|
if (viableNodes.remove(nodeId)) {
|
||||||
|
countPerNode.add(new NodeCount(nodeId, count));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RuntimeException("Failed to load domain counts from database", e);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add any remaining viable nodes that were not found in the database
|
||||||
|
for (int nodeId : viableNodes) {
|
||||||
|
countPerNode.add(new NodeCount(nodeId, 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
initialized = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
@@ -0,0 +1,149 @@
|
|||||||
|
package nu.marginalia.ndp;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
|
import nu.marginalia.ndp.model.DomainToTest;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.sql.Connection;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.concurrent.ArrayBlockingQueue;
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
|
||||||
|
public class DomainTestingQueue {
|
||||||
|
private static Logger logger = LoggerFactory.getLogger(DomainTestingQueue.class);
|
||||||
|
|
||||||
|
private final ArrayBlockingQueue<DomainToTest> queue = new ArrayBlockingQueue<>(2);
|
||||||
|
|
||||||
|
// This will grow quite large, but should be manageable in memory, as theoretical maximum is around 100M domains,
|
||||||
|
// order of 2 GB in memory.
|
||||||
|
private final ConcurrentHashMap<String, Boolean> takenDomains = new ConcurrentHashMap<>();
|
||||||
|
|
||||||
|
private final HikariDataSource dataSource;
|
||||||
|
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public DomainTestingQueue(HikariDataSource dataSource) {
|
||||||
|
this.dataSource = dataSource;
|
||||||
|
|
||||||
|
Thread.ofPlatform()
|
||||||
|
.name("DomainTestingQueue::fetch()")
|
||||||
|
.start(this::fetch);
|
||||||
|
}
|
||||||
|
|
||||||
|
public DomainToTest next() throws InterruptedException {
|
||||||
|
return queue.take();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void accept(DomainToTest domain, int nodeId) {
|
||||||
|
try (var conn = dataSource.getConnection();
|
||||||
|
var flagOkStmt = conn.prepareStatement("""
|
||||||
|
UPDATE NDP_NEW_DOMAINS
|
||||||
|
SET STATE='ACCEPTED'
|
||||||
|
WHERE DOMAIN_ID=?
|
||||||
|
""");
|
||||||
|
var assigNodeStmt = conn.prepareStatement("""
|
||||||
|
UPDATE EC_DOMAIN SET NODE_AFFINITY=?
|
||||||
|
WHERE ID=?
|
||||||
|
""")
|
||||||
|
)
|
||||||
|
{
|
||||||
|
conn.setAutoCommit(false);
|
||||||
|
flagOkStmt.setInt(1, domain.domainId());
|
||||||
|
flagOkStmt.executeUpdate();
|
||||||
|
|
||||||
|
assigNodeStmt.setInt(1, nodeId);
|
||||||
|
assigNodeStmt.setInt(2, domain.domainId());
|
||||||
|
assigNodeStmt.executeUpdate();
|
||||||
|
conn.commit();
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RuntimeException("Failed to accept domain in database", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void reject(DomainToTest domain) {
|
||||||
|
try (var conn = dataSource.getConnection();
|
||||||
|
var stmt = conn.prepareStatement("""
|
||||||
|
UPDATE NDP_NEW_DOMAINS
|
||||||
|
SET STATE='REJECTED', CHECK_COUNT=CHECK_COUNT + 1
|
||||||
|
WHERE DOMAIN_ID=?
|
||||||
|
"""))
|
||||||
|
{
|
||||||
|
conn.setAutoCommit(false);
|
||||||
|
stmt.setInt(1, domain.domainId());
|
||||||
|
stmt.executeUpdate();
|
||||||
|
conn.commit();
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RuntimeException("Failed to reject domain in database", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void fetch() {
|
||||||
|
while (true) {
|
||||||
|
List<DomainToTest> domains = new ArrayList<>(2000);
|
||||||
|
try (var conn = dataSource.getConnection();
|
||||||
|
var stmt = conn.prepareStatement("""
|
||||||
|
SELECT DOMAIN_ID, DOMAIN_NAME
|
||||||
|
FROM NDP_NEW_DOMAINS
|
||||||
|
INNER JOIN EC_DOMAIN ON ID=DOMAIN_ID
|
||||||
|
WHERE NDP_NEW_DOMAINS.STATE = 'NEW'
|
||||||
|
ORDER BY PRIORITY DESC
|
||||||
|
LIMIT 2000
|
||||||
|
"""))
|
||||||
|
{
|
||||||
|
var rs = stmt.executeQuery();
|
||||||
|
|
||||||
|
while (rs.next()) {
|
||||||
|
int domainId = rs.getInt("DOMAIN_ID");
|
||||||
|
String domainName = rs.getString("DOMAIN_NAME");
|
||||||
|
if (takenDomains.put(domainName, true) != null) {
|
||||||
|
logger.warn("Domain {} is already processed, skipping", domainName);
|
||||||
|
continue; // Skip if already taken
|
||||||
|
}
|
||||||
|
domains.add(new DomainToTest(domainName, domainId));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (domains.isEmpty()) {
|
||||||
|
refreshQueue(conn);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
throw new RuntimeException("Failed to fetch domains from database", e);
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
for (var domain : domains) {
|
||||||
|
queue.put(domain);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (InterruptedException e) {
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
throw new RuntimeException("Domain fetching interrupted", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void refreshQueue(Connection conn) {
|
||||||
|
logger.info("Refreshing domain queue in database");
|
||||||
|
try (var stmt = conn.createStatement()) {
|
||||||
|
conn.setAutoCommit(false);
|
||||||
|
logger.info("Revitalizing rejected domains");
|
||||||
|
|
||||||
|
// Revitalize rejected domains
|
||||||
|
stmt.executeUpdate("""
|
||||||
|
UPDATE NDP_NEW_DOMAINS
|
||||||
|
SET STATE='NEW'
|
||||||
|
WHERE NDP_NEW_DOMAINS.STATE = 'REJECTED'
|
||||||
|
AND DATE_ADD(TS_CHANGE, INTERVAL CHECK_COUNT DAY) > NOW()
|
||||||
|
""");
|
||||||
|
conn.commit();
|
||||||
|
|
||||||
|
logger.info("Queue refreshed successfully");
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RuntimeException("Failed to refresh queue in database", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@@ -0,0 +1,159 @@
|
|||||||
|
package nu.marginalia.ndp;
|
||||||
|
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
import com.google.inject.Guice;
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.Injector;
|
||||||
|
import nu.marginalia.WmsaHome;
|
||||||
|
import nu.marginalia.coordination.DomainCoordinationModule;
|
||||||
|
import nu.marginalia.db.DomainBlacklist;
|
||||||
|
import nu.marginalia.geoip.GeoIpDictionary;
|
||||||
|
import nu.marginalia.mq.MessageQueueFactory;
|
||||||
|
import nu.marginalia.mqapi.ProcessInboxNames;
|
||||||
|
import nu.marginalia.mqapi.ndp.NdpRequest;
|
||||||
|
import nu.marginalia.ndp.model.DomainToTest;
|
||||||
|
import nu.marginalia.process.ProcessConfiguration;
|
||||||
|
import nu.marginalia.process.ProcessConfigurationModule;
|
||||||
|
import nu.marginalia.process.ProcessMainClass;
|
||||||
|
import nu.marginalia.process.control.ProcessHeartbeat;
|
||||||
|
import nu.marginalia.service.module.DatabaseModule;
|
||||||
|
import nu.marginalia.service.module.ServiceDiscoveryModule;
|
||||||
|
import nu.marginalia.util.SimpleBlockingThreadPool;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.security.Security;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
|
||||||
|
public class NdpMain extends ProcessMainClass {
|
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(NdpMain.class);
|
||||||
|
private final DomainNodeAllocator domainNodeAllocator;
|
||||||
|
private final DomainTestingQueue domainTestingQueue;
|
||||||
|
private final ProcessHeartbeat processHeartbeat;
|
||||||
|
private final DomainEvaluator domainEvaluator;
|
||||||
|
private final DomainBlacklist domainBlacklist;
|
||||||
|
|
||||||
|
private final AtomicInteger domainCount = new AtomicInteger(0);
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public NdpMain(MessageQueueFactory messageQueueFactory,
|
||||||
|
ProcessConfiguration config,
|
||||||
|
DomainNodeAllocator domainNodeAllocator,
|
||||||
|
DomainTestingQueue domainTestingQueue,
|
||||||
|
DomainEvaluator domainEvaluator,
|
||||||
|
DomainBlacklist domainBlacklist,
|
||||||
|
ProcessHeartbeat processHeartbeat,
|
||||||
|
Gson gson)
|
||||||
|
{
|
||||||
|
super(messageQueueFactory, config, gson, ProcessInboxNames.NDP_INBOX);
|
||||||
|
|
||||||
|
this.domainNodeAllocator = domainNodeAllocator;
|
||||||
|
this.domainEvaluator = domainEvaluator;
|
||||||
|
this.domainBlacklist = domainBlacklist;
|
||||||
|
this.domainTestingQueue = domainTestingQueue;
|
||||||
|
this.processHeartbeat = processHeartbeat;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void run(int goalCount) throws InterruptedException {
|
||||||
|
logger.info("Wait for blacklist to load...");
|
||||||
|
domainBlacklist.waitUntilLoaded();
|
||||||
|
|
||||||
|
SimpleBlockingThreadPool threadPool = new SimpleBlockingThreadPool(
|
||||||
|
"NDP-Worker",
|
||||||
|
8,
|
||||||
|
10,
|
||||||
|
SimpleBlockingThreadPool.ThreadType.PLATFORM
|
||||||
|
);
|
||||||
|
|
||||||
|
logger.info("Starting NDP process");
|
||||||
|
|
||||||
|
int toInsertCount = goalCount - domainNodeAllocator.totalCount();
|
||||||
|
|
||||||
|
if (toInsertCount <= 0) {
|
||||||
|
logger.info("No new domains to process. Current count: " + domainNodeAllocator.totalCount());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try (var hb = processHeartbeat.createAdHocTaskHeartbeat("Growing Index")) {
|
||||||
|
int cnt;
|
||||||
|
while ((cnt = domainCount.get()) < toInsertCount) {
|
||||||
|
if (cnt % 100 == 0) {
|
||||||
|
hb.progress("Discovery Process", cnt, toInsertCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
final DomainToTest nextDomain = domainTestingQueue.next();
|
||||||
|
threadPool.submit(() -> {
|
||||||
|
try {
|
||||||
|
if (domainEvaluator.evaluateDomain(nextDomain.domainName())) {
|
||||||
|
logger.info("Accepting: {}", nextDomain.domainName());
|
||||||
|
domainCount.incrementAndGet();
|
||||||
|
domainTestingQueue.accept(nextDomain, domainNodeAllocator.nextNodeId());
|
||||||
|
} else {
|
||||||
|
logger.info("Rejecting: {}", nextDomain.domainName());
|
||||||
|
domainTestingQueue.reject(nextDomain);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
domainTestingQueue.reject(nextDomain);
|
||||||
|
logger.error("Error evaluating domain: " + nextDomain.domainId(), e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
threadPool.shutDown();
|
||||||
|
// Wait for all tasks to complete or give up after 1 hour
|
||||||
|
threadPool.awaitTermination(1, TimeUnit.HOURS);
|
||||||
|
|
||||||
|
logger.info("NDP process completed. Total domains processed: " + domainCount.get());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
// Prevent Java from caching DNS lookups forever (filling up the system RAM as a result)
|
||||||
|
Security.setProperty("networkaddress.cache.ttl" , "3600");
|
||||||
|
|
||||||
|
// This must run *early*
|
||||||
|
System.setProperty("http.agent", WmsaHome.getUserAgent().uaString());
|
||||||
|
|
||||||
|
// If these aren't set properly, the JVM will hang forever on some requests
|
||||||
|
System.setProperty("sun.net.client.defaultConnectTimeout", "30000");
|
||||||
|
System.setProperty("sun.net.client.defaultReadTimeout", "30000");
|
||||||
|
|
||||||
|
// Set the maximum number of connections to keep alive in the connection pool
|
||||||
|
System.setProperty("jdk.httpclient.idleTimeout", "15"); // 15 seconds
|
||||||
|
System.setProperty("jdk.httpclient.connectionPoolSize", "256");
|
||||||
|
|
||||||
|
// We don't want to use too much memory caching sessions for https
|
||||||
|
System.setProperty("javax.net.ssl.sessionCacheSize", "2048");
|
||||||
|
|
||||||
|
|
||||||
|
Injector injector = Guice.createInjector(
|
||||||
|
new NdpModule(),
|
||||||
|
new ServiceDiscoveryModule(),
|
||||||
|
new DomainCoordinationModule(),
|
||||||
|
new ProcessConfigurationModule("ndp"),
|
||||||
|
new DatabaseModule(false)
|
||||||
|
);
|
||||||
|
|
||||||
|
GeoIpDictionary geoIpDictionary = injector.getInstance(GeoIpDictionary.class);
|
||||||
|
|
||||||
|
geoIpDictionary.waitReady(); // Ensure the GeoIpDictionary is ready before proceeding
|
||||||
|
|
||||||
|
NdpMain main = injector.getInstance(NdpMain.class);
|
||||||
|
|
||||||
|
var instructions = main.fetchInstructions(NdpRequest.class);
|
||||||
|
|
||||||
|
try {
|
||||||
|
main.run(instructions.value().goal());
|
||||||
|
instructions.ok();
|
||||||
|
}
|
||||||
|
catch (Throwable ex) {
|
||||||
|
logger.error("Error running ping process", ex);
|
||||||
|
instructions.err();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@@ -0,0 +1,8 @@
|
|||||||
|
package nu.marginalia.ndp;
|
||||||
|
|
||||||
|
import com.google.inject.AbstractModule;
|
||||||
|
|
||||||
|
public class NdpModule extends AbstractModule {
|
||||||
|
public void configure() {
|
||||||
|
}
|
||||||
|
}
|
@@ -0,0 +1,126 @@
|
|||||||
|
package nu.marginalia.ndp.io;
|
||||||
|
|
||||||
|
import com.google.inject.Provider;
|
||||||
|
import org.apache.hc.client5.http.ConnectionKeepAliveStrategy;
|
||||||
|
import org.apache.hc.client5.http.classic.HttpClient;
|
||||||
|
import org.apache.hc.client5.http.config.ConnectionConfig;
|
||||||
|
import org.apache.hc.client5.http.config.RequestConfig;
|
||||||
|
import org.apache.hc.client5.http.cookie.StandardCookieSpec;
|
||||||
|
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
|
||||||
|
import org.apache.hc.client5.http.impl.classic.HttpClients;
|
||||||
|
import org.apache.hc.client5.http.impl.io.PoolingHttpClientConnectionManager;
|
||||||
|
import org.apache.hc.client5.http.impl.io.PoolingHttpClientConnectionManagerBuilder;
|
||||||
|
import org.apache.hc.core5.http.HeaderElement;
|
||||||
|
import org.apache.hc.core5.http.HeaderElements;
|
||||||
|
import org.apache.hc.core5.http.HttpResponse;
|
||||||
|
import org.apache.hc.core5.http.io.SocketConfig;
|
||||||
|
import org.apache.hc.core5.http.message.MessageSupport;
|
||||||
|
import org.apache.hc.core5.http.protocol.HttpContext;
|
||||||
|
import org.apache.hc.core5.util.TimeValue;
|
||||||
|
import org.apache.hc.core5.util.Timeout;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.security.KeyManagementException;
|
||||||
|
import java.security.NoSuchAlgorithmException;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
public class HttpClientProvider implements Provider<HttpClient> {
|
||||||
|
private static final HttpClient client;
|
||||||
|
private static PoolingHttpClientConnectionManager connectionManager;
|
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(HttpClientProvider.class);
|
||||||
|
|
||||||
|
static {
|
||||||
|
try {
|
||||||
|
client = createClient();
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static CloseableHttpClient createClient() throws NoSuchAlgorithmException, KeyManagementException {
|
||||||
|
final ConnectionConfig connectionConfig = ConnectionConfig.custom()
|
||||||
|
.setSocketTimeout(15, TimeUnit.SECONDS)
|
||||||
|
.setConnectTimeout(15, TimeUnit.SECONDS)
|
||||||
|
.setValidateAfterInactivity(TimeValue.ofSeconds(5))
|
||||||
|
.build();
|
||||||
|
|
||||||
|
|
||||||
|
connectionManager = PoolingHttpClientConnectionManagerBuilder.create()
|
||||||
|
.setMaxConnPerRoute(2)
|
||||||
|
.setMaxConnTotal(50)
|
||||||
|
.setDefaultConnectionConfig(connectionConfig)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
connectionManager.setDefaultSocketConfig(SocketConfig.custom()
|
||||||
|
.setSoLinger(TimeValue.ofSeconds(-1))
|
||||||
|
.setSoTimeout(Timeout.ofSeconds(10))
|
||||||
|
.build()
|
||||||
|
);
|
||||||
|
|
||||||
|
Thread.ofPlatform().daemon(true).start(() -> {
|
||||||
|
try {
|
||||||
|
for (;;) {
|
||||||
|
TimeUnit.SECONDS.sleep(15);
|
||||||
|
logger.info("Connection pool stats: {}", connectionManager.getTotalStats());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (InterruptedException e) {
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
final RequestConfig defaultRequestConfig = RequestConfig.custom()
|
||||||
|
.setCookieSpec(StandardCookieSpec.IGNORE)
|
||||||
|
.setResponseTimeout(10, TimeUnit.SECONDS)
|
||||||
|
.setConnectionRequestTimeout(5, TimeUnit.MINUTES)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
return HttpClients.custom()
|
||||||
|
.setConnectionManager(connectionManager)
|
||||||
|
.setRetryStrategy(new RetryStrategy())
|
||||||
|
.setKeepAliveStrategy(new ConnectionKeepAliveStrategy() {
|
||||||
|
// Default keep-alive duration is 3 minutes, but this is too long for us,
|
||||||
|
// as we are either going to re-use it fairly quickly or close it for a long time.
|
||||||
|
//
|
||||||
|
// So we set it to 30 seconds or clamp the server-provided value to a minimum of 10 seconds.
|
||||||
|
private static final TimeValue defaultValue = TimeValue.ofSeconds(30);
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TimeValue getKeepAliveDuration(HttpResponse response, HttpContext context) {
|
||||||
|
final Iterator<HeaderElement> it = MessageSupport.iterate(response, HeaderElements.KEEP_ALIVE);
|
||||||
|
|
||||||
|
while (it.hasNext()) {
|
||||||
|
final HeaderElement he = it.next();
|
||||||
|
final String param = he.getName();
|
||||||
|
final String value = he.getValue();
|
||||||
|
|
||||||
|
if (value == null)
|
||||||
|
continue;
|
||||||
|
if (!"timeout".equalsIgnoreCase(param))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
try {
|
||||||
|
long timeout = Long.parseLong(value);
|
||||||
|
timeout = Math.clamp(timeout, 30, defaultValue.toSeconds());
|
||||||
|
return TimeValue.ofSeconds(timeout);
|
||||||
|
} catch (final NumberFormatException ignore) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return defaultValue;
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.disableRedirectHandling()
|
||||||
|
.setDefaultRequestConfig(defaultRequestConfig)
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public HttpClient get() {
|
||||||
|
return client;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@@ -0,0 +1,79 @@
|
|||||||
|
package nu.marginalia.ndp.io;
|
||||||
|
|
||||||
|
import org.apache.hc.client5.http.HttpHostConnectException;
|
||||||
|
import org.apache.hc.client5.http.HttpRequestRetryStrategy;
|
||||||
|
import org.apache.hc.core5.http.HttpRequest;
|
||||||
|
import org.apache.hc.core5.http.HttpResponse;
|
||||||
|
import org.apache.hc.core5.http.protocol.HttpContext;
|
||||||
|
import org.apache.hc.core5.util.TimeValue;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import javax.net.ssl.SSLException;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.SocketException;
|
||||||
|
import java.net.SocketTimeoutException;
|
||||||
|
import java.net.UnknownHostException;
|
||||||
|
|
||||||
|
public class RetryStrategy implements HttpRequestRetryStrategy {
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(RetryStrategy.class);
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean retryRequest(HttpRequest request, IOException exception, int executionCount, HttpContext context) {
|
||||||
|
return switch (exception) {
|
||||||
|
case SocketTimeoutException ste -> false;
|
||||||
|
case SSLException ssle -> false;
|
||||||
|
case UnknownHostException uhe -> false;
|
||||||
|
case HttpHostConnectException ex -> executionCount < 2;
|
||||||
|
case SocketException ex -> executionCount < 2;
|
||||||
|
default -> executionCount <= 3;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean retryRequest(HttpResponse response, int executionCount, HttpContext context) {
|
||||||
|
return switch (response.getCode()) {
|
||||||
|
case 500, 503 -> executionCount <= 2;
|
||||||
|
case 429 -> executionCount <= 3;
|
||||||
|
default -> false;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TimeValue getRetryInterval(HttpRequest request, IOException exception, int executionCount, HttpContext context) {
|
||||||
|
return TimeValue.ofSeconds(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TimeValue getRetryInterval(HttpResponse response, int executionCount, HttpContext context) {
|
||||||
|
|
||||||
|
int statusCode = response.getCode();
|
||||||
|
|
||||||
|
// Give 503 a bit more time
|
||||||
|
if (statusCode == 503) return TimeValue.ofSeconds(5);
|
||||||
|
|
||||||
|
if (statusCode == 429) {
|
||||||
|
// get the Retry-After header
|
||||||
|
var retryAfterHeader = response.getFirstHeader("Retry-After");
|
||||||
|
if (retryAfterHeader == null) {
|
||||||
|
return TimeValue.ofSeconds(3);
|
||||||
|
}
|
||||||
|
|
||||||
|
String retryAfter = retryAfterHeader.getValue();
|
||||||
|
if (retryAfter == null) {
|
||||||
|
return TimeValue.ofSeconds(2);
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
int retryAfterTime = Integer.parseInt(retryAfter);
|
||||||
|
retryAfterTime = Math.clamp(retryAfterTime, 1, 5);
|
||||||
|
|
||||||
|
return TimeValue.ofSeconds(retryAfterTime);
|
||||||
|
} catch (NumberFormatException e) {
|
||||||
|
logger.warn("Invalid Retry-After header: {}", retryAfter);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return TimeValue.ofSeconds(2);
|
||||||
|
}
|
||||||
|
}
|
@@ -0,0 +1,4 @@
|
|||||||
|
package nu.marginalia.ndp.model;
|
||||||
|
|
||||||
|
public record DomainToTest(String domainName, int domainId) {
|
||||||
|
}
|
@@ -0,0 +1,29 @@
|
|||||||
|
package nu.marginalia.ndp;
|
||||||
|
|
||||||
|
import nu.marginalia.coordination.LocalDomainCoordinator;
|
||||||
|
import org.junit.jupiter.api.Tag;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.security.KeyManagementException;
|
||||||
|
import java.security.NoSuchAlgorithmException;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
|
|
||||||
|
class DomainEvaluatorTest {
|
||||||
|
|
||||||
|
@Tag("flaky") // Exclude from CI runs due to potential network issues
|
||||||
|
@Test
|
||||||
|
public void testSunnyDay() throws NoSuchAlgorithmException, KeyManagementException {
|
||||||
|
DomainEvaluator evaluator = new DomainEvaluator(new LocalDomainCoordinator());
|
||||||
|
|
||||||
|
// Should be a valid domain
|
||||||
|
assertTrue(evaluator.evaluateDomain("www.marginalia.nu"));
|
||||||
|
|
||||||
|
// Should be a redirect to www.marginalia.nu
|
||||||
|
assertFalse(evaluator.evaluateDomain("memex.marginalia.nu"));
|
||||||
|
|
||||||
|
// Should fail on Anubis
|
||||||
|
assertFalse(evaluator.evaluateDomain("marginalia-search.com"));
|
||||||
|
}
|
||||||
|
}
|
12
code/processes/ping-process/README.md
Normal file
12
code/processes/ping-process/README.md
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
The ping process (which has nothing to do with ICMP ping) keeps track of
|
||||||
|
the aliveness of websites. It also gathers fingerprint information about
|
||||||
|
the security posture of the website, as well as DNS information.
|
||||||
|
|
||||||
|
This is kept to build an idea of when a website is down, and to identify
|
||||||
|
ownership changes, as well as other significant events in the lifecycle
|
||||||
|
of a website.
|
||||||
|
|
||||||
|
# Central Classes
|
||||||
|
|
||||||
|
* [PingMain](java/nu/marginalia/ping/PingMain.java) main class.
|
||||||
|
* [PingJobScheduler](java/nu/marginalia/ping/PingJobScheduler.java) service that dispatches pings.
|
@@ -18,13 +18,18 @@ import org.apache.hc.core5.http.HttpResponse;
|
|||||||
import org.apache.hc.core5.http.io.SocketConfig;
|
import org.apache.hc.core5.http.io.SocketConfig;
|
||||||
import org.apache.hc.core5.http.message.MessageSupport;
|
import org.apache.hc.core5.http.message.MessageSupport;
|
||||||
import org.apache.hc.core5.http.protocol.HttpContext;
|
import org.apache.hc.core5.http.protocol.HttpContext;
|
||||||
|
import org.apache.hc.core5.ssl.SSLContextBuilder;
|
||||||
import org.apache.hc.core5.util.TimeValue;
|
import org.apache.hc.core5.util.TimeValue;
|
||||||
import org.apache.hc.core5.util.Timeout;
|
import org.apache.hc.core5.util.Timeout;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import javax.net.ssl.SSLContext;
|
import javax.net.ssl.SSLContext;
|
||||||
|
import javax.net.ssl.TrustManager;
|
||||||
|
import javax.net.ssl.X509TrustManager;
|
||||||
|
import java.security.KeyManagementException;
|
||||||
import java.security.NoSuchAlgorithmException;
|
import java.security.NoSuchAlgorithmException;
|
||||||
|
import java.security.cert.X509Certificate;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
@@ -37,24 +42,55 @@ public class HttpClientProvider implements Provider<HttpClient> {
|
|||||||
static {
|
static {
|
||||||
try {
|
try {
|
||||||
client = createClient();
|
client = createClient();
|
||||||
} catch (NoSuchAlgorithmException e) {
|
} catch (Exception e) {
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static CloseableHttpClient createClient() throws NoSuchAlgorithmException {
|
private static CloseableHttpClient createClient() throws NoSuchAlgorithmException, KeyManagementException {
|
||||||
final ConnectionConfig connectionConfig = ConnectionConfig.custom()
|
final ConnectionConfig connectionConfig = ConnectionConfig.custom()
|
||||||
.setSocketTimeout(15, TimeUnit.SECONDS)
|
.setSocketTimeout(15, TimeUnit.SECONDS)
|
||||||
.setConnectTimeout(15, TimeUnit.SECONDS)
|
.setConnectTimeout(15, TimeUnit.SECONDS)
|
||||||
.setValidateAfterInactivity(TimeValue.ofSeconds(5))
|
.setValidateAfterInactivity(TimeValue.ofSeconds(5))
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
// No-op up front validation of server certificates.
|
||||||
|
//
|
||||||
|
// We will validate certificates later, after the connection is established
|
||||||
|
// as we want to store the certificate chain and validation
|
||||||
|
// outcome to the database.
|
||||||
|
|
||||||
|
var trustMeBro = new X509TrustManager() {
|
||||||
|
private X509Certificate[] lastServerCertChain;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void checkClientTrusted(X509Certificate[] chain, String authType) {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void checkServerTrusted(X509Certificate[] chain, String authType) {
|
||||||
|
this.lastServerCertChain = chain.clone();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public X509Certificate[] getAcceptedIssuers() {
|
||||||
|
return new X509Certificate[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
public X509Certificate[] getLastServerCertChain() {
|
||||||
|
return lastServerCertChain != null ? lastServerCertChain.clone() : null;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
SSLContext sslContext = SSLContextBuilder.create().build();
|
||||||
|
sslContext.init(null, new TrustManager[]{trustMeBro}, null);
|
||||||
|
|
||||||
connectionManager = PoolingHttpClientConnectionManagerBuilder.create()
|
connectionManager = PoolingHttpClientConnectionManagerBuilder.create()
|
||||||
.setMaxConnPerRoute(2)
|
.setMaxConnPerRoute(2)
|
||||||
.setMaxConnTotal(50)
|
.setMaxConnTotal(50)
|
||||||
.setDefaultConnectionConfig(connectionConfig)
|
.setDefaultConnectionConfig(connectionConfig)
|
||||||
.setTlsSocketStrategy(
|
.setTlsSocketStrategy(
|
||||||
new DefaultClientTlsStrategy(SSLContext.getDefault(), NoopHostnameVerifier.INSTANCE))
|
new DefaultClientTlsStrategy(sslContext, NoopHostnameVerifier.INSTANCE))
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
connectionManager.setDefaultSocketConfig(SocketConfig.custom()
|
connectionManager.setDefaultSocketConfig(SocketConfig.custom()
|
||||||
@@ -76,7 +112,7 @@ public class HttpClientProvider implements Provider<HttpClient> {
|
|||||||
});
|
});
|
||||||
|
|
||||||
final RequestConfig defaultRequestConfig = RequestConfig.custom()
|
final RequestConfig defaultRequestConfig = RequestConfig.custom()
|
||||||
.setCookieSpec(StandardCookieSpec.RELAXED)
|
.setCookieSpec(StandardCookieSpec.IGNORE)
|
||||||
.setResponseTimeout(10, TimeUnit.SECONDS)
|
.setResponseTimeout(10, TimeUnit.SECONDS)
|
||||||
.setConnectionRequestTimeout(5, TimeUnit.MINUTES)
|
.setConnectionRequestTimeout(5, TimeUnit.MINUTES)
|
||||||
.build();
|
.build();
|
||||||
|
@@ -11,6 +11,7 @@ import org.slf4j.LoggerFactory;
|
|||||||
|
|
||||||
import javax.net.ssl.SSLException;
|
import javax.net.ssl.SSLException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.net.SocketException;
|
||||||
import java.net.SocketTimeoutException;
|
import java.net.SocketTimeoutException;
|
||||||
import java.net.UnknownHostException;
|
import java.net.UnknownHostException;
|
||||||
|
|
||||||
@@ -23,7 +24,8 @@ public class RetryStrategy implements HttpRequestRetryStrategy {
|
|||||||
case SocketTimeoutException ste -> false;
|
case SocketTimeoutException ste -> false;
|
||||||
case SSLException ssle -> false;
|
case SSLException ssle -> false;
|
||||||
case UnknownHostException uhe -> false;
|
case UnknownHostException uhe -> false;
|
||||||
case HttpHostConnectException ex -> executionCount <= 2; // Only retry once for connection errors
|
case HttpHostConnectException ex -> executionCount < 2;
|
||||||
|
case SocketException ex -> executionCount < 2;
|
||||||
default -> executionCount <= 3;
|
default -> executionCount <= 3;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@@ -1,6 +1,7 @@
|
|||||||
package nu.marginalia.ping.model;
|
package nu.marginalia.ping.model;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.jetbrains.annotations.NotNull;
|
||||||
|
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
import java.sql.Connection;
|
import java.sql.Connection;
|
||||||
@@ -42,7 +43,10 @@ public record DomainSecurityRecord(
|
|||||||
@Nullable String headerXXssProtection,
|
@Nullable String headerXXssProtection,
|
||||||
@Nullable String headerServer,
|
@Nullable String headerServer,
|
||||||
@Nullable String headerXPoweredBy,
|
@Nullable String headerXPoweredBy,
|
||||||
@Nullable Instant tsLastUpdate
|
@Nullable Instant tsLastUpdate,
|
||||||
|
@Nullable Boolean sslChainValid,
|
||||||
|
@Nullable Boolean sslHostValid,
|
||||||
|
@Nullable Boolean sslDateValid
|
||||||
)
|
)
|
||||||
implements WritableModel
|
implements WritableModel
|
||||||
{
|
{
|
||||||
@@ -102,7 +106,11 @@ public record DomainSecurityRecord(
|
|||||||
rs.getString("DOMAIN_SECURITY_INFORMATION.HEADER_X_XSS_PROTECTION"),
|
rs.getString("DOMAIN_SECURITY_INFORMATION.HEADER_X_XSS_PROTECTION"),
|
||||||
rs.getString("DOMAIN_SECURITY_INFORMATION.HEADER_SERVER"),
|
rs.getString("DOMAIN_SECURITY_INFORMATION.HEADER_SERVER"),
|
||||||
rs.getString("DOMAIN_SECURITY_INFORMATION.HEADER_X_POWERED_BY"),
|
rs.getString("DOMAIN_SECURITY_INFORMATION.HEADER_X_POWERED_BY"),
|
||||||
rs.getObject("DOMAIN_SECURITY_INFORMATION.TS_LAST_UPDATE", Instant.class));
|
rs.getObject("DOMAIN_SECURITY_INFORMATION.TS_LAST_UPDATE", Instant.class),
|
||||||
|
rs.getObject("SSL_CHAIN_VALID", Boolean.class),
|
||||||
|
rs.getObject("SSL_HOST_VALID", Boolean.class),
|
||||||
|
rs.getObject("SSL_DATE_VALID", Boolean.class)
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static HttpSchema httpSchemaFromString(@Nullable String schema) {
|
private static HttpSchema httpSchemaFromString(@Nullable String schema) {
|
||||||
@@ -149,8 +157,11 @@ public record DomainSecurityRecord(
|
|||||||
header_x_powered_by,
|
header_x_powered_by,
|
||||||
ssl_cert_public_key_hash,
|
ssl_cert_public_key_hash,
|
||||||
asn,
|
asn,
|
||||||
ts_last_update)
|
ts_last_update,
|
||||||
VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
|
ssl_chain_valid,
|
||||||
|
ssl_host_valid,
|
||||||
|
ssl_date_valid)
|
||||||
|
VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
|
||||||
"""))
|
"""))
|
||||||
{
|
{
|
||||||
ps.setInt(1, domainId());
|
ps.setInt(1, domainId());
|
||||||
@@ -294,6 +305,25 @@ public record DomainSecurityRecord(
|
|||||||
} else {
|
} else {
|
||||||
ps.setTimestamp(32, java.sql.Timestamp.from(tsLastUpdate()));
|
ps.setTimestamp(32, java.sql.Timestamp.from(tsLastUpdate()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (sslChainValid() == null) {
|
||||||
|
ps.setNull(33, java.sql.Types.BOOLEAN);
|
||||||
|
} else {
|
||||||
|
ps.setBoolean(33, sslChainValid());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sslHostValid() == null) {
|
||||||
|
ps.setNull(34, java.sql.Types.BOOLEAN);
|
||||||
|
} else {
|
||||||
|
ps.setBoolean(34, sslHostValid());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sslDateValid() == null) {
|
||||||
|
ps.setNull(35, java.sql.Types.BOOLEAN);
|
||||||
|
} else {
|
||||||
|
ps.setBoolean(35, sslDateValid());
|
||||||
|
}
|
||||||
|
|
||||||
ps.executeUpdate();
|
ps.executeUpdate();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -332,6 +362,13 @@ public record DomainSecurityRecord(
|
|||||||
private String headerXPoweredBy;
|
private String headerXPoweredBy;
|
||||||
private Instant tsLastUpdate;
|
private Instant tsLastUpdate;
|
||||||
|
|
||||||
|
private Boolean isCertChainValid;
|
||||||
|
private Boolean isCertHostValid;
|
||||||
|
private Boolean isCertDateValid;
|
||||||
|
|
||||||
|
|
||||||
|
private static Instant MAX_UNIX_TIMESTAMP = Instant.ofEpochSecond(Integer.MAX_VALUE);
|
||||||
|
|
||||||
public Builder() {
|
public Builder() {
|
||||||
// Default values for boolean fields
|
// Default values for boolean fields
|
||||||
this.sslCertWildcard = false;
|
this.sslCertWildcard = false;
|
||||||
@@ -374,12 +411,18 @@ public record DomainSecurityRecord(
|
|||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Builder sslCertNotBefore(Instant sslCertNotBefore) {
|
public Builder sslCertNotBefore(@NotNull Instant sslCertNotBefore) {
|
||||||
|
if (sslCertNotBefore.isAfter(MAX_UNIX_TIMESTAMP)) {
|
||||||
|
sslCertNotBefore = MAX_UNIX_TIMESTAMP;
|
||||||
|
}
|
||||||
this.sslCertNotBefore = sslCertNotBefore;
|
this.sslCertNotBefore = sslCertNotBefore;
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Builder sslCertNotAfter(Instant sslCertNotAfter) {
|
public Builder sslCertNotAfter(@NotNull Instant sslCertNotAfter) {
|
||||||
|
if (sslCertNotAfter.isAfter(MAX_UNIX_TIMESTAMP)) {
|
||||||
|
sslCertNotAfter = MAX_UNIX_TIMESTAMP;
|
||||||
|
}
|
||||||
this.sslCertNotAfter = sslCertNotAfter;
|
this.sslCertNotAfter = sslCertNotAfter;
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
@@ -499,6 +542,21 @@ public record DomainSecurityRecord(
|
|||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Builder sslChainValid(@Nullable Boolean isCertChainValid) {
|
||||||
|
this.isCertChainValid = isCertChainValid;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder sslHostValid(@Nullable Boolean isCertHostValid) {
|
||||||
|
this.isCertHostValid = isCertHostValid;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder sslDateValid(@Nullable Boolean isCertDateValid) {
|
||||||
|
this.isCertDateValid = isCertDateValid;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
public DomainSecurityRecord build() {
|
public DomainSecurityRecord build() {
|
||||||
return new DomainSecurityRecord(
|
return new DomainSecurityRecord(
|
||||||
domainId,
|
domainId,
|
||||||
@@ -532,7 +590,10 @@ public record DomainSecurityRecord(
|
|||||||
headerXXssProtection,
|
headerXXssProtection,
|
||||||
headerServer,
|
headerServer,
|
||||||
headerXPoweredBy,
|
headerXPoweredBy,
|
||||||
tsLastUpdate
|
tsLastUpdate,
|
||||||
|
isCertChainValid,
|
||||||
|
isCertHostValid,
|
||||||
|
isCertDateValid
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -0,0 +1,59 @@
|
|||||||
|
package nu.marginalia.ping.ssl;
|
||||||
|
|
||||||
|
import org.bouncycastle.asn1.ASN1OctetString;
|
||||||
|
import org.bouncycastle.asn1.ASN1Primitive;
|
||||||
|
import org.bouncycastle.asn1.x509.*;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.security.cert.X509Certificate;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class AIAExtractor {
|
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(AIAExtractor.class);
|
||||||
|
|
||||||
|
public static List<String> getCaIssuerUrls(X509Certificate certificate) {
|
||||||
|
List<String> caIssuerUrls = new ArrayList<>();
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Get the AIA extension value
|
||||||
|
byte[] aiaExtensionValue = certificate.getExtensionValue(Extension.authorityInfoAccess.getId());
|
||||||
|
if (aiaExtensionValue == null) {
|
||||||
|
logger.warn("No AIA extension found");
|
||||||
|
return caIssuerUrls;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse the extension - first unwrap the OCTET STRING
|
||||||
|
ASN1OctetString octetString = ASN1OctetString.getInstance(aiaExtensionValue);
|
||||||
|
ASN1Primitive aiaObj = ASN1Primitive.fromByteArray(octetString.getOctets());
|
||||||
|
|
||||||
|
// Parse as AuthorityInformationAccess
|
||||||
|
AuthorityInformationAccess aia = AuthorityInformationAccess.getInstance(aiaObj);
|
||||||
|
|
||||||
|
if (aia != null) {
|
||||||
|
AccessDescription[] accessDescriptions = aia.getAccessDescriptions();
|
||||||
|
|
||||||
|
for (AccessDescription accessDesc : accessDescriptions) {
|
||||||
|
// Check if this is a CA Issuers access method
|
||||||
|
if (X509ObjectIdentifiers.id_ad_caIssuers.equals(accessDesc.getAccessMethod())) {
|
||||||
|
GeneralName accessLocation = accessDesc.getAccessLocation();
|
||||||
|
|
||||||
|
// Check if it's a URI
|
||||||
|
if (accessLocation.getTagNo() == GeneralName.uniformResourceIdentifier) {
|
||||||
|
String url = accessLocation.getName().toString();
|
||||||
|
caIssuerUrls.add(url);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.error("Error parsing AIA extension: {}", e.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
return caIssuerUrls;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@@ -0,0 +1,273 @@
|
|||||||
|
package nu.marginalia.ping.ssl;
|
||||||
|
|
||||||
|
import com.google.common.cache.Cache;
|
||||||
|
import com.google.common.cache.CacheBuilder;
|
||||||
|
import nu.marginalia.WmsaHome;
|
||||||
|
import org.apache.hc.client5.http.classic.HttpClient;
|
||||||
|
import org.apache.hc.client5.http.impl.classic.HttpClientBuilder;
|
||||||
|
import org.apache.hc.core5.http.ClassicHttpRequest;
|
||||||
|
import org.apache.hc.core5.http.io.support.ClassicRequestBuilder;
|
||||||
|
import org.bouncycastle.asn1.ASN1OctetString;
|
||||||
|
import org.bouncycastle.asn1.ASN1Primitive;
|
||||||
|
import org.bouncycastle.asn1.x509.*;
|
||||||
|
import org.bouncycastle.cert.X509CertificateHolder;
|
||||||
|
import org.bouncycastle.cert.jcajce.JcaX509CertificateConverter;
|
||||||
|
import org.bouncycastle.cms.CMSSignedData;
|
||||||
|
import org.bouncycastle.openssl.PEMParser;
|
||||||
|
import org.bouncycastle.util.Store;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.StringReader;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.security.cert.CertificateFactory;
|
||||||
|
import java.security.cert.TrustAnchor;
|
||||||
|
import java.security.cert.X509Certificate;
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
public class CertificateFetcher {
|
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(CertificateFetcher.class);
|
||||||
|
|
||||||
|
private static HttpClient client = HttpClientBuilder.create()
|
||||||
|
.build();
|
||||||
|
|
||||||
|
private static Cache<String, X509Certificate> cache = CacheBuilder
|
||||||
|
.newBuilder()
|
||||||
|
.expireAfterAccess(Duration.ofHours(6))
|
||||||
|
.maximumSize(10_000)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
|
||||||
|
public static List<X509Certificate> fetchMissingIntermediates(X509Certificate leafCert) {
|
||||||
|
List<X509Certificate> intermediates = new ArrayList<>();
|
||||||
|
|
||||||
|
// Get CA Issuer URLs from AIA extension
|
||||||
|
List<String> caIssuerUrls = AIAExtractor.getCaIssuerUrls(leafCert);
|
||||||
|
|
||||||
|
for (String url : caIssuerUrls) {
|
||||||
|
try {
|
||||||
|
// Check cache first
|
||||||
|
X509Certificate cached = cache.getIfPresent(url);
|
||||||
|
if (cached != null) {
|
||||||
|
intermediates.add(cached);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Download certificate
|
||||||
|
X509Certificate downloaded = downloadCertificate(url);
|
||||||
|
if (downloaded != null) {
|
||||||
|
// Verify this certificate can actually sign the leaf
|
||||||
|
if (canSign(downloaded, leafCert)) {
|
||||||
|
intermediates.add(downloaded);
|
||||||
|
cache.put(url, downloaded);
|
||||||
|
logger.info("Downloaded certificate for url: {}", url);
|
||||||
|
} else {
|
||||||
|
logger.warn("Downloaded certificate cannot sign leaf cert from: {}", url);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.error("Failed to fetch certificate from {}: {}", url, e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return intermediates;
|
||||||
|
}
|
||||||
|
private static X509Certificate downloadCertificate(String urlString) {
|
||||||
|
try {
|
||||||
|
ClassicHttpRequest request = ClassicRequestBuilder.create("GET")
|
||||||
|
.addHeader("User-Agent", WmsaHome.getUserAgent() + " (Certificate Fetcher)")
|
||||||
|
.setUri(urlString)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
byte[] data = client.execute(request, rsp -> {
|
||||||
|
var entity = rsp.getEntity();
|
||||||
|
if (entity == null) {
|
||||||
|
logger.warn("GET request returned no content for {}", urlString);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return entity.getContent().readAllBytes();
|
||||||
|
});
|
||||||
|
|
||||||
|
if (data.length == 0) {
|
||||||
|
logger.warn("Empty response from {}", urlString);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try different formats based on file extension
|
||||||
|
if (urlString.toLowerCase().endsWith(".p7c") || urlString.toLowerCase().endsWith(".p7b")) {
|
||||||
|
return parsePKCS7(data);
|
||||||
|
} else {
|
||||||
|
return parseX509(data);
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.warn("Failed to fetch certificate from {}: {}", urlString, e.getMessage());
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<X509Certificate> parseMultiplePEM(byte[] data) throws Exception {
|
||||||
|
List<X509Certificate> certificates = new ArrayList<>();
|
||||||
|
|
||||||
|
try (StringReader stringReader = new StringReader(new String(data, StandardCharsets.UTF_8));
|
||||||
|
PEMParser pemParser = new PEMParser(stringReader)) {
|
||||||
|
|
||||||
|
JcaX509CertificateConverter converter = new JcaX509CertificateConverter();
|
||||||
|
Object object;
|
||||||
|
|
||||||
|
while ((object = pemParser.readObject()) != null) {
|
||||||
|
if (object instanceof X509CertificateHolder) {
|
||||||
|
X509CertificateHolder certHolder = (X509CertificateHolder) object;
|
||||||
|
certificates.add(converter.getCertificate(certHolder));
|
||||||
|
} else if (object instanceof X509Certificate) {
|
||||||
|
certificates.add((X509Certificate) object);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return certificates;
|
||||||
|
}
|
||||||
|
private static X509Certificate parseX509(byte[] data) throws Exception {
|
||||||
|
CertificateFactory cf = CertificateFactory.getInstance("X.509");
|
||||||
|
return (X509Certificate) cf.generateCertificate(new ByteArrayInputStream(data));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static X509Certificate parsePKCS7(byte[] data) throws Exception {
|
||||||
|
try {
|
||||||
|
// Parse PKCS#7/CMS structure
|
||||||
|
CMSSignedData cmsData = new CMSSignedData(data);
|
||||||
|
Store<X509CertificateHolder> certStore = cmsData.getCertificates();
|
||||||
|
|
||||||
|
JcaX509CertificateConverter converter = new JcaX509CertificateConverter();
|
||||||
|
|
||||||
|
// Get the first certificate from the store
|
||||||
|
for (X509CertificateHolder certHolder : certStore.getMatches(null)) {
|
||||||
|
X509Certificate cert = converter.getCertificate(certHolder);
|
||||||
|
return cert;
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.warn("No certificates found in PKCS#7 structure");
|
||||||
|
return null;
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.error("Failed to parse PKCS#7 structure from {}: {}", data.length, e.getMessage());
|
||||||
|
return parseX509(data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean canSign(X509Certificate issuerCert, X509Certificate subjectCert) {
|
||||||
|
try {
|
||||||
|
// Check if the issuer DN matches
|
||||||
|
if (!issuerCert.getSubjectDN().equals(subjectCert.getIssuerDN())) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to verify the signature
|
||||||
|
subjectCert.verify(issuerCert.getPublicKey());
|
||||||
|
return true;
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recursive fetching for complete chains
|
||||||
|
public static List<X509Certificate> buildCompleteChain(X509Certificate leafCert) {
|
||||||
|
List<X509Certificate> completeChain = new ArrayList<>();
|
||||||
|
completeChain.add(leafCert);
|
||||||
|
|
||||||
|
X509Certificate currentCert = leafCert;
|
||||||
|
int maxDepth = 10; // Prevent infinite loops
|
||||||
|
|
||||||
|
while (maxDepth-- > 0) {
|
||||||
|
// If current cert is self-signed (root), we're done
|
||||||
|
if (currentCert.getSubjectDN().equals(currentCert.getIssuerDN())) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to find the issuer
|
||||||
|
List<X509Certificate> intermediates = fetchMissingIntermediates(currentCert);
|
||||||
|
if (intermediates.isEmpty()) {
|
||||||
|
logger.error("Could not find issuer for: {}", currentCert.getSubjectDN());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add the first valid intermediate
|
||||||
|
X509Certificate intermediate = intermediates.get(0);
|
||||||
|
completeChain.add(intermediate);
|
||||||
|
currentCert = intermediate;
|
||||||
|
}
|
||||||
|
|
||||||
|
return completeChain;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add this to your AIAExtractor class if not already present
|
||||||
|
public static List<String> getOCSPUrls(X509Certificate certificate) {
|
||||||
|
List<String> ocspUrls = new ArrayList<>();
|
||||||
|
|
||||||
|
try {
|
||||||
|
byte[] aiaExtensionValue = certificate.getExtensionValue(Extension.authorityInfoAccess.getId());
|
||||||
|
if (aiaExtensionValue == null) {
|
||||||
|
return ocspUrls;
|
||||||
|
}
|
||||||
|
|
||||||
|
ASN1OctetString octetString = ASN1OctetString.getInstance(aiaExtensionValue);
|
||||||
|
ASN1Primitive aiaObj = ASN1Primitive.fromByteArray(octetString.getOctets());
|
||||||
|
AuthorityInformationAccess aia = AuthorityInformationAccess.getInstance(aiaObj);
|
||||||
|
|
||||||
|
if (aia != null) {
|
||||||
|
AccessDescription[] accessDescriptions = aia.getAccessDescriptions();
|
||||||
|
|
||||||
|
for (AccessDescription accessDesc : accessDescriptions) {
|
||||||
|
if (X509ObjectIdentifiers.id_ad_ocsp.equals(accessDesc.getAccessMethod())) {
|
||||||
|
GeneralName accessLocation = accessDesc.getAccessLocation();
|
||||||
|
if (accessLocation.getTagNo() == GeneralName.uniformResourceIdentifier) {
|
||||||
|
String url = accessLocation.getName().toString();
|
||||||
|
ocspUrls.add(url);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.error("Error parsing AIA extension for OCSP: {}", e.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
return ocspUrls;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Set<TrustAnchor> getRootCerts(String bundleUrl) throws Exception {
|
||||||
|
ClassicHttpRequest request = ClassicRequestBuilder.create("GET")
|
||||||
|
.addHeader("User-Agent", WmsaHome.getUserAgent() + " (Certificate Fetcher)")
|
||||||
|
.setUri(bundleUrl)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
byte[] data = client.execute(request, rsp -> {
|
||||||
|
var entity = rsp.getEntity();
|
||||||
|
if (entity == null) {
|
||||||
|
logger.warn("GET request returned no content for {}", bundleUrl);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return entity.getContent().readAllBytes();
|
||||||
|
});
|
||||||
|
|
||||||
|
List<TrustAnchor> anchors = new ArrayList<>();
|
||||||
|
for (var cert : parseMultiplePEM(data)) {
|
||||||
|
try {
|
||||||
|
anchors.add(new TrustAnchor(cert, null));
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.warn("Failed to create TrustAnchor for certificate: {}", e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info("Loaded {} root certificates from {}", anchors.size(), bundleUrl);
|
||||||
|
|
||||||
|
return Set.copyOf(anchors);
|
||||||
|
}
|
||||||
|
}
|
@@ -0,0 +1,493 @@
|
|||||||
|
package nu.marginalia.ping.ssl;
|
||||||
|
|
||||||
|
import org.bouncycastle.asn1.ASN1OctetString;
|
||||||
|
import org.bouncycastle.asn1.ASN1Primitive;
|
||||||
|
import org.bouncycastle.asn1.x509.*;
|
||||||
|
|
||||||
|
import javax.security.auth.x500.X500Principal;
|
||||||
|
import java.security.cert.TrustAnchor;
|
||||||
|
import java.security.cert.X509Certificate;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/** Utility class for validating X.509 certificates.
|
||||||
|
* This class provides methods to validate certificate chains, check expiration,
|
||||||
|
* hostname validity, and revocation status.
|
||||||
|
* <p></p>
|
||||||
|
* This is extremely unsuitable for actual SSL/TLS validation,
|
||||||
|
* and is only to be used in analyzing certificates for fingerprinting
|
||||||
|
* and diagnosing servers!
|
||||||
|
*/
|
||||||
|
public class CertificateValidator {
|
||||||
|
// If true, will attempt to fetch missing intermediate certificates via AIA urls.
|
||||||
|
private static final boolean TRY_FETCH_MISSING_CERTS = false;
|
||||||
|
|
||||||
|
public static class ValidationResult {
|
||||||
|
public boolean chainValid = false;
|
||||||
|
public boolean certificateExpired = false;
|
||||||
|
public boolean certificateRevoked = false;
|
||||||
|
public boolean selfSigned = false;
|
||||||
|
public boolean hostnameValid = false;
|
||||||
|
|
||||||
|
public boolean isValid() {
|
||||||
|
return !selfSigned && !certificateExpired && !certificateRevoked && hostnameValid;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> errors = new ArrayList<>();
|
||||||
|
public List<String> warnings = new ArrayList<>();
|
||||||
|
public Map<String, Object> details = new HashMap<>();
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
sb.append("=== Certificate Validation Result ===\n");
|
||||||
|
sb.append("Chain Valid: ").append(chainValid ? "✓" : "✗").append("\n");
|
||||||
|
sb.append("Not Expired: ").append(!certificateExpired ? "✓" : "✗").append("\n");
|
||||||
|
sb.append("Not Revoked: ").append(!certificateRevoked ? "✓" : "✗").append("\n");
|
||||||
|
sb.append("Hostname Valid: ").append(hostnameValid ? "✓" : "✗").append("\n");
|
||||||
|
sb.append("Self-Signed: ").append(selfSigned ? "✓" : "✗").append("\n");
|
||||||
|
|
||||||
|
if (!errors.isEmpty()) {
|
||||||
|
sb.append("\nErrors:\n");
|
||||||
|
for (String error : errors) {
|
||||||
|
sb.append(" ✗ ").append(error).append("\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!warnings.isEmpty()) {
|
||||||
|
sb.append("\nWarnings:\n");
|
||||||
|
for (String warning : warnings) {
|
||||||
|
sb.append(" ⚠ ").append(warning).append("\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!details.isEmpty()) {
|
||||||
|
sb.append("\nDetails:\n");
|
||||||
|
for (Map.Entry<String, Object> entry : details.entrySet()) {
|
||||||
|
sb.append(" ").append(entry.getKey()).append(": ").append(entry.getValue()).append("\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static ValidationResult validateCertificate(X509Certificate[] certChain,
|
||||||
|
String hostname) {
|
||||||
|
return validateCertificate(certChain, hostname, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static ValidationResult validateCertificate(X509Certificate[] certChain,
|
||||||
|
String hostname,
|
||||||
|
boolean autoTrustFetchedRoots) {
|
||||||
|
ValidationResult result = new ValidationResult();
|
||||||
|
|
||||||
|
if (certChain == null || certChain.length == 0) {
|
||||||
|
result.errors.add("No certificates provided");
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
X509Certificate leafCert = certChain[0];
|
||||||
|
|
||||||
|
// 1. Check certificate expiration
|
||||||
|
result.certificateExpired = checkExpiration(leafCert, result);
|
||||||
|
|
||||||
|
// 2. Check hostname validity
|
||||||
|
result.hostnameValid = checkHostname(leafCert, hostname, result);
|
||||||
|
|
||||||
|
// 3. Not really checking if it's self-signed, but if the chain is incomplete (and likely self-signed)
|
||||||
|
result.selfSigned = certChain.length <= 1;
|
||||||
|
|
||||||
|
// 4. Check certificate chain validity (optionally with AIA fetching)
|
||||||
|
result.chainValid = checkChainValidity(certChain, RootCerts.getTrustAnchors(), result, autoTrustFetchedRoots);
|
||||||
|
|
||||||
|
// 5. Check revocation status
|
||||||
|
result.certificateRevoked = false; // not implemented
|
||||||
|
// checkRevocation(leafCert, result);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean checkExpiration(X509Certificate cert, ValidationResult result) {
|
||||||
|
try {
|
||||||
|
cert.checkValidity();
|
||||||
|
result.details.put("validFrom", cert.getNotBefore());
|
||||||
|
result.details.put("validTo", cert.getNotAfter());
|
||||||
|
|
||||||
|
// Warn if expires soon (30 days)
|
||||||
|
long daysUntilExpiry = (cert.getNotAfter().getTime() - System.currentTimeMillis()) / (1000 * 60 * 60 * 24);
|
||||||
|
if (daysUntilExpiry < 30) {
|
||||||
|
result.warnings.add("Certificate expires in " + daysUntilExpiry + " days");
|
||||||
|
}
|
||||||
|
|
||||||
|
return false; // Not expired
|
||||||
|
} catch (Exception e) {
|
||||||
|
result.errors.add("Certificate expired or not yet valid: " + e.getMessage());
|
||||||
|
return true; // Expired
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean checkHostname(X509Certificate cert, String hostname, ValidationResult result) {
|
||||||
|
if (hostname == null || hostname.isEmpty()) {
|
||||||
|
result.warnings.add("No hostname provided for validation");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Check Subject CN
|
||||||
|
String subjectCN = getCommonName(cert.getSubjectX500Principal());
|
||||||
|
if (subjectCN != null && matchesHostname(subjectCN, hostname)) {
|
||||||
|
result.details.put("hostnameMatchedBy", "Subject CN: " + subjectCN);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check Subject Alternative Names
|
||||||
|
Collection<List<?>> subjectAltNames = cert.getSubjectAlternativeNames();
|
||||||
|
if (subjectAltNames != null) {
|
||||||
|
for (List<?> altName : subjectAltNames) {
|
||||||
|
if (altName.size() >= 2) {
|
||||||
|
Integer type = (Integer) altName.get(0);
|
||||||
|
if (type == 2) { // DNS name
|
||||||
|
String dnsName = (String) altName.get(1);
|
||||||
|
if (matchesHostname(dnsName, hostname)) {
|
||||||
|
result.details.put("hostnameMatchedBy", "SAN DNS: " + dnsName);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
result.errors.add("Hostname '" + hostname + "' does not match certificate");
|
||||||
|
result.details.put("subjectCN", subjectCN);
|
||||||
|
result.details.put("subjectAltNames", subjectAltNames);
|
||||||
|
return false;
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
result.errors.add("Error checking hostname: " + e.getMessage());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean checkChainValidity(X509Certificate[] originalChain,
|
||||||
|
Set<TrustAnchor> trustAnchors,
|
||||||
|
ValidationResult result,
|
||||||
|
boolean autoTrustFetchedRoots) {
|
||||||
|
try {
|
||||||
|
// First try with the original chain
|
||||||
|
ChainValidationResult originalResult = validateChain(originalChain, trustAnchors);
|
||||||
|
|
||||||
|
if (originalResult.isValid) {
|
||||||
|
result.details.put("chainLength", originalChain.length);
|
||||||
|
result.details.put("chainExtended", false);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
else if (!TRY_FETCH_MISSING_CERTS) {
|
||||||
|
result.errors.addAll(originalResult.issues);
|
||||||
|
result.details.put("chainLength", originalChain.length);
|
||||||
|
result.details.put("chainExtended", false);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
List<X509Certificate> repairedChain = CertificateFetcher.buildCompleteChain(originalChain[0]);
|
||||||
|
|
||||||
|
if (!repairedChain.isEmpty()) {
|
||||||
|
|
||||||
|
X509Certificate[] extendedArray = repairedChain.toArray(new X509Certificate[0]);
|
||||||
|
|
||||||
|
// Create a copy of trust anchors for potential modification
|
||||||
|
Set<TrustAnchor> workingTrustAnchors = new HashSet<>(trustAnchors);
|
||||||
|
|
||||||
|
// If auto-trust is enabled, add any self-signed certs as trusted roots
|
||||||
|
if (autoTrustFetchedRoots) {
|
||||||
|
for (X509Certificate cert : extendedArray) {
|
||||||
|
if (cert.getSubjectX500Principal().equals(cert.getIssuerX500Principal())) {
|
||||||
|
// Self-signed certificate - add to trust anchors if not already there
|
||||||
|
boolean alreadyTrusted = false;
|
||||||
|
for (TrustAnchor anchor : workingTrustAnchors) {
|
||||||
|
if (anchor.getTrustedCert().equals(cert)) {
|
||||||
|
alreadyTrusted = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!alreadyTrusted) {
|
||||||
|
workingTrustAnchors.add(new TrustAnchor(cert, null));
|
||||||
|
result.warnings.add("Auto-trusted fetched root: " + cert.getSubjectX500Principal().getName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ChainValidationResult extendedResult = validateChain(extendedArray, workingTrustAnchors);
|
||||||
|
|
||||||
|
result.details.put("chainLength", extendedArray.length);
|
||||||
|
result.details.put("originalChainLength", originalChain.length);
|
||||||
|
result.details.put("chainExtended", true);
|
||||||
|
result.details.put("fetchedIntermediates", extendedArray.length);
|
||||||
|
result.details.put("autoTrustedRoots", autoTrustFetchedRoots);
|
||||||
|
|
||||||
|
if (extendedResult.isValid) {
|
||||||
|
result.warnings.add("Extended certificate chain with " + extendedArray.length + " fetched intermediates");
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
result.errors.addAll(extendedResult.issues);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
result.warnings.add("Could not fetch missing intermediate certificates");
|
||||||
|
result.details.put("chainLength", originalChain.length);
|
||||||
|
result.details.put("chainExtended", false);
|
||||||
|
result.errors.addAll(originalResult.issues);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
result.warnings.add("Failed to fetch intermediates: " + e.getMessage());
|
||||||
|
result.details.put("chainLength", originalChain.length);
|
||||||
|
result.details.put("chainExtended", false);
|
||||||
|
result.errors.addAll(originalResult.issues);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
result.errors.add("Error validating chain: " + e.getMessage());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static void debugCertificateChain(List<X509Certificate> certs, Set<TrustAnchor> trustAnchors) {
|
||||||
|
System.out.println("=== Certificate Chain Analysis ===");
|
||||||
|
|
||||||
|
int length = certs.size();
|
||||||
|
System.out.println("Chain length: " + length);
|
||||||
|
|
||||||
|
int i = 0;
|
||||||
|
for (var x509cert : certs) {
|
||||||
|
System.out.println("\nCertificate " + i++ + ":");
|
||||||
|
System.out.println(" Subject: " + x509cert.getSubjectDN().getName());
|
||||||
|
System.out.println(" Issuer: " + x509cert.getIssuerDN().getName());
|
||||||
|
System.out.println(" Serial: " + x509cert.getSerialNumber().toString(16));
|
||||||
|
System.out.println(" Valid: " + x509cert.getNotBefore() + " to " + x509cert.getNotAfter());
|
||||||
|
System.out.println(" Self-signed: " + x509cert.getSubjectDN().equals(x509cert.getIssuerDN()));
|
||||||
|
|
||||||
|
// Check if we have the issuer in our trust anchors
|
||||||
|
boolean issuerFound = false;
|
||||||
|
for (TrustAnchor anchor : trustAnchors) {
|
||||||
|
if (anchor.getTrustedCert().getSubjectDN().equals(x509cert.getIssuerDN())) {
|
||||||
|
issuerFound = true;
|
||||||
|
System.out.println(" Issuer found in trust anchors: " + anchor.getTrustedCert().getSubjectDN().getName());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!issuerFound && i == length) {
|
||||||
|
System.out.println(" *** MISSING ISSUER: " + x509cert.getIssuerDN().getName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class ChainValidationResult {
|
||||||
|
boolean isValid = false;
|
||||||
|
List<String> issues = new ArrayList<>();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static ChainValidationResult validateChain(X509Certificate[] certChain, Set<TrustAnchor> trustAnchors) {
|
||||||
|
ChainValidationResult result = new ChainValidationResult();
|
||||||
|
|
||||||
|
// Check each certificate in the chain
|
||||||
|
for (int i = 0; i < certChain.length; i++) {
|
||||||
|
X509Certificate cert = certChain[i];
|
||||||
|
|
||||||
|
// Check certificate validity dates
|
||||||
|
try {
|
||||||
|
cert.checkValidity();
|
||||||
|
} catch (Exception e) {
|
||||||
|
result.issues.add("Certificate " + i + " expired: " + cert.getSubjectDN());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check signature (except for self-signed root)
|
||||||
|
if (i < certChain.length - 1) {
|
||||||
|
X509Certificate issuer = certChain[i + 1];
|
||||||
|
try {
|
||||||
|
cert.verify(issuer.getPublicKey());
|
||||||
|
} catch (Exception e) {
|
||||||
|
result.issues.add("Certificate " + i + " signature invalid: " + e.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check issuer/subject relationship
|
||||||
|
if (!cert.getIssuerX500Principal().equals(issuer.getSubjectX500Principal())) {
|
||||||
|
result.issues.add("Certificate " + i + " issuer does not match certificate " + (i + 1) + " subject");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if chain ends with a trusted root
|
||||||
|
X509Certificate rootCert = certChain[certChain.length - 1];
|
||||||
|
boolean trustedRootFound = false;
|
||||||
|
|
||||||
|
if (rootCert.getSubjectX500Principal().equals(rootCert.getIssuerX500Principal())) {
|
||||||
|
// Self-signed root - check if it's in trust anchors
|
||||||
|
for (TrustAnchor anchor : trustAnchors) {
|
||||||
|
if (anchor.getTrustedCert().equals(rootCert)) {
|
||||||
|
trustedRootFound = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!trustedRootFound) {
|
||||||
|
// Check if we trust the root's subject even if the certificate is different
|
||||||
|
for (TrustAnchor anchor : trustAnchors) {
|
||||||
|
if (anchor.getTrustedCert().getSubjectX500Principal().equals(rootCert.getSubjectX500Principal())) {
|
||||||
|
trustedRootFound = true;
|
||||||
|
// Note: we'll add this as a warning in the main result
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Chain doesn't end with self-signed cert - check if issuer is trusted
|
||||||
|
for (TrustAnchor anchor : trustAnchors) {
|
||||||
|
if (anchor.getTrustedCert().getSubjectX500Principal().equals(rootCert.getIssuerX500Principal())) {
|
||||||
|
trustedRootFound = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!trustedRootFound) {
|
||||||
|
result.issues.add("Chain does not end with a trusted root");
|
||||||
|
}
|
||||||
|
|
||||||
|
result.isValid = result.issues.isEmpty();
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean checkRevocation(X509Certificate cert, ValidationResult result) {
|
||||||
|
try {
|
||||||
|
// Try OCSP first
|
||||||
|
if (checkOCSP(cert, result)) {
|
||||||
|
return true; // Revoked
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback to CRL
|
||||||
|
if (checkCRL(cert, result)) {
|
||||||
|
return true; // Revoked
|
||||||
|
}
|
||||||
|
|
||||||
|
result.warnings.add("Could not check revocation status");
|
||||||
|
return false; // Assume not revoked if we can't check
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
result.warnings.add("Error checking revocation: " + e.getMessage());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean checkOCSP(X509Certificate cert, ValidationResult result) {
|
||||||
|
// For now, just extract OCSP URL and note that we found it
|
||||||
|
try {
|
||||||
|
List<String> ocspUrls = CertificateFetcher.getOCSPUrls(cert);
|
||||||
|
if (!ocspUrls.isEmpty()) {
|
||||||
|
result.details.put("ocspUrls", ocspUrls);
|
||||||
|
result.warnings.add("OCSP checking not implemented - found OCSP URLs: " + ocspUrls);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
} catch (Exception e) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean checkCRL(X509Certificate cert, ValidationResult result) {
|
||||||
|
// Basic CRL URL extraction
|
||||||
|
try {
|
||||||
|
List<String> crlUrls = getCRLUrls(cert);
|
||||||
|
if (!crlUrls.isEmpty()) {
|
||||||
|
result.details.put("crlUrls", crlUrls);
|
||||||
|
result.warnings.add("CRL checking not implemented - found CRL URLs: " + crlUrls);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
} catch (Exception e) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Helper methods
|
||||||
|
private static String getCommonName(X500Principal principal) {
|
||||||
|
String name = principal.getName();
|
||||||
|
String[] parts = name.split(",");
|
||||||
|
for (String part : parts) {
|
||||||
|
part = part.trim();
|
||||||
|
if (part.startsWith("CN=")) {
|
||||||
|
return part.substring(3);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean matchesHostname(String certName, String hostname) {
|
||||||
|
if (certName == null || hostname == null) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Exact match
|
||||||
|
if (certName.equalsIgnoreCase(hostname)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wildcard match
|
||||||
|
if (certName.startsWith("*.")) {
|
||||||
|
String certDomain = certName.substring(2);
|
||||||
|
String hostDomain = hostname;
|
||||||
|
int firstDot = hostname.indexOf('.');
|
||||||
|
if (firstDot > 0) {
|
||||||
|
hostDomain = hostname.substring(firstDot + 1);
|
||||||
|
}
|
||||||
|
return certDomain.equalsIgnoreCase(hostDomain);
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<String> getCRLUrls(X509Certificate cert) {
|
||||||
|
// This would need to parse the CRL Distribution Points extension
|
||||||
|
// For now, return empty list
|
||||||
|
return new ArrayList<>();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add this to your AIAExtractor class if not already present
|
||||||
|
public static List<String> getOCSPUrls(X509Certificate certificate) {
|
||||||
|
List<String> ocspUrls = new ArrayList<>();
|
||||||
|
|
||||||
|
try {
|
||||||
|
byte[] aiaExtensionValue = certificate.getExtensionValue(Extension.authorityInfoAccess.getId());
|
||||||
|
if (aiaExtensionValue == null) {
|
||||||
|
return ocspUrls;
|
||||||
|
}
|
||||||
|
|
||||||
|
ASN1OctetString octetString = ASN1OctetString.getInstance(aiaExtensionValue);
|
||||||
|
ASN1Primitive aiaObj = ASN1Primitive.fromByteArray(octetString.getOctets());
|
||||||
|
AuthorityInformationAccess aia = AuthorityInformationAccess.getInstance(aiaObj);
|
||||||
|
|
||||||
|
if (aia != null) {
|
||||||
|
AccessDescription[] accessDescriptions = aia.getAccessDescriptions();
|
||||||
|
|
||||||
|
for (AccessDescription accessDesc : accessDescriptions) {
|
||||||
|
if (X509ObjectIdentifiers.id_ad_ocsp.equals(accessDesc.getAccessMethod())) {
|
||||||
|
GeneralName accessLocation = accessDesc.getAccessLocation();
|
||||||
|
if (accessLocation.getTagNo() == GeneralName.uniformResourceIdentifier) {
|
||||||
|
String url = accessLocation.getName().toString();
|
||||||
|
ocspUrls.add(url);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
System.err.println("Error parsing AIA extension for OCSP: " + e.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
return ocspUrls;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@@ -1,491 +0,0 @@
|
|||||||
package nu.marginalia.ping.ssl;
|
|
||||||
|
|
||||||
import javax.net.ssl.*;
|
|
||||||
import java.io.FileInputStream;
|
|
||||||
import java.security.InvalidAlgorithmParameterException;
|
|
||||||
import java.security.KeyStore;
|
|
||||||
import java.security.KeyStoreException;
|
|
||||||
import java.security.NoSuchAlgorithmException;
|
|
||||||
import java.security.cert.*;
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Custom PKIX validator for validating X.509 certificate chains with verbose output
|
|
||||||
* for db export (i.e. not just SSLException).
|
|
||||||
*/
|
|
||||||
public class CustomPKIXValidator {
|
|
||||||
|
|
||||||
private final Set<TrustAnchor> trustAnchors;
|
|
||||||
private final boolean revocationEnabled;
|
|
||||||
private final boolean anyPolicyInhibited;
|
|
||||||
private final boolean explicitPolicyRequired;
|
|
||||||
private final boolean policyMappingInhibited;
|
|
||||||
private final Set<String> initialPolicies;
|
|
||||||
|
|
||||||
private static final Set<String> EV_POLICY_OIDS = Set.of(
|
|
||||||
"1.3.6.1.4.1.17326.10.14.2.1.2", // Entrust
|
|
||||||
"1.3.6.1.4.1.17326.10.8.12.1.2", // Entrust
|
|
||||||
"2.16.840.1.114028.10.1.2", // Entrust/AffirmTrust
|
|
||||||
"1.3.6.1.4.1.6449.1.2.1.5.1", // Comodo
|
|
||||||
"1.3.6.1.4.1.8024.0.2.100.1.2", // QuoVadis
|
|
||||||
"2.16.840.1.114404.1.1.2.4.1", // GoDaddy
|
|
||||||
"2.16.840.1.114413.1.7.23.3", // DigiCert
|
|
||||||
"2.16.840.1.114414.1.7.23.3", // DigiCert
|
|
||||||
"1.3.6.1.4.1.14370.1.6", // GlobalSign
|
|
||||||
"2.16.756.1.89.1.2.1.1", // SwissSign
|
|
||||||
"1.3.6.1.4.1.4146.1.1" // GlobalSign
|
|
||||||
);
|
|
||||||
|
|
||||||
// Constructor with default settings
|
|
||||||
public CustomPKIXValidator() throws Exception {
|
|
||||||
this(true, false, false, false, null);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Constructor with custom settings
|
|
||||||
public CustomPKIXValidator(boolean revocationEnabled,
|
|
||||||
boolean anyPolicyInhibited,
|
|
||||||
boolean explicitPolicyRequired,
|
|
||||||
boolean policyMappingInhibited,
|
|
||||||
Set<String> initialPolicies) throws Exception {
|
|
||||||
this.trustAnchors = loadDefaultTrustAnchors();
|
|
||||||
this.revocationEnabled = revocationEnabled;
|
|
||||||
this.anyPolicyInhibited = anyPolicyInhibited;
|
|
||||||
this.explicitPolicyRequired = explicitPolicyRequired;
|
|
||||||
this.policyMappingInhibited = policyMappingInhibited;
|
|
||||||
this.initialPolicies = initialPolicies;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Constructor with custom trust anchors
|
|
||||||
public CustomPKIXValidator(Set<TrustAnchor> customTrustAnchors,
|
|
||||||
boolean revocationEnabled) {
|
|
||||||
this.trustAnchors = new HashSet<>(customTrustAnchors);
|
|
||||||
this.revocationEnabled = revocationEnabled;
|
|
||||||
this.anyPolicyInhibited = false;
|
|
||||||
this.explicitPolicyRequired = false;
|
|
||||||
this.policyMappingInhibited = false;
|
|
||||||
this.initialPolicies = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Validates certificate chain using PKIX algorithm
|
|
||||||
*/
|
|
||||||
public PKIXValidationResult validateCertificateChain(String hostname, X509Certificate[] certChain) {
|
|
||||||
EnumSet<PkixValidationError> errors = EnumSet.noneOf(PkixValidationError.class);
|
|
||||||
try {
|
|
||||||
// 1. Basic input validation
|
|
||||||
if (certChain == null || certChain.length == 0) {
|
|
||||||
return new PKIXValidationResult(false, "Certificate chain is empty", errors,
|
|
||||||
null, null, null, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (hostname == null || hostname.trim().isEmpty()) {
|
|
||||||
return new PKIXValidationResult(false, "Hostname is null or empty", errors,
|
|
||||||
null, null, null, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
// 2. Create certificate path
|
|
||||||
CertPath certPath = createCertificatePath(certChain);
|
|
||||||
if (certPath == null) {
|
|
||||||
return new PKIXValidationResult(false, "Failed to create certificate path", errors,
|
|
||||||
null, null, null, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
// 3. Build and validate certificate path using PKIX
|
|
||||||
PKIXCertPathValidatorResult pkixResult = performPKIXValidation(certPath, errors);
|
|
||||||
|
|
||||||
// 4. Validate hostname
|
|
||||||
boolean hostnameValid = validateHostname(hostname, certChain[0], errors);
|
|
||||||
|
|
||||||
// 5. Extract critical extensions information
|
|
||||||
Set<String> criticalExtensions = extractCriticalExtensions(certChain);
|
|
||||||
|
|
||||||
boolean overallValid = (pkixResult != null) && hostnameValid;
|
|
||||||
String errorMessage = null;
|
|
||||||
|
|
||||||
if (pkixResult == null) {
|
|
||||||
errorMessage = "PKIX path validation failed";
|
|
||||||
} else if (!hostnameValid) {
|
|
||||||
errorMessage = "Hostname validation failed";
|
|
||||||
}
|
|
||||||
|
|
||||||
return new PKIXValidationResult(overallValid, errorMessage, errors,
|
|
||||||
pkixResult, certPath, criticalExtensions, hostnameValid);
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
|
||||||
return new PKIXValidationResult(false, "Validation exception: " + e.getMessage(),
|
|
||||||
errors, null, null, null, false);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates a certificate path from the certificate chain
|
|
||||||
*/
|
|
||||||
private CertPath createCertificatePath(X509Certificate[] certChain) throws CertificateException {
|
|
||||||
CertificateFactory cf = CertificateFactory.getInstance("X.509");
|
|
||||||
List<Certificate> certList = Arrays.asList(certChain);
|
|
||||||
return cf.generateCertPath(certList);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Performs PKIX validation
|
|
||||||
*/
|
|
||||||
private PKIXCertPathValidatorResult performPKIXValidation(CertPath certPath, Set<PkixValidationError> warnings) {
|
|
||||||
try {
|
|
||||||
// Create PKIX parameters
|
|
||||||
PKIXParameters params = new PKIXParameters(trustAnchors);
|
|
||||||
|
|
||||||
// Configure PKIX parameters
|
|
||||||
params.setRevocationEnabled(revocationEnabled);
|
|
||||||
params.setAnyPolicyInhibited(anyPolicyInhibited);
|
|
||||||
params.setExplicitPolicyRequired(explicitPolicyRequired);
|
|
||||||
params.setPolicyMappingInhibited(policyMappingInhibited);
|
|
||||||
|
|
||||||
if (initialPolicies != null && !initialPolicies.isEmpty()) {
|
|
||||||
params.setInitialPolicies(initialPolicies);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set up certificate stores for intermediate certificates if needed
|
|
||||||
// This helps with path building when intermediate certs are missing
|
|
||||||
List<Certificate> intermediateCerts = extractIntermediateCertificates(certPath);
|
|
||||||
if (!intermediateCerts.isEmpty()) {
|
|
||||||
CertStore certStore = CertStore.getInstance("Collection",
|
|
||||||
new CollectionCertStoreParameters(intermediateCerts));
|
|
||||||
params.addCertStore(certStore);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Configure revocation checking if enabled
|
|
||||||
if (revocationEnabled) {
|
|
||||||
configureRevocationChecking(params);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create and run validator
|
|
||||||
CertPathValidator validator = CertPathValidator.getInstance("PKIX");
|
|
||||||
PKIXCertPathValidatorResult result = (PKIXCertPathValidatorResult)
|
|
||||||
validator.validate(certPath, params);
|
|
||||||
|
|
||||||
return result;
|
|
||||||
|
|
||||||
} catch (CertPathValidatorException e) {
|
|
||||||
warnings.add(PkixValidationError.PATH_VALIDATION_FAILED);
|
|
||||||
return null;
|
|
||||||
} catch (InvalidAlgorithmParameterException e) {
|
|
||||||
warnings.add(PkixValidationError.INVALID_PKIX_PARAMETERS);
|
|
||||||
return null;
|
|
||||||
} catch (Exception e) {
|
|
||||||
warnings.add(PkixValidationError.UNKNOWN);
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Extracts intermediate certificates from the path
|
|
||||||
*/
|
|
||||||
private List<Certificate> extractIntermediateCertificates(CertPath certPath) {
|
|
||||||
List<Certificate> certs = (List<Certificate>) certPath.getCertificates();
|
|
||||||
if (certs.size() <= 2) {
|
|
||||||
return new ArrayList<>(); // Only leaf and root, no intermediates
|
|
||||||
}
|
|
||||||
// Return all but the first (leaf) and potentially last (root)
|
|
||||||
return new ArrayList<>(certs.subList(1, certs.size()));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Configures revocation checking (CRL/OCSP)
|
|
||||||
*/
|
|
||||||
private void configureRevocationChecking(PKIXParameters params) throws NoSuchAlgorithmException {
|
|
||||||
// Create PKIX revocation checker
|
|
||||||
PKIXRevocationChecker revocationChecker = (PKIXRevocationChecker)
|
|
||||||
CertPathValidator.getInstance("PKIX").getRevocationChecker();
|
|
||||||
|
|
||||||
// Configure revocation checker options
|
|
||||||
Set<PKIXRevocationChecker.Option> options = EnumSet.of(
|
|
||||||
PKIXRevocationChecker.Option.PREFER_CRLS,
|
|
||||||
PKIXRevocationChecker.Option.SOFT_FAIL // Don't fail if revocation info unavailable
|
|
||||||
);
|
|
||||||
revocationChecker.setOptions(options);
|
|
||||||
|
|
||||||
params.addCertPathChecker(revocationChecker);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Comprehensive hostname validation including SAN and CN
|
|
||||||
*/
|
|
||||||
private boolean validateHostname(String hostname, X509Certificate cert, Set<PkixValidationError> warnings) {
|
|
||||||
try {
|
|
||||||
// Use Java's built-in hostname verifier as a starting point
|
|
||||||
HostnameVerifier defaultVerifier = HttpsURLConnection.getDefaultHostnameVerifier();
|
|
||||||
|
|
||||||
// Create a mock SSL session for the hostname verifier
|
|
||||||
MockSSLSession mockSession = new MockSSLSession(cert);
|
|
||||||
|
|
||||||
boolean defaultResult = defaultVerifier.verify(hostname, mockSession);
|
|
||||||
|
|
||||||
if (defaultResult) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// If default fails, do manual validation
|
|
||||||
return performManualHostnameValidation(hostname, cert, warnings);
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
|
||||||
warnings.add(PkixValidationError.UNSPECIFIED_HOST_ERROR);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Manual hostname validation implementation
|
|
||||||
*/
|
|
||||||
private boolean performManualHostnameValidation(String hostname, X509Certificate cert, Set<PkixValidationError> warnings) {
|
|
||||||
try {
|
|
||||||
// 1. Check Subject Alternative Names (SAN) - preferred method
|
|
||||||
Collection<List<?>> sanEntries = cert.getSubjectAlternativeNames();
|
|
||||||
if (sanEntries != null) {
|
|
||||||
for (List<?> sanEntry : sanEntries) {
|
|
||||||
if (sanEntry.size() >= 2) {
|
|
||||||
Integer type = (Integer) sanEntry.get(0);
|
|
||||||
if (type == 2) { // DNS name
|
|
||||||
String dnsName = (String) sanEntry.get(1);
|
|
||||||
if (matchesHostname(hostname, dnsName)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
} else if (type == 7) { // IP address
|
|
||||||
String ipAddress = (String) sanEntry.get(1);
|
|
||||||
if (hostname.equals(ipAddress)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// If SAN is present but no match found, don't check CN (RFC 6125)
|
|
||||||
warnings.add(PkixValidationError.SAN_MISMATCH);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// 2. Fallback to Common Name (CN) in subject if no SAN present
|
|
||||||
String subjectDN = cert.getSubjectDN().getName();
|
|
||||||
String cn = extractCommonName(subjectDN);
|
|
||||||
if (cn != null) {
|
|
||||||
if (matchesHostname(hostname, cn)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
warnings.add(PkixValidationError.SAN_MISMATCH);
|
|
||||||
return false;
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
|
||||||
warnings.add(PkixValidationError.UNKNOWN);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Checks if hostname matches certificate name (handles wildcards)
|
|
||||||
*/
|
|
||||||
private boolean matchesHostname(String hostname, String certName) {
|
|
||||||
if (hostname == null || certName == null) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
hostname = hostname.toLowerCase();
|
|
||||||
certName = certName.toLowerCase();
|
|
||||||
|
|
||||||
// Exact match
|
|
||||||
if (hostname.equals(certName)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Wildcard matching (*.example.com)
|
|
||||||
if (certName.startsWith("*.")) {
|
|
||||||
String domain = certName.substring(2);
|
|
||||||
|
|
||||||
// Wildcard must match exactly one level
|
|
||||||
if (hostname.endsWith("." + domain)) {
|
|
||||||
String prefix = hostname.substring(0, hostname.length() - domain.length() - 1);
|
|
||||||
// Ensure wildcard doesn't match multiple levels (no dots in prefix)
|
|
||||||
return !prefix.contains(".");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Extracts Common Name from Subject DN
|
|
||||||
*/
|
|
||||||
private String extractCommonName(String subjectDN) {
|
|
||||||
if (subjectDN == null) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Parse DN components
|
|
||||||
String[] components = subjectDN.split(",");
|
|
||||||
for (String component : components) {
|
|
||||||
component = component.trim();
|
|
||||||
if (component.startsWith("CN=")) {
|
|
||||||
return component.substring(3).trim();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Extracts critical extensions from all certificates in the chain
|
|
||||||
*/
|
|
||||||
private Set<String> extractCriticalExtensions(X509Certificate[] certChain) {
|
|
||||||
Set<String> allCriticalExtensions = new HashSet<>();
|
|
||||||
|
|
||||||
for (X509Certificate cert : certChain) {
|
|
||||||
Set<String> criticalExtensions = cert.getCriticalExtensionOIDs();
|
|
||||||
if (criticalExtensions != null) {
|
|
||||||
allCriticalExtensions.addAll(criticalExtensions);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return allCriticalExtensions;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the key length from a certificate
|
|
||||||
*/
|
|
||||||
private int getKeyLength(X509Certificate cert) {
|
|
||||||
try {
|
|
||||||
java.security.PublicKey publicKey = cert.getPublicKey();
|
|
||||||
if (publicKey instanceof java.security.interfaces.RSAPublicKey) {
|
|
||||||
return ((java.security.interfaces.RSAPublicKey) publicKey).getModulus().bitLength();
|
|
||||||
} else if (publicKey instanceof java.security.interfaces.DSAPublicKey) {
|
|
||||||
return ((java.security.interfaces.DSAPublicKey) publicKey).getParams().getP().bitLength();
|
|
||||||
} else if (publicKey instanceof java.security.interfaces.ECPublicKey) {
|
|
||||||
return ((java.security.interfaces.ECPublicKey) publicKey).getParams().getOrder().bitLength();
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
// Ignore
|
|
||||||
}
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Checks if signature algorithm is considered weak
|
|
||||||
*/
|
|
||||||
private boolean isWeakSignatureAlgorithm(String sigAlg) {
|
|
||||||
if (sigAlg == null) return false;
|
|
||||||
|
|
||||||
sigAlg = sigAlg.toLowerCase();
|
|
||||||
return sigAlg.contains("md5") ||
|
|
||||||
sigAlg.contains("sha1") ||
|
|
||||||
sigAlg.equals("md2withrsa") ||
|
|
||||||
sigAlg.equals("md4withrsa");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Checks for deprecated or problematic extensions
|
|
||||||
*/
|
|
||||||
private void checkDeprecatedExtensions(X509Certificate cert, int index, List<String> warnings) {
|
|
||||||
// Check for Netscape extensions (deprecated)
|
|
||||||
if (cert.getNonCriticalExtensionOIDs() != null) {
|
|
||||||
for (String oid : cert.getNonCriticalExtensionOIDs()) {
|
|
||||||
if (oid.startsWith("2.16.840.1.113730")) { // Netscape OID space
|
|
||||||
warnings.add("Certificate " + index + " contains deprecated Netscape extension: " + oid);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Additional extension checks can be added here
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Loads default trust anchors from Java's cacerts keystore
|
|
||||||
*/
|
|
||||||
private Set<TrustAnchor> loadDefaultTrustAnchors() throws Exception {
|
|
||||||
Set<TrustAnchor> trustAnchors = new HashSet<>();
|
|
||||||
|
|
||||||
// Try to load from default locations
|
|
||||||
String[] keystorePaths = {
|
|
||||||
System.getProperty("javax.net.ssl.trustStore"),
|
|
||||||
System.getProperty("java.home") + "/lib/security/cacerts",
|
|
||||||
System.getProperty("java.home") + "/jre/lib/security/cacerts"
|
|
||||||
};
|
|
||||||
|
|
||||||
String[] keystorePasswords = {
|
|
||||||
System.getProperty("javax.net.ssl.trustStorePassword"),
|
|
||||||
"changeit",
|
|
||||||
""
|
|
||||||
};
|
|
||||||
|
|
||||||
for (String keystorePath : keystorePaths) {
|
|
||||||
if (keystorePath != null) {
|
|
||||||
for (String password : keystorePasswords) {
|
|
||||||
try {
|
|
||||||
KeyStore trustStore = loadKeyStore(keystorePath, password);
|
|
||||||
if (trustStore != null) {
|
|
||||||
trustAnchors.addAll(extractTrustAnchors(trustStore));
|
|
||||||
if (!trustAnchors.isEmpty()) {
|
|
||||||
return trustAnchors;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
// Try next combination
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fallback: try to get from default trust manager
|
|
||||||
try {
|
|
||||||
TrustManagerFactory tmf = TrustManagerFactory.getInstance(
|
|
||||||
TrustManagerFactory.getDefaultAlgorithm());
|
|
||||||
tmf.init((KeyStore) null);
|
|
||||||
|
|
||||||
for (TrustManager tm : tmf.getTrustManagers()) {
|
|
||||||
if (tm instanceof X509TrustManager) {
|
|
||||||
X509TrustManager x509tm = (X509TrustManager) tm;
|
|
||||||
for (X509Certificate cert : x509tm.getAcceptedIssuers()) {
|
|
||||||
trustAnchors.add(new TrustAnchor(cert, null));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new Exception("Failed to load any trust anchors", e);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (trustAnchors.isEmpty()) {
|
|
||||||
throw new Exception("No trust anchors could be loaded");
|
|
||||||
}
|
|
||||||
|
|
||||||
return trustAnchors;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Loads a keystore from file
|
|
||||||
*/
|
|
||||||
private KeyStore loadKeyStore(String keystorePath, String password) throws Exception {
|
|
||||||
KeyStore keystore = KeyStore.getInstance(KeyStore.getDefaultType());
|
|
||||||
try (FileInputStream fis = new FileInputStream(keystorePath)) {
|
|
||||||
keystore.load(fis, password != null ? password.toCharArray() : null);
|
|
||||||
return keystore;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Extracts trust anchors from a keystore
|
|
||||||
*/
|
|
||||||
private Set<TrustAnchor> extractTrustAnchors(KeyStore trustStore) throws KeyStoreException {
|
|
||||||
Set<TrustAnchor> trustAnchors = new HashSet<>();
|
|
||||||
|
|
||||||
Enumeration<String> aliases = trustStore.aliases();
|
|
||||||
while (aliases.hasMoreElements()) {
|
|
||||||
String alias = aliases.nextElement();
|
|
||||||
if (trustStore.isCertificateEntry(alias)) {
|
|
||||||
Certificate cert = trustStore.getCertificate(alias);
|
|
||||||
if (cert instanceof X509Certificate) {
|
|
||||||
trustAnchors.add(new TrustAnchor((X509Certificate) cert, null));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return trustAnchors;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
@@ -1,116 +0,0 @@
|
|||||||
package nu.marginalia.ping.ssl;
|
|
||||||
|
|
||||||
import javax.net.ssl.SSLPeerUnverifiedException;
|
|
||||||
import javax.net.ssl.SSLSession;
|
|
||||||
import javax.net.ssl.SSLSessionContext;
|
|
||||||
import java.security.cert.Certificate;
|
|
||||||
import java.security.cert.X509Certificate;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Mock SSL session for hostname verification
|
|
||||||
*/
|
|
||||||
public class MockSSLSession implements SSLSession {
|
|
||||||
private final X509Certificate[] peerCertificates;
|
|
||||||
|
|
||||||
public MockSSLSession(X509Certificate cert) {
|
|
||||||
this.peerCertificates = new X509Certificate[]{cert};
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Certificate[] getPeerCertificates() throws SSLPeerUnverifiedException {
|
|
||||||
return peerCertificates;
|
|
||||||
}
|
|
||||||
|
|
||||||
// All other methods return default/empty values as they're not used by hostname verification
|
|
||||||
@Override
|
|
||||||
public byte[] getId() {
|
|
||||||
return new byte[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public SSLSessionContext getSessionContext() {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public long getCreationTime() {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public long getLastAccessedTime() {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void invalidate() {
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean isValid() {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void putValue(String name, Object value) {
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Object getValue(String name) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void removeValue(String name) {
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String[] getValueNames() {
|
|
||||||
return new String[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public java.security.Principal getPeerPrincipal() throws SSLPeerUnverifiedException {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public java.security.Principal getLocalPrincipal() {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getCipherSuite() {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getProtocol() {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getPeerHost() {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int getPeerPort() {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int getPacketBufferSize() {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int getApplicationBufferSize() {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Certificate[] getLocalCertificates() {
|
|
||||||
return new Certificate[0];
|
|
||||||
}
|
|
||||||
}
|
|
@@ -1,14 +0,0 @@
|
|||||||
package nu.marginalia.ping.ssl;
|
|
||||||
|
|
||||||
import java.security.cert.CertPath;
|
|
||||||
import java.security.cert.PKIXCertPathValidatorResult;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
public record PKIXValidationResult(boolean isValid, String errorMessage,
|
|
||||||
Set<PkixValidationError> errors,
|
|
||||||
PKIXCertPathValidatorResult pkixResult,
|
|
||||||
CertPath validatedPath,
|
|
||||||
Set<String> criticalExtensions,
|
|
||||||
boolean hostnameValid)
|
|
||||||
{
|
|
||||||
}
|
|
@@ -1,11 +0,0 @@
|
|||||||
package nu.marginalia.ping.ssl;
|
|
||||||
|
|
||||||
public enum PkixValidationError {
|
|
||||||
SAN_MISMATCH,
|
|
||||||
EXPIRED,
|
|
||||||
NOT_YET_VALID,
|
|
||||||
PATH_VALIDATION_FAILED,
|
|
||||||
INVALID_PKIX_PARAMETERS,
|
|
||||||
UNKNOWN,
|
|
||||||
UNSPECIFIED_HOST_ERROR;
|
|
||||||
}
|
|
@@ -0,0 +1,57 @@
|
|||||||
|
package nu.marginalia.ping.ssl;
|
||||||
|
|
||||||
|
import java.security.cert.TrustAnchor;
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
public class RootCerts {
|
||||||
|
private static final String MOZILLA_CA_BUNDLE_URL = "https://curl.se/ca/cacert.pem";
|
||||||
|
|
||||||
|
volatile static boolean initialized = false;
|
||||||
|
volatile static Set<TrustAnchor> trustAnchors;
|
||||||
|
|
||||||
|
public static Set<TrustAnchor> getTrustAnchors() {
|
||||||
|
if (!initialized) {
|
||||||
|
try {
|
||||||
|
synchronized (RootCerts.class) {
|
||||||
|
while (!initialized) {
|
||||||
|
RootCerts.class.wait(100);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (InterruptedException e) {
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
throw new RuntimeException("RootCerts initialization interrupted", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return trustAnchors;
|
||||||
|
}
|
||||||
|
|
||||||
|
static {
|
||||||
|
Thread.ofPlatform()
|
||||||
|
.name("RootCertsUpdater")
|
||||||
|
.daemon()
|
||||||
|
.unstarted(RootCerts::updateTrustAnchors)
|
||||||
|
.start();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void updateTrustAnchors() {
|
||||||
|
while (true) {
|
||||||
|
try {
|
||||||
|
trustAnchors = CertificateFetcher.getRootCerts(MOZILLA_CA_BUNDLE_URL);
|
||||||
|
synchronized (RootCerts.class) {
|
||||||
|
initialized = true;
|
||||||
|
RootCerts.class.notifyAll(); // Notify any waiting threads
|
||||||
|
}
|
||||||
|
Thread.sleep(Duration.ofHours(24));
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
break; // Exit if interrupted
|
||||||
|
} catch (Exception e) {
|
||||||
|
// Log the exception and continue to retry
|
||||||
|
System.err.println("Failed to update trust anchors: " + e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@@ -9,7 +9,7 @@ import nu.marginalia.ping.fetcher.response.HttpsResponse;
|
|||||||
import nu.marginalia.ping.model.DomainAvailabilityRecord;
|
import nu.marginalia.ping.model.DomainAvailabilityRecord;
|
||||||
import nu.marginalia.ping.model.ErrorClassification;
|
import nu.marginalia.ping.model.ErrorClassification;
|
||||||
import nu.marginalia.ping.model.HttpSchema;
|
import nu.marginalia.ping.model.HttpSchema;
|
||||||
import nu.marginalia.ping.ssl.PKIXValidationResult;
|
import nu.marginalia.ping.ssl.CertificateValidator;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@@ -124,7 +124,7 @@ public class DomainAvailabilityInformationFactory {
|
|||||||
int nodeId,
|
int nodeId,
|
||||||
@Nullable InetAddress address,
|
@Nullable InetAddress address,
|
||||||
@Nullable DomainAvailabilityRecord previousRecord,
|
@Nullable DomainAvailabilityRecord previousRecord,
|
||||||
PKIXValidationResult validationResult,
|
CertificateValidator.ValidationResult validationResult,
|
||||||
HttpsResponse rsp) {
|
HttpsResponse rsp) {
|
||||||
Instant updateTime;
|
Instant updateTime;
|
||||||
|
|
||||||
|
@@ -4,7 +4,7 @@ import nu.marginalia.ping.fetcher.response.HttpResponse;
|
|||||||
import nu.marginalia.ping.fetcher.response.HttpsResponse;
|
import nu.marginalia.ping.fetcher.response.HttpsResponse;
|
||||||
import nu.marginalia.ping.model.DomainSecurityRecord;
|
import nu.marginalia.ping.model.DomainSecurityRecord;
|
||||||
import nu.marginalia.ping.model.HttpSchema;
|
import nu.marginalia.ping.model.HttpSchema;
|
||||||
import nu.marginalia.ping.ssl.PKIXValidationResult;
|
import nu.marginalia.ping.ssl.CertificateValidator;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@@ -14,9 +14,7 @@ import java.security.NoSuchAlgorithmException;
|
|||||||
import java.security.cert.CertificateEncodingException;
|
import java.security.cert.CertificateEncodingException;
|
||||||
import java.security.cert.X509Certificate;
|
import java.security.cert.X509Certificate;
|
||||||
import java.time.Instant;
|
import java.time.Instant;
|
||||||
import java.util.HashSet;
|
import java.util.*;
|
||||||
import java.util.Set;
|
|
||||||
import java.util.StringJoiner;
|
|
||||||
|
|
||||||
public class DomainSecurityInformationFactory {
|
public class DomainSecurityInformationFactory {
|
||||||
private static final Logger logger = LoggerFactory.getLogger(DomainSecurityInformationFactory.class);
|
private static final Logger logger = LoggerFactory.getLogger(DomainSecurityInformationFactory.class);
|
||||||
@@ -54,7 +52,7 @@ public class DomainSecurityInformationFactory {
|
|||||||
// HTTPS response
|
// HTTPS response
|
||||||
public DomainSecurityRecord createHttpsSecurityInformation(
|
public DomainSecurityRecord createHttpsSecurityInformation(
|
||||||
HttpsResponse httpResponse,
|
HttpsResponse httpResponse,
|
||||||
PKIXValidationResult validationResult,
|
CertificateValidator.ValidationResult validationResult,
|
||||||
int domainId,
|
int domainId,
|
||||||
int nodeId,
|
int nodeId,
|
||||||
@Nullable Integer asn
|
@Nullable Integer asn
|
||||||
@@ -69,8 +67,11 @@ public class DomainSecurityInformationFactory {
|
|||||||
boolean isWildcard = false;
|
boolean isWildcard = false;
|
||||||
try {
|
try {
|
||||||
if (sslCertificates != null && sslCertificates.length > 0) {
|
if (sslCertificates != null && sslCertificates.length > 0) {
|
||||||
for (var sanEntry : sslCertificates[0].getSubjectAlternativeNames()) {
|
Collection<List<?>> sans = sslCertificates[0].getSubjectAlternativeNames();
|
||||||
|
if (sans == null) {
|
||||||
|
sans = Collections.emptyList();
|
||||||
|
}
|
||||||
|
for (var sanEntry : sans) {
|
||||||
|
|
||||||
if (sanEntry != null && sanEntry.size() >= 2) {
|
if (sanEntry != null && sanEntry.size() >= 2) {
|
||||||
// Check if the SAN entry is a DNS or IP address
|
// Check if the SAN entry is a DNS or IP address
|
||||||
@@ -125,6 +126,9 @@ public class DomainSecurityInformationFactory {
|
|||||||
.sslCertWildcard(isWildcard)
|
.sslCertWildcard(isWildcard)
|
||||||
.sslCertificateChainLength(sslCertificates.length)
|
.sslCertificateChainLength(sslCertificates.length)
|
||||||
.sslCertificateValid(validationResult.isValid())
|
.sslCertificateValid(validationResult.isValid())
|
||||||
|
.sslHostValid(validationResult.hostnameValid)
|
||||||
|
.sslChainValid(validationResult.chainValid)
|
||||||
|
.sslDateValid(!validationResult.certificateExpired)
|
||||||
.httpVersion(httpResponse.version())
|
.httpVersion(httpResponse.version())
|
||||||
.tsLastUpdate(Instant.now())
|
.tsLastUpdate(Instant.now())
|
||||||
.build();
|
.build();
|
||||||
|
@@ -8,8 +8,7 @@ import nu.marginalia.ping.fetcher.response.*;
|
|||||||
import nu.marginalia.ping.model.*;
|
import nu.marginalia.ping.model.*;
|
||||||
import nu.marginalia.ping.model.comparison.DomainAvailabilityChange;
|
import nu.marginalia.ping.model.comparison.DomainAvailabilityChange;
|
||||||
import nu.marginalia.ping.model.comparison.SecurityInformationChange;
|
import nu.marginalia.ping.model.comparison.SecurityInformationChange;
|
||||||
import nu.marginalia.ping.ssl.CustomPKIXValidator;
|
import nu.marginalia.ping.ssl.CertificateValidator;
|
||||||
import nu.marginalia.ping.ssl.PKIXValidationResult;
|
|
||||||
import nu.marginalia.ping.util.JsonObject;
|
import nu.marginalia.ping.util.JsonObject;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
@@ -32,9 +31,7 @@ public class HttpPingService {
|
|||||||
|
|
||||||
private final DomainAvailabilityInformationFactory domainAvailabilityInformationFactory;
|
private final DomainAvailabilityInformationFactory domainAvailabilityInformationFactory;
|
||||||
private final DomainSecurityInformationFactory domainSecurityInformationFactory;
|
private final DomainSecurityInformationFactory domainSecurityInformationFactory;
|
||||||
|
|
||||||
private static final Logger logger = LoggerFactory.getLogger(HttpPingService.class);
|
private static final Logger logger = LoggerFactory.getLogger(HttpPingService.class);
|
||||||
CustomPKIXValidator validator;
|
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public HttpPingService(
|
public HttpPingService(
|
||||||
@@ -46,7 +43,6 @@ public class HttpPingService {
|
|||||||
this.pingHttpFetcher = pingHttpFetcher;
|
this.pingHttpFetcher = pingHttpFetcher;
|
||||||
this.domainAvailabilityInformationFactory = domainAvailabilityInformationFactory;
|
this.domainAvailabilityInformationFactory = domainAvailabilityInformationFactory;
|
||||||
this.domainSecurityInformationFactory = domainSecurityInformationFactory;
|
this.domainSecurityInformationFactory = domainSecurityInformationFactory;
|
||||||
this.validator = new CustomPKIXValidator();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private int compareInetAddresses(InetAddress a, InetAddress b) {
|
private int compareInetAddresses(InetAddress a, InetAddress b) {
|
||||||
@@ -164,7 +160,11 @@ public class HttpPingService {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
case HttpsResponse httpsResponse -> {
|
case HttpsResponse httpsResponse -> {
|
||||||
PKIXValidationResult validationResult = validator.validateCertificateChain(domainReference.domainName(), (X509Certificate[]) httpsResponse.sslCertificates());
|
var validationResult = CertificateValidator.validateCertificate(
|
||||||
|
(X509Certificate[]) httpsResponse.sslCertificates(),
|
||||||
|
domainReference.domainName(),
|
||||||
|
true
|
||||||
|
);
|
||||||
|
|
||||||
newPingStatus = domainAvailabilityInformationFactory.createHttpsResponse(
|
newPingStatus = domainAvailabilityInformationFactory.createHttpsResponse(
|
||||||
domainReference.domainId(),
|
domainReference.domainId(),
|
||||||
|
@@ -243,6 +243,7 @@ class PingDaoTest {
|
|||||||
.headerServer("Apache/2.4.41 (Ubuntu)")
|
.headerServer("Apache/2.4.41 (Ubuntu)")
|
||||||
.headerXPoweredBy("PHP/7.4.3")
|
.headerXPoweredBy("PHP/7.4.3")
|
||||||
.tsLastUpdate(Instant.now())
|
.tsLastUpdate(Instant.now())
|
||||||
|
.sslHostValid(true)
|
||||||
.build();
|
.build();
|
||||||
var svc = new PingDao(dataSource);
|
var svc = new PingDao(dataSource);
|
||||||
svc.write(foo);
|
svc.write(foo);
|
||||||
|
@@ -60,6 +60,7 @@ class PingHttpServiceTest {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Tag("flaky") // Do not run this test in CI
|
||||||
@Test
|
@Test
|
||||||
public void testGetSslInfo() throws Exception {
|
public void testGetSslInfo() throws Exception {
|
||||||
var provider = new HttpClientProvider();
|
var provider = new HttpClientProvider();
|
||||||
|
@@ -4,6 +4,7 @@ public class ProcessInboxNames {
|
|||||||
public static final String CONVERTER_INBOX = "converter";
|
public static final String CONVERTER_INBOX = "converter";
|
||||||
public static final String LOADER_INBOX = "loader";
|
public static final String LOADER_INBOX = "loader";
|
||||||
public static final String PING_INBOX = "ping";
|
public static final String PING_INBOX = "ping";
|
||||||
|
public static final String NDP_INBOX = "ndp";
|
||||||
public static final String CRAWLER_INBOX = "crawler";
|
public static final String CRAWLER_INBOX = "crawler";
|
||||||
public static final String LIVE_CRAWLER_INBOX = "live-crawler";
|
public static final String LIVE_CRAWLER_INBOX = "live-crawler";
|
||||||
|
|
||||||
|
@@ -0,0 +1,4 @@
|
|||||||
|
package nu.marginalia.mqapi.ndp;
|
||||||
|
|
||||||
|
public record NdpRequest(int goal) {
|
||||||
|
}
|
@@ -25,6 +25,11 @@ into the [MariaDB database](../common/db).
|
|||||||
The [index-construction-process](index-constructor-process/) constructs indices from
|
The [index-construction-process](index-constructor-process/) constructs indices from
|
||||||
the data generated by the loader.
|
the data generated by the loader.
|
||||||
|
|
||||||
|
## 5. Other Processes
|
||||||
|
|
||||||
|
* Ping Process: The [ping-process](ping-process/) keeps track of the aliveness of websites, gathering fingerprint information about the security posture of the website, as well as DNS information.
|
||||||
|
* Live-Crawling Process: The [live-crawling-process](live-crawling-process/) is a process that crawls websites in real-time based on RSS feeds, updating a smaller index with the latest content.
|
||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
Schematically the crawling and loading process looks like this:
|
Schematically the crawling and loading process looks like this:
|
||||||
|
@@ -22,6 +22,7 @@ import nu.marginalia.search.model.NavbarModel;
|
|||||||
import nu.marginalia.search.model.ResultsPage;
|
import nu.marginalia.search.model.ResultsPage;
|
||||||
import nu.marginalia.search.model.UrlDetails;
|
import nu.marginalia.search.model.UrlDetails;
|
||||||
import nu.marginalia.search.svc.SearchFlagSiteService.FlagSiteFormData;
|
import nu.marginalia.search.svc.SearchFlagSiteService.FlagSiteFormData;
|
||||||
|
import nu.marginalia.service.server.RateLimiter;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@@ -47,6 +48,8 @@ public class SearchSiteInfoService {
|
|||||||
private final HikariDataSource dataSource;
|
private final HikariDataSource dataSource;
|
||||||
private final SearchSiteSubscriptionService searchSiteSubscriptions;
|
private final SearchSiteSubscriptionService searchSiteSubscriptions;
|
||||||
|
|
||||||
|
private final RateLimiter rateLimiter = RateLimiter.custom(60);
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public SearchSiteInfoService(SearchOperator searchOperator,
|
public SearchSiteInfoService(SearchOperator searchOperator,
|
||||||
DomainInfoClient domainInfoClient,
|
DomainInfoClient domainInfoClient,
|
||||||
@@ -238,6 +241,7 @@ public class SearchSiteInfoService {
|
|||||||
boolean hasScreenshot = screenshotService.hasScreenshot(domainId);
|
boolean hasScreenshot = screenshotService.hasScreenshot(domainId);
|
||||||
boolean isSubscribed = searchSiteSubscriptions.isSubscribed(context, domain);
|
boolean isSubscribed = searchSiteSubscriptions.isSubscribed(context, domain);
|
||||||
|
|
||||||
|
boolean rateLimited = !rateLimiter.isAllowed();
|
||||||
if (domainId < 0) {
|
if (domainId < 0) {
|
||||||
domainInfoFuture = CompletableFuture.failedFuture(new Exception("Unknown Domain ID"));
|
domainInfoFuture = CompletableFuture.failedFuture(new Exception("Unknown Domain ID"));
|
||||||
similarSetFuture = CompletableFuture.failedFuture(new Exception("Unknown Domain ID"));
|
similarSetFuture = CompletableFuture.failedFuture(new Exception("Unknown Domain ID"));
|
||||||
@@ -250,6 +254,12 @@ public class SearchSiteInfoService {
|
|||||||
linkingDomainsFuture = CompletableFuture.failedFuture(new Exception("Assistant Service Unavailable"));
|
linkingDomainsFuture = CompletableFuture.failedFuture(new Exception("Assistant Service Unavailable"));
|
||||||
feedItemsFuture = CompletableFuture.failedFuture(new Exception("Assistant Service Unavailable"));
|
feedItemsFuture = CompletableFuture.failedFuture(new Exception("Assistant Service Unavailable"));
|
||||||
}
|
}
|
||||||
|
else if (rateLimited) {
|
||||||
|
domainInfoFuture = domainInfoClient.domainInformation(domainId);
|
||||||
|
similarSetFuture = CompletableFuture.failedFuture(new Exception("Rate limit exceeded"));
|
||||||
|
linkingDomainsFuture = CompletableFuture.failedFuture(new Exception("Rate limit exceeded"));
|
||||||
|
feedItemsFuture = CompletableFuture.failedFuture(new Exception("Rate limit exceeded"));
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
domainInfoFuture = domainInfoClient.domainInformation(domainId);
|
domainInfoFuture = domainInfoClient.domainInformation(domainId);
|
||||||
similarSetFuture = domainInfoClient.similarDomains(domainId, 25);
|
similarSetFuture = domainInfoClient.similarDomains(domainId, 25);
|
||||||
@@ -257,7 +267,14 @@ public class SearchSiteInfoService {
|
|||||||
feedItemsFuture = feedsClient.getFeed(domainId);
|
feedItemsFuture = feedsClient.getFeed(domainId);
|
||||||
}
|
}
|
||||||
|
|
||||||
List<UrlDetails> sampleResults = searchOperator.doSiteSearch(domainName, domainId,5, 1).results;
|
List<UrlDetails> sampleResults;
|
||||||
|
if (rateLimited) {
|
||||||
|
sampleResults = List.of();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
sampleResults = searchOperator.doSiteSearch(domainName, domainId, 5, 1).results;
|
||||||
|
}
|
||||||
|
|
||||||
if (!sampleResults.isEmpty()) {
|
if (!sampleResults.isEmpty()) {
|
||||||
url = sampleResults.getFirst().url.withPathAndParam("/", null).toString();
|
url = sampleResults.getFirst().url.withPathAndParam("/", null).toString();
|
||||||
}
|
}
|
||||||
@@ -276,8 +293,9 @@ public class SearchSiteInfoService {
|
|||||||
sampleResults
|
sampleResults
|
||||||
);
|
);
|
||||||
|
|
||||||
|
if (!rateLimited) {
|
||||||
requestMissingScreenshots(result);
|
requestMissingScreenshots(result);
|
||||||
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -0,0 +1,11 @@
|
|||||||
|
@param String message
|
||||||
|
|
||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head><meta charset="UTF-8">
|
||||||
|
<title>Unavailable</title></head>
|
||||||
|
<body>
|
||||||
|
<h1>Service Overloaded</h1>
|
||||||
|
<p>${message}</p>
|
||||||
|
</body>
|
||||||
|
</html>
|
@@ -69,6 +69,7 @@ include 'code:processes:crawling-process:ft-link-parser'
|
|||||||
include 'code:processes:crawling-process:ft-content-type'
|
include 'code:processes:crawling-process:ft-content-type'
|
||||||
include 'code:processes:live-crawling-process'
|
include 'code:processes:live-crawling-process'
|
||||||
include 'code:processes:ping-process'
|
include 'code:processes:ping-process'
|
||||||
|
include 'code:processes:new-domain-process'
|
||||||
|
|
||||||
include 'code:processes:process-mq-api'
|
include 'code:processes:process-mq-api'
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user