Mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git, synced 2025-10-05 21:22:39 +02:00

Compare commits: deploy-018 ... deploy-021 (63 commits)
SHA1:
db907ab06a, c49cd9dd95, eec9df3b0a, e5f3288de6, d587544d3a, 1a9ae1bc40, e0c81e956a,
542fb12b38, 65ec734566, 10b6a25c63, 6260f6bec7, d6d5467696, 034560ca75, e994fddae4,
345f01f306, 5a8e286689, 39a055aa94, 37aaa90dc9, 24022c5adc, 1de9ecc0b6, 9b80245ea0,
4e1595c1a6, 0be8585fa5, a0fe070fe7, abe9da0fc6, 56d0128b0a, 840b68ac55, c34ff6d6c3,
32780967d8, 7330bc489d, ea23f33738, 4a8a028118, a25bc647be, a720dba3a2, 284f382867,
a80717f138, d6da715fa4, c1ec7aa491, 3daf37e283, 44a774d3a8, 597aeaf496, 06df7892c2,
dc26854268, 9f16326cba, ed66d0b3a7, c3afc82dad, 08e25e539e, 4946044dd0, edf382e1c5,
644cba32e4, 34b76390b2, 43cd507971, cc40e99fdc, 8a944cf4c6, 1c128e6d82, be039d1a8c,
4edc0d3267, 890f521d0d, b1814a30f7, f59a9eb025, 599534806b, 7e8253dac7, 97a6780ea3
Changed file: ROADMAP.md (16 lines)
@@ -38,14 +38,6 @@ associated with each language added, at least a models file or two, as well as s

It would be very helpful to find a speaker of a large language other than English to help in the fine tuning.

## Support for binary formats like PDF

The crawler needs to be modified to retain them, and the conversion logic needs to parse them.
The documents database probably should have some sort of flag indicating it's a PDF as well.

PDF parsing is known to be a bit of a security liability so some thought needs to be put in
that direction as well.

## Custom ranking logic

Stract does an interesting thing where they have configurable search filters.
@@ -66,6 +58,14 @@ One of the search engine's biggest limitations right now is that it does not ind

# Completed

## Support for binary formats like PDF (COMPLETED 2025-05)

The crawler needs to be modified to retain them, and the conversion logic needs to parse them.
The documents database probably should have some sort of flag indicating it's a PDF as well.

PDF parsing is known to be a bit of a security liability so some thought needs to be put in
that direction as well.
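As a rough illustration of the conversion side of this item, and not necessarily how Marginalia's converter actually does it, plain text can be pulled out of a PDF with Apache PDFBox before it enters the usual document processing; running the parser in a separate, resource-limited process is one way to contain the security liability mentioned above. The class below is a hypothetical sketch assuming PDFBox 2.x on the classpath.

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;

import java.io.IOException;
import java.nio.file.Path;

// Minimal sketch: extract the text layer of a PDF so it can be indexed like HTML.
public class PdfTextExtractor {
    public static String extractText(Path pdfFile) throws IOException {
        try (PDDocument document = PDDocument.load(pdfFile.toFile())) {
            PDFTextStripper stripper = new PDFTextStripper();
            return stripper.getText(document);
        }
    }
}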
## Web Design Overhaul (COMPLETED 2025-01)

The design is kinda clunky and hard to maintain, and needlessly outdated-looking.
@@ -1,3 +1,8 @@
package nu.marginalia;

/**
 * A record representing a User Agent.
 * @param uaString - the header value of the User Agent
 * @param uaIdentifier - what we look for in robots.txt
 */
public record UserAgent(String uaString, String uaIdentifier) {}
@@ -0,0 +1,5 @@
CREATE TABLE IF NOT EXISTS WMSA_prod.NSFW_DOMAINS (
    ID INT NOT NULL AUTO_INCREMENT,
    TIER INT NOT NULL,
    PRIMARY KEY (ID)
);
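The TIER column corresponds to the filter levels defined later in this change set by NsfwDomainFilter (0 = disabled, 1 = block the "danger" lists, 2 = also block the "smut" lists). A small sketch of reading the table into per-tier sets, mirroring the shape of the SELECT that NsfwDomainFilter.sync() runs further down; the connection string and credentials are placeholders.

import java.sql.Connection;
import java.sql.DriverManager;
import java.util.HashSet;
import java.util.Set;

public class NsfwDomainsQueryExample {
    public static void main(String[] args) throws Exception {
        Set<Integer> tier1 = new HashSet<>(); // TIER = 1, "danger" lists
        Set<Integer> tier2 = new HashSet<>(); // TIER = 2, "smut" lists

        // Placeholder credentials; the real code gets a HikariDataSource injected.
        try (Connection conn = DriverManager.getConnection("jdbc:mariadb://localhost/WMSA_prod", "user", "password");
             var stmt = conn.prepareStatement("SELECT ID, TIER FROM NSFW_DOMAINS");
             var rs = stmt.executeQuery()) {
            while (rs.next()) {
                switch (rs.getInt("TIER")) {
                    case 1 -> tier1.add(rs.getInt("ID"));
                    case 2 -> tier2.add(rs.getInt("ID"));
                }
            }
        }
        System.out.printf("tier 1: %d domains, tier 2: %d domains%n", tier1.size(), tier2.size());
    }
}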
@@ -0,0 +1,213 @@

-- Create metadata tables for domain ping status and security information

-- These are not ICMP pings, but rather HTTP(S) pings to check the availability and security
-- of web servers associated with domains, to assess uptime and changes in security configurations
-- indicating ownership changes or security issues.

-- Note: DOMAIN_ID and NODE_ID are used to identify the domain and the node that performed the ping.
-- These are strictly speaking foreign keys to the EC_DOMAIN table, but as it
-- is strictly append-only, we do not need to enforce foreign key constraints.

CREATE TABLE IF NOT EXISTS DOMAIN_AVAILABILITY_INFORMATION (
    DOMAIN_ID INT NOT NULL PRIMARY KEY,
    NODE_ID INT NOT NULL,

    SERVER_AVAILABLE BOOLEAN NOT NULL, -- Indicates if the server is available (true) or not (false)
    SERVER_IP VARBINARY(16), -- IP address of the server (IPv4 or IPv6)
    SERVER_IP_ASN INTEGER, -- Autonomous System number

    DATA_HASH BIGINT, -- Hash of the data for integrity checks
    SECURITY_CONFIG_HASH BIGINT, -- Hash of the security configuration for integrity checks

    HTTP_SCHEMA ENUM('HTTP', 'HTTPS'), -- HTTP or HTTPS protocol used
    HTTP_ETAG VARCHAR(255), -- ETag of the resource as per HTTP headers
    HTTP_LAST_MODIFIED VARCHAR(255), -- Last modified date of the resource as per HTTP headers
    HTTP_STATUS INT, -- HTTP status code (e.g., 200, 404, etc.)
    HTTP_LOCATION VARCHAR(255), -- If the server redirects, this is the location of the redirect
    HTTP_RESPONSE_TIME_MS SMALLINT UNSIGNED, -- Response time in milliseconds

    ERROR_CLASSIFICATION ENUM('NONE', 'TIMEOUT', 'SSL_ERROR', 'DNS_ERROR', 'CONNECTION_ERROR', 'HTTP_CLIENT_ERROR', 'HTTP_SERVER_ERROR', 'UNKNOWN'), -- Classification of the error if the server is not available
    ERROR_MESSAGE VARCHAR(255), -- Error message if the server is not available

    TS_LAST_PING TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, -- Timestamp of the last ping
    TS_LAST_AVAILABLE TIMESTAMP, -- Timestamp of the last time the server was available
    TS_LAST_ERROR TIMESTAMP, -- Timestamp of the last error encountered

    NEXT_SCHEDULED_UPDATE TIMESTAMP NOT NULL,
    BACKOFF_CONSECUTIVE_FAILURES INT NOT NULL DEFAULT 0, -- Number of consecutive failures to ping the server
    BACKOFF_FETCH_INTERVAL INT NOT NULL DEFAULT 60 -- Interval in seconds for the next scheduled ping
) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;

CREATE INDEX IF NOT EXISTS DOMAIN_AVAILABILITY_INFORMATION__NODE_ID__DOMAIN_ID_IDX ON DOMAIN_AVAILABILITY_INFORMATION (NODE_ID, DOMAIN_ID);
CREATE INDEX IF NOT EXISTS DOMAIN_AVAILABILITY_INFORMATION__NEXT_SCHEDULED_UPDATE_IDX ON DOMAIN_AVAILABILITY_INFORMATION (NODE_ID, NEXT_SCHEDULED_UPDATE);
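The BACKOFF_* columns imply that the ping process backs off on repeated failures, but the migration itself doesn't pin down the policy. Purely as an illustration, assuming a capped exponential backoff (the base interval matches the column default; the cap is invented):

import java.time.Duration;
import java.time.Instant;

// Hypothetical scheduling rule over the columns above:
// NEXT_SCHEDULED_UPDATE = now + BACKOFF_FETCH_INTERVAL * 2^min(BACKOFF_CONSECUTIVE_FAILURES, cap)
public class PingBackoffExample {
    private static final int MAX_EXPONENT = 6; // caps the delay at 64x the base interval

    public static Instant nextScheduledUpdate(int baseIntervalSeconds, int consecutiveFailures) {
        int exponent = Math.min(consecutiveFailures, MAX_EXPONENT);
        long delaySeconds = (long) baseIntervalSeconds << exponent;
        return Instant.now().plus(Duration.ofSeconds(delaySeconds));
    }

    public static void main(String[] args) {
        System.out.println(nextScheduledUpdate(60, 0)); // no failures: ~1 minute out
        System.out.println(nextScheduledUpdate(60, 3)); // after 3 failures: ~8 minutes out
    }
}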
CREATE TABLE IF NOT EXISTS DOMAIN_SECURITY_INFORMATION (
    DOMAIN_ID INT NOT NULL PRIMARY KEY,
    NODE_ID INT NOT NULL,

    ASN INTEGER, -- Autonomous System Number (ASN) of the server
    HTTP_SCHEMA ENUM('HTTP', 'HTTPS'), -- HTTP or HTTPS protocol used
    HTTP_VERSION VARCHAR(10), -- HTTP version used (e.g., HTTP/1.1, HTTP/2)
    HTTP_COMPRESSION VARCHAR(50), -- Compression method used (e.g., gzip, deflate, br)
    HTTP_CACHE_CONTROL TEXT, -- Cache control directives from HTTP headers

    SSL_CERT_NOT_BEFORE TIMESTAMP, -- Valid from date (usually same as issued)
    SSL_CERT_NOT_AFTER TIMESTAMP, -- Valid until date (usually same as expires)

    SSL_CERT_ISSUER VARCHAR(255), -- CA that issued the cert
    SSL_CERT_SUBJECT VARCHAR(255), -- Certificate subject/CN

    SSL_CERT_PUBLIC_KEY_HASH BINARY(32), -- SHA-256 hash of the public key
    SSL_CERT_SERIAL_NUMBER VARCHAR(100), -- Unique cert serial number
    SSL_CERT_FINGERPRINT_SHA256 BINARY(32), -- SHA-256 fingerprint for exact identification
    SSL_CERT_SAN TEXT, -- Subject Alternative Names (JSON array)
    SSL_CERT_WILDCARD BOOLEAN, -- Wildcard certificate (*.example.com)

    SSL_PROTOCOL VARCHAR(20), -- TLS 1.2, TLS 1.3, etc.
    SSL_CIPHER_SUITE VARCHAR(100), -- e.g., TLS_AES_256_GCM_SHA384
    SSL_KEY_EXCHANGE VARCHAR(50), -- ECDHE, RSA, etc.
    SSL_CERTIFICATE_CHAIN_LENGTH TINYINT, -- Number of certs in chain

    SSL_CERTIFICATE_VALID BOOLEAN, -- Valid cert chain

    HEADER_CORS_ALLOW_ORIGIN TEXT, -- Could be *, specific domains, or null
    HEADER_CORS_ALLOW_CREDENTIALS BOOLEAN, -- Credential handling
    HEADER_CONTENT_SECURITY_POLICY_HASH INT, -- CSP header, hash of the policy
    HEADER_STRICT_TRANSPORT_SECURITY VARCHAR(255), -- HSTS header
    HEADER_REFERRER_POLICY VARCHAR(50), -- Referrer handling
    HEADER_X_FRAME_OPTIONS VARCHAR(50), -- Clickjacking protection
    HEADER_X_CONTENT_TYPE_OPTIONS VARCHAR(50), -- MIME sniffing protection
    HEADER_X_XSS_PROTECTION VARCHAR(50), -- XSS protection header

    HEADER_SERVER VARCHAR(255), -- Server header (e.g., Apache, Nginx, etc.)
    HEADER_X_POWERED_BY VARCHAR(255), -- X-Powered-By header (if present)

    TS_LAST_UPDATE TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP -- Timestamp of the last SSL check
) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;

CREATE INDEX IF NOT EXISTS DOMAIN_SECURITY_INFORMATION__NODE_ID__DOMAIN_ID_IDX ON DOMAIN_SECURITY_INFORMATION (NODE_ID, DOMAIN_ID);

CREATE TABLE IF NOT EXISTS DOMAIN_SECURITY_EVENTS (
    CHANGE_ID BIGINT AUTO_INCREMENT PRIMARY KEY, -- Unique identifier for the change
    DOMAIN_ID INT NOT NULL, -- Domain ID, used as a foreign key to EC_DOMAIN
    NODE_ID INT NOT NULL,

    TS_CHANGE TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, -- Timestamp of the change

    CHANGE_ASN BOOLEAN NOT NULL DEFAULT FALSE, -- Indicates if the change is related to ASN (Autonomous System Number)
    CHANGE_CERTIFICATE_FINGERPRINT BOOLEAN NOT NULL DEFAULT FALSE, -- Indicates if the change is related to SSL certificate fingerprint
    CHANGE_CERTIFICATE_PROFILE BOOLEAN NOT NULL DEFAULT FALSE, -- Indicates if the change is related to SSL certificate profile (e.g., algorithm, exchange)
    CHANGE_CERTIFICATE_SAN BOOLEAN NOT NULL DEFAULT FALSE, -- Indicates if the change is related to SSL certificate SAN (Subject Alternative Name)
    CHANGE_CERTIFICATE_PUBLIC_KEY BOOLEAN NOT NULL DEFAULT FALSE, -- Indicates if the change is related to SSL certificate public key
    CHANGE_SECURITY_HEADERS BOOLEAN NOT NULL DEFAULT FALSE, -- Indicates if the change is related to security headers
    CHANGE_IP_ADDRESS BOOLEAN NOT NULL DEFAULT FALSE, -- Indicates if the change is related to IP address
    CHANGE_SOFTWARE BOOLEAN NOT NULL DEFAULT FALSE, -- Indicates if the change is related to the generator (e.g., web server software)
    OLD_CERT_TIME_TO_EXPIRY INT, -- Time to expiry of the old certificate in hours, if applicable

    SECURITY_SIGNATURE_BEFORE BLOB NOT NULL, -- Security signature before the change, gzipped json record
    SECURITY_SIGNATURE_AFTER BLOB NOT NULL -- Security signature after the change, gzipped json record
) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;

CREATE INDEX IF NOT EXISTS DOMAIN_SECURITY_EVENTS__NODE_ID__DOMAIN_ID_IDX ON DOMAIN_SECURITY_EVENTS (NODE_ID, DOMAIN_ID);
CREATE INDEX IF NOT EXISTS DOMAIN_SECURITY_EVENTS__TS_CHANGE_IDX ON DOMAIN_SECURITY_EVENTS (TS_CHANGE);
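The CHANGE_* flags read as the output of diffing two consecutive security snapshots and recording which facets moved. A toy illustration of that comparison for two of the facets; the snapshot record and its fields are invented for the example, not taken from the ping process.

import java.util.Arrays;
import java.util.Objects;

// Invented, pared-down snapshot; the real one would carry everything in DOMAIN_SECURITY_INFORMATION.
record SecuritySnapshot(byte[] certFingerprintSha256, String serverHeader) {}

class SecurityChangeExample {
    // Would drive CHANGE_CERTIFICATE_FINGERPRINT in a DOMAIN_SECURITY_EVENTS row.
    static boolean certificateFingerprintChanged(SecuritySnapshot before, SecuritySnapshot after) {
        return !Arrays.equals(before.certFingerprintSha256(), after.certFingerprintSha256());
    }

    // Would drive CHANGE_SOFTWARE (a different Server header suggests a software change).
    static boolean softwareChanged(SecuritySnapshot before, SecuritySnapshot after) {
        return !Objects.equals(before.serverHeader(), after.serverHeader());
    }
}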
CREATE TABLE IF NOT EXISTS DOMAIN_AVAILABILITY_EVENTS (
    DOMAIN_ID INT NOT NULL,
    NODE_ID INT NOT NULL,

    AVAILABLE BOOLEAN NOT NULL, -- True if the service is available, false if it is not
    OUTAGE_TYPE ENUM('NONE', 'TIMEOUT', 'SSL_ERROR', 'DNS_ERROR', 'CONNECTION_ERROR', 'HTTP_CLIENT_ERROR', 'HTTP_SERVER_ERROR', 'UNKNOWN') NOT NULL,
    HTTP_STATUS_CODE INT, -- HTTP status code if available (e.g., 200, 404, etc.)
    ERROR_MESSAGE VARCHAR(255), -- Specific error details

    TS_CHANGE TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, -- Timestamp of the last update

    AVAILABILITY_RECORD_ID BIGINT AUTO_INCREMENT,
    P_KEY_MONTH TINYINT NOT NULL DEFAULT MONTH(TS_CHANGE), -- Month of the change for partitioning
    PRIMARY KEY (AVAILABILITY_RECORD_ID, P_KEY_MONTH)
)
CHARACTER SET utf8mb4 COLLATE utf8mb4_bin
PARTITION BY RANGE (P_KEY_MONTH) (
    PARTITION p0 VALUES LESS THAN (1), -- January
    PARTITION p1 VALUES LESS THAN (2), -- February
    PARTITION p2 VALUES LESS THAN (3), -- March
    PARTITION p3 VALUES LESS THAN (4), -- April
    PARTITION p4 VALUES LESS THAN (5), -- May
    PARTITION p5 VALUES LESS THAN (6), -- June
    PARTITION p6 VALUES LESS THAN (7), -- July
    PARTITION p7 VALUES LESS THAN (8), -- August
    PARTITION p8 VALUES LESS THAN (9), -- September
    PARTITION p9 VALUES LESS THAN (10), -- October
    PARTITION p10 VALUES LESS THAN (11), -- November
    PARTITION p11 VALUES LESS THAN (12) -- December
);

CREATE INDEX DOMAIN_AVAILABILITY_EVENTS__DOMAIN_ID_TS_IDX ON DOMAIN_AVAILABILITY_EVENTS (DOMAIN_ID, TS_CHANGE);
CREATE INDEX DOMAIN_AVAILABILITY_EVENTS__TS_CHANGE_IDX ON DOMAIN_AVAILABILITY_EVENTS (TS_CHANGE);

CREATE TABLE IF NOT EXISTS DOMAIN_DNS_INFORMATION (
    DNS_ROOT_DOMAIN_ID INT AUTO_INCREMENT PRIMARY KEY,
    ROOT_DOMAIN_NAME VARCHAR(255) NOT NULL UNIQUE,
    NODE_AFFINITY INT NOT NULL, -- Node ID that performs the DNS check, assign randomly across nodes

    DNS_A_RECORDS TEXT, -- JSON array of IPv4 addresses
    DNS_AAAA_RECORDS TEXT, -- JSON array of IPv6 addresses
    DNS_CNAME_RECORD VARCHAR(255), -- Canonical name (if applicable)
    DNS_MX_RECORDS TEXT, -- JSON array of mail exchange records
    DNS_CAA_RECORDS TEXT, -- Certificate Authority Authorization
    DNS_TXT_RECORDS TEXT, -- TXT records (SPF, DKIM, verification, etc.)
    DNS_NS_RECORDS TEXT, -- Name servers (JSON array)
    DNS_SOA_RECORD TEXT, -- Start of Authority (JSON object)

    TS_LAST_DNS_CHECK TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    TS_NEXT_DNS_CHECK TIMESTAMP NOT NULL,
    DNS_CHECK_PRIORITY TINYINT DEFAULT 0 -- Priority of the DNS check, in case we want to schedule a refresh sooner
) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;

CREATE INDEX DOMAIN_DNS_INFORMATION__PRIORITY_NEXT_CHECK_IDX ON DOMAIN_DNS_INFORMATION (NODE_AFFINITY, DNS_CHECK_PRIORITY DESC, TS_NEXT_DNS_CHECK);

CREATE TABLE IF NOT EXISTS DOMAIN_DNS_EVENTS (
    DNS_ROOT_DOMAIN_ID INT NOT NULL,
    NODE_ID INT NOT NULL,

    TS_CHANGE TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,

    -- DNS change type flags
    CHANGE_A_RECORDS BOOLEAN NOT NULL DEFAULT FALSE, -- IPv4 address changes
    CHANGE_AAAA_RECORDS BOOLEAN NOT NULL DEFAULT FALSE, -- IPv6 address changes
    CHANGE_CNAME BOOLEAN NOT NULL DEFAULT FALSE, -- CNAME changes
    CHANGE_MX_RECORDS BOOLEAN NOT NULL DEFAULT FALSE, -- Mail server changes
    CHANGE_CAA_RECORDS BOOLEAN NOT NULL DEFAULT FALSE, -- Certificate authority changes
    CHANGE_TXT_RECORDS BOOLEAN NOT NULL DEFAULT FALSE, -- TXT record changes (SPF, DKIM, etc.)
    CHANGE_NS_RECORDS BOOLEAN NOT NULL DEFAULT FALSE, -- Name server changes (big red flag!)
    CHANGE_SOA_RECORD BOOLEAN NOT NULL DEFAULT FALSE, -- Start of Authority changes

    DNS_SIGNATURE_BEFORE BLOB NOT NULL, -- Compressed JSON snapshot of DNS records before change
    DNS_SIGNATURE_AFTER BLOB NOT NULL, -- Compressed JSON snapshot of DNS records after change

    DNS_EVENT_ID BIGINT AUTO_INCREMENT,
    P_KEY_MONTH TINYINT NOT NULL DEFAULT MONTH(TS_CHANGE), -- Month of the change for partitioning
    PRIMARY KEY (DNS_EVENT_ID, P_KEY_MONTH)
)
CHARACTER SET utf8mb4 COLLATE utf8mb4_bin
PARTITION BY RANGE (P_KEY_MONTH) (
    PARTITION p0 VALUES LESS THAN (1), -- January
    PARTITION p1 VALUES LESS THAN (2), -- February
    PARTITION p2 VALUES LESS THAN (3), -- March
    PARTITION p3 VALUES LESS THAN (4), -- April
    PARTITION p4 VALUES LESS THAN (5), -- May
    PARTITION p5 VALUES LESS THAN (6), -- June
    PARTITION p6 VALUES LESS THAN (7), -- July
    PARTITION p7 VALUES LESS THAN (8), -- August
    PARTITION p8 VALUES LESS THAN (9), -- September
    PARTITION p9 VALUES LESS THAN (10), -- October
    PARTITION p10 VALUES LESS THAN (11), -- November
    PARTITION p11 VALUES LESS THAN (12) -- December
);

CREATE INDEX DOMAIN_DNS_EVENTS__DNS_ROOT_DOMAIN_ID_TS_IDX ON DOMAIN_DNS_EVENTS (DNS_ROOT_DOMAIN_ID, TS_CHANGE);
CREATE INDEX DOMAIN_DNS_EVENTS__TS_CHANGE_IDX ON DOMAIN_DNS_EVENTS (TS_CHANGE);
@@ -112,14 +112,6 @@ public class EdgeDomain implements Serializable {
        return topDomain;
    }

    public String getDomainKey() {
        int cutPoint = topDomain.indexOf('.');
        if (cutPoint < 0) {
            return topDomain;
        }
        return topDomain.substring(0, cutPoint).toLowerCase();
    }

    /** If possible, try to provide an alias domain,
     * i.e. a domain name that is very likely to link to this one
     * */
@@ -6,11 +6,20 @@ import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.EdgeUrl;

import java.net.URISyntaxException;
import java.time.Instant;

public class GsonFactory {
    public static Gson get() {
        return new GsonBuilder()
                .registerTypeAdapterFactory(RecordTypeAdapterFactory.builder().allowMissingComponentValues().create())
                .registerTypeAdapter(Instant.class, (JsonSerializer<Instant>) (src, typeOfSrc, context) -> new JsonPrimitive(src.toEpochMilli()))
                .registerTypeAdapter(Instant.class, (JsonDeserializer<Instant>) (json, typeOfT, context) -> {
                    if (json.isJsonPrimitive() && json.getAsJsonPrimitive().isNumber()) {
                        return Instant.ofEpochMilli(json.getAsLong());
                    } else {
                        throw new JsonParseException("Expected a number for Instant");
                    }
                })
                .registerTypeAdapter(EdgeUrl.class, (JsonSerializer<EdgeUrl>) (src, typeOfSrc, context) -> new JsonPrimitive(src.toString()))
                .registerTypeAdapter(EdgeDomain.class, (JsonSerializer<EdgeDomain>) (src, typeOfSrc, context) -> new JsonPrimitive(src.toString()))
                .registerTypeAdapter(EdgeUrl.class, (JsonDeserializer<EdgeUrl>) (json, typeOfT, context) -> {
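The practical effect of the new Instant adapters is that timestamps serialize as epoch milliseconds rather than objects or ISO strings. A small usage sketch, assuming it sits in the same package as GsonFactory so no import of that class is needed:

import com.google.gson.Gson;
import java.time.Instant;

class GsonFactoryUsageExample {
    public static void main(String[] args) {
        Gson gson = GsonFactory.get();

        Instant now = Instant.now();
        String json = gson.toJson(now);                      // a bare number, e.g. 1717171717171
        Instant back = gson.fromJson(json, Instant.class);   // parsed back via ofEpochMilli

        // Millisecond precision survives the round trip; anything finer is truncated.
        System.out.println(now.toEpochMilli() == back.toEpochMilli());
    }
}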
@@ -8,14 +8,6 @@ import static org.junit.jupiter.api.Assertions.assertEquals;

class EdgeDomainTest {

    @Test
    public void testSkepdic() throws URISyntaxException {
        var domain = new EdgeUrl("http://www.skepdic.com/astrology.html");
        assertEquals("skepdic", domain.getDomain().getDomainKey());
        var domain2 = new EdgeUrl("http://skepdic.com/astrology.html");
        assertEquals("skepdic", domain2.getDomain().getDomainKey());
    }

    @Test
    public void testHkDomain() throws URISyntaxException {
        var domain = new EdgeUrl("http://l7072i3.l7c.net");
@@ -0,0 +1,59 @@
package nu.marginalia.process.control;

import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.process.ProcessConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.sql.SQLException;
import java.util.Objects;
import java.util.UUID;

@Singleton
public class ProcessEventLog {
    private final HikariDataSource dataSource;

    private final Logger logger = LoggerFactory.getLogger(ProcessEventLog.class);

    private final String serviceName;
    private final UUID instanceUuid;
    private final String serviceBase;

    @Inject
    public ProcessEventLog(HikariDataSource dataSource, ProcessConfiguration configuration) {
        this.dataSource = dataSource;

        this.serviceName = configuration.processName() + ":" + configuration.node();
        this.instanceUuid = configuration.instanceUuid();
        this.serviceBase = configuration.processName();

        logger.info("Starting service {} instance {}", serviceName, instanceUuid);

        logEvent("PCS-START", serviceName);
    }

    public void logEvent(Class<?> type, String message) {
        logEvent(type.getSimpleName(), message);
    }

    public void logEvent(String type, String message) {

        try (var conn = dataSource.getConnection();
             var stmt = conn.prepareStatement("""
                     INSERT INTO SERVICE_EVENTLOG(SERVICE_NAME, SERVICE_BASE, INSTANCE, EVENT_TYPE, EVENT_MESSAGE)
                     VALUES (?, ?, ?, ?, ?)
                     """)) {
            stmt.setString(1, serviceName);
            stmt.setString(2, serviceBase);
            stmt.setString(3, instanceUuid.toString());
            stmt.setString(4, type);
            stmt.setString(5, Objects.requireNonNull(message, ""));

            stmt.executeUpdate();
        }
        catch (SQLException ex) {
            logger.error("Failed to log event {}:{}", type, message);
        }
    }
}
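Each call writes one row to SERVICE_EVENTLOG and swallows SQL failures, so event logging never takes the process down. A hypothetical call site, with invented class and event names, assuming the usual Guice injection:

import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.process.control.ProcessEventLog;

// Illustrative only; only ProcessEventLog itself comes from the diff above.
@Singleton
class ExampleProcess {
    private final ProcessEventLog eventLog;

    @Inject
    ExampleProcess(ProcessEventLog eventLog) {
        this.eventLog = eventLog;
    }

    void run() {
        eventLog.logEvent("EXAMPLE-RUN-START", "starting work");
        // ... do the work ...
        eventLog.logEvent(ExampleProcess.class, "finished"); // EVENT_TYPE becomes "ExampleProcess"
    }
}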
@@ -1,17 +1,23 @@
package nu.marginalia.service.discovery;

import nu.marginalia.service.discovery.monitor.*;
import com.google.inject.ImplementedBy;
import nu.marginalia.service.discovery.monitor.ServiceChangeMonitor;
import nu.marginalia.service.discovery.monitor.ServiceMonitorIf;
import nu.marginalia.service.discovery.property.ServiceEndpoint;
import static nu.marginalia.service.discovery.property.ServiceEndpoint.*;

import nu.marginalia.service.discovery.property.ServiceKey;

import java.util.Collection;
import java.util.List;
import java.util.UUID;
import java.util.function.BiConsumer;
import java.util.function.Consumer;

import static nu.marginalia.service.discovery.property.ServiceEndpoint.InstanceAddress;

/** A service registry that allows services to register themselves and
 * be discovered by other services on the network.
 */
@ImplementedBy(ZkServiceRegistry.class)
public interface ServiceRegistryIf {
    /**
     * Register a service with the registry.

@@ -57,4 +63,9 @@ public interface ServiceRegistryIf {
     * </ul>
     * */
    void registerMonitor(ServiceMonitorIf monitor) throws Exception;

    void registerProcess(String processName, int nodeId);
    void deregisterProcess(String processName, int nodeId);
    void watchProcess(String processName, int nodeId, Consumer<Boolean> callback) throws Exception;
    void watchProcessAnyNode(String processName, Collection<Integer> nodes, BiConsumer<Boolean, Integer> callback) throws Exception;
}
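The four new methods model process liveness: a process claims an ephemeral node when it starts and releases it when it exits, while other services watch the transitions. A sketch of the intended call pattern; only ServiceRegistryIf, the "/process-locks" path shape, and the "ping" process name come from this change set, the wrapper class is invented:

import nu.marginalia.service.discovery.ServiceRegistryIf;

class ProcessLivenessExample {
    // Run a process under a liveness lock node.
    void runProcess(ServiceRegistryIf registry) {
        registry.registerProcess("ping", 1);        // creates /process-locks/ping/1 as an ephemeral node
        try {
            // ... the actual process work ...
        }
        finally {
            registry.deregisterProcess("ping", 1);  // explicit cleanup; the ephemeral node also
                                                    // disappears if the process dies uncleanly
        }
    }

    // Observe the process from elsewhere, e.g. a monitor actor.
    void monitorProcess(ServiceRegistryIf registry) throws Exception {
        registry.watchProcess("ping", 1,
                running -> System.out.println("ping on node 1 running: " + running));
    }
}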
@@ -13,11 +13,10 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.UUID;
import java.util.*;
import java.util.concurrent.TimeUnit;
import java.util.function.BiConsumer;
import java.util.function.Consumer;

import static nu.marginalia.service.discovery.property.ServiceEndpoint.InstanceAddress;

@@ -256,6 +255,90 @@ public class ZkServiceRegistry implements ServiceRegistryIf {
                .forPath("/running-instances");
    }

    @Override
    public void registerProcess(String processName, int nodeId) {
        String path = "/process-locks/" + processName + "/" + nodeId;
        try {
            curatorFramework.create()
                    .creatingParentsIfNeeded()
                    .withMode(CreateMode.EPHEMERAL)
                    .forPath(path);
            livenessPaths.add(path);
        }
        catch (Exception ex) {
            logger.error("Failed to register process {} on node {}", processName, nodeId, ex);
        }
    }

    @Override
    public void deregisterProcess(String processName, int nodeId) {
        String path = "/process-locks/" + processName + "/" + nodeId;
        try {
            curatorFramework.delete().forPath(path);
            livenessPaths.remove(path);
        }
        catch (Exception ex) {
            logger.error("Failed to deregister process {} on node {}", processName, nodeId, ex);
        }
    }

    @Override
    public void watchProcess(String processName, int nodeId, Consumer<Boolean> callback) throws Exception {
        String path = "/process-locks/" + processName + "/" + nodeId;

        // first check if the path exists and call the callback accordingly
        if (curatorFramework.checkExists().forPath(path) != null) {
            callback.accept(true);
        }
        else {
            callback.accept(false);
        }

        curatorFramework.watchers().add()
                .usingWatcher((Watcher) change -> {
                    Watcher.Event.EventType type = change.getType();

                    if (type == Watcher.Event.EventType.NodeCreated) {
                        callback.accept(true);
                    }
                    if (type == Watcher.Event.EventType.NodeDeleted) {
                        callback.accept(false);
                    }
                })
                .forPath(path);

    }

    @Override
    public void watchProcessAnyNode(String processName, Collection<Integer> nodes, BiConsumer<Boolean, Integer> callback) throws Exception {

        for (int node : nodes) {
            String path = "/process-locks/" + processName + "/" + node;

            // first check if the path exists and call the callback accordingly
            if (curatorFramework.checkExists().forPath(path) != null) {
                callback.accept(true, node);
            }
            else {
                callback.accept(false, node);
            }

            curatorFramework.watchers().add()
                    .usingWatcher((Watcher) change -> {
                        Watcher.Event.EventType type = change.getType();

                        if (type == Watcher.Event.EventType.NodeCreated) {
                            callback.accept(true, node);
                        }
                        if (type == Watcher.Event.EventType.NodeDeleted) {
                            callback.accept(false, node);
                        }
                    })
                    .forPath(path);
        }
    }

    /* Exposed for tests */
    public synchronized void shutDown() {
        if (stopped)
@@ -19,6 +19,7 @@ dependencies {
    implementation project(':code:processes:crawling-process')
    implementation project(':code:processes:live-crawling-process')
    implementation project(':code:processes:loading-process')
    implementation project(':code:processes:ping-process')
    implementation project(':code:processes:converting-process')
    implementation project(':code:processes:index-constructor-process')

@@ -37,6 +38,7 @@ dependencies {
    implementation project(':code:functions:link-graph:api')
    implementation project(':code:functions:live-capture:api')
    implementation project(':code:functions:search-query')
    implementation project(':code:functions:nsfw-domain-filter')
    implementation project(':code:execution:api')

    implementation project(':code:processes:crawling-process:model')
@@ -6,11 +6,13 @@ import java.util.Set;

public enum ExecutorActor {
    PREC_EXPORT_ALL(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
    SYNC_NSFW_LISTS(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),

    CRAWL(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
    RECRAWL(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
    RECRAWL_SINGLE_DOMAIN(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
    PROC_CRAWLER_SPAWNER(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
    PROC_PING_SPAWNER(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED, NodeProfile.SIDELOAD),
    PROC_EXPORT_TASKS_SPAWNER(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
    ADJACENCY_CALCULATION(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
    EXPORT_DATA(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),

@@ -35,7 +37,8 @@
    LIVE_CRAWL(NodeProfile.REALTIME),
    PROC_LIVE_CRAWL_SPAWNER(NodeProfile.REALTIME),
    SCRAPE_FEEDS(NodeProfile.REALTIME),
    UPDATE_RSS(NodeProfile.REALTIME);
    UPDATE_RSS(NodeProfile.REALTIME)
    ;

    public String id() {
        return "fsm:" + name().toLowerCase();
@@ -49,6 +49,7 @@ public class ExecutorActorControlService {
            RecrawlSingleDomainActor recrawlSingleDomainActor,
            RestoreBackupActor restoreBackupActor,
            ConverterMonitorActor converterMonitorFSM,
            PingMonitorActor pingMonitorActor,
            CrawlerMonitorActor crawlerMonitorActor,
            LiveCrawlerMonitorActor liveCrawlerMonitorActor,
            LoaderMonitorActor loaderMonitor,

@@ -68,6 +69,7 @@ public class ExecutorActorControlService {
            ExecutorActorStateMachines stateMachines,
            MigrateCrawlDataActor migrateCrawlDataActor,
            ExportAllPrecessionActor exportAllPrecessionActor,
            UpdateNsfwFiltersActor updateNsfwFiltersActor,
            UpdateRssActor updateRssActor) throws SQLException {
        this.messageQueueFactory = messageQueueFactory;
        this.eventLog = baseServiceParams.eventLog;

@@ -88,6 +90,7 @@ public class ExecutorActorControlService {
        register(ExecutorActor.PROC_CONVERTER_SPAWNER, converterMonitorFSM);
        register(ExecutorActor.PROC_LOADER_SPAWNER, loaderMonitor);
        register(ExecutorActor.PROC_CRAWLER_SPAWNER, crawlerMonitorActor);
        register(ExecutorActor.PROC_PING_SPAWNER, pingMonitorActor);
        register(ExecutorActor.PROC_LIVE_CRAWL_SPAWNER, liveCrawlerMonitorActor);
        register(ExecutorActor.PROC_EXPORT_TASKS_SPAWNER, exportTasksMonitorActor);

@@ -109,6 +112,7 @@ public class ExecutorActorControlService {
        register(ExecutorActor.UPDATE_RSS, updateRssActor);

        register(ExecutorActor.MIGRATE_CRAWL_DATA, migrateCrawlDataActor);
        register(ExecutorActor.SYNC_NSFW_LISTS, updateNsfwFiltersActor);

        if (serviceConfiguration.node() == 1) {
            register(ExecutorActor.PREC_EXPORT_ALL, exportAllPrecessionActor);
@@ -0,0 +1,26 @@
package nu.marginalia.actor.proc;

import com.google.gson.Gson;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.actor.monitor.AbstractProcessSpawnerActor;
import nu.marginalia.mq.persistence.MqPersistence;
import nu.marginalia.mqapi.ProcessInboxNames;
import nu.marginalia.process.ProcessService;
import nu.marginalia.service.module.ServiceConfiguration;

@Singleton
public class PingMonitorActor extends AbstractProcessSpawnerActor {

    @Inject
    public PingMonitorActor(Gson gson, ServiceConfiguration configuration, MqPersistence persistence, ProcessService processService) {
        super(gson,
                configuration,
                persistence,
                processService,
                ProcessInboxNames.PING_INBOX,
                ProcessService.ProcessId.PING);
    }

}
@@ -0,0 +1,53 @@
package nu.marginalia.actor.task;

import com.google.gson.Gson;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.actor.prototype.RecordActorPrototype;
import nu.marginalia.actor.state.ActorStep;
import nu.marginalia.nsfw.NsfwDomainFilter;
import nu.marginalia.service.module.ServiceConfiguration;

@Singleton
public class UpdateNsfwFiltersActor extends RecordActorPrototype {
    private final ServiceConfiguration serviceConfiguration;
    private final NsfwDomainFilter nsfwDomainFilter;

    public record Initial() implements ActorStep {}
    public record Run() implements ActorStep {}

    @Override
    public ActorStep transition(ActorStep self) throws Exception {
        return switch(self) {
            case Initial() -> {
                if (serviceConfiguration.node() != 1) {
                    yield new Error("This actor can only run on node 1");
                }
                else {
                    yield new Run();
                }
            }
            case Run() -> {
                nsfwDomainFilter.fetchLists();
                yield new End();
            }
            default -> new Error();
        };
    }

    @Override
    public String describe() {
        return "Sync NSFW filters";
    }

    @Inject
    public UpdateNsfwFiltersActor(Gson gson,
                                  ServiceConfiguration serviceConfiguration,
                                  NsfwDomainFilter nsfwDomainFilter)
    {
        super(gson);
        this.serviceConfiguration = serviceConfiguration;
        this.nsfwDomainFilter = nsfwDomainFilter;
    }

}
@@ -8,6 +8,7 @@ import nu.marginalia.crawl.CrawlerMain;
import nu.marginalia.index.IndexConstructorMain;
import nu.marginalia.livecrawler.LiveCrawlerMain;
import nu.marginalia.loading.LoaderMain;
import nu.marginalia.ping.PingMain;
import nu.marginalia.service.control.ServiceEventLog;
import nu.marginalia.service.server.BaseServiceParams;
import nu.marginalia.task.ExportTasksMain;

@@ -41,6 +42,7 @@ public class ProcessService {
        return switch (id) {
            case "converter" -> ProcessId.CONVERTER;
            case "crawler" -> ProcessId.CRAWLER;
            case "ping" -> ProcessId.PING;
            case "loader" -> ProcessId.LOADER;
            case "export-tasks" -> ProcessId.EXPORT_TASKS;
            case "index-constructor" -> ProcessId.INDEX_CONSTRUCTOR;

@@ -50,6 +52,7 @@ public class ProcessService {

    public enum ProcessId {
        CRAWLER(CrawlerMain.class),
        PING(PingMain.class),
        LIVE_CRAWLER(LiveCrawlerMain.class),
        CONVERTER(ConverterMain.class),
        LOADER(LoaderMain.class),

@@ -68,6 +71,7 @@ public class ProcessService {
            case LIVE_CRAWLER -> "LIVE_CRAWLER_PROCESS_OPTS";
            case CONVERTER -> "CONVERTER_PROCESS_OPTS";
            case LOADER -> "LOADER_PROCESS_OPTS";
            case PING -> "PING_PROCESS_OPTS";
            case INDEX_CONSTRUCTOR -> "INDEX_CONSTRUCTION_PROCESS_OPTS";
            case EXPORT_TASKS -> "EXPORT_TASKS_PROCESS_OPTS";
        };
@@ -25,9 +25,9 @@ dependencies {

    implementation project(':code:execution:api')
    implementation project(':code:processes:crawling-process:ft-content-type')
    implementation project(':third-party:rssreader')

    implementation libs.jsoup
    implementation project(':third-party:rssreader')
    implementation libs.opencsv
    implementation libs.slop
    implementation libs.sqlite

@@ -57,8 +57,6 @@ dependencies {
    implementation libs.bundles.gson
    implementation libs.bundles.mariadb

    testImplementation libs.bundles.slf4j.test
    testImplementation libs.bundles.junit
    testImplementation libs.mockito
@@ -0,0 +1,126 @@
|
||||
package nu.marginalia.domsample;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import jakarta.inject.Named;
|
||||
import nu.marginalia.domsample.db.DomSampleDb;
|
||||
import nu.marginalia.livecapture.BrowserlessClient;
|
||||
import nu.marginalia.service.module.ServiceConfiguration;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.time.Duration;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
public class DomSampleService {
|
||||
private final DomSampleDb db;
|
||||
private final HikariDataSource mariadbDataSource;
|
||||
private final URI browserlessURI;
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(DomSampleService.class);
|
||||
|
||||
@Inject
|
||||
public DomSampleService(DomSampleDb db,
|
||||
HikariDataSource mariadbDataSource,
|
||||
@Named("browserless-uri") String browserlessAddress,
|
||||
ServiceConfiguration serviceConfiguration)
|
||||
throws URISyntaxException
|
||||
{
|
||||
this.db = db;
|
||||
this.mariadbDataSource = mariadbDataSource;
|
||||
|
||||
if (StringUtils.isEmpty(browserlessAddress) || serviceConfiguration.node() > 1) {
|
||||
logger.warn("Live capture service will not run");
|
||||
browserlessURI = null;
|
||||
}
|
||||
else {
|
||||
browserlessURI = new URI(browserlessAddress);
|
||||
}
|
||||
}
|
||||
|
||||
public void start() {
|
||||
if (browserlessURI == null) {
|
||||
logger.warn("DomSampleService is not enabled due to missing browserless URI or multi-node configuration");
|
||||
return;
|
||||
}
|
||||
|
||||
Thread.ofPlatform().daemon().start(this::run);
|
||||
}
|
||||
|
||||
public void syncDomains() {
|
||||
Set<String> dbDomains = new HashSet<>();
|
||||
|
||||
logger.info("Fetching domains from database...");
|
||||
|
||||
try (var conn = mariadbDataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
SELECT DOMAIN_NAME
|
||||
FROM EC_DOMAIN
|
||||
WHERE NODE_AFFINITY>0
|
||||
""")
|
||||
) {
|
||||
var rs = stmt.executeQuery();
|
||||
while (rs.next()) {
|
||||
dbDomains.add(rs.getString("DOMAIN_NAME"));
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException("Failed to sync domains", e);
|
||||
}
|
||||
|
||||
logger.info("Found {} domains in database", dbDomains.size());
|
||||
|
||||
db.syncDomains(dbDomains);
|
||||
|
||||
logger.info("Synced domains to sqlite");
|
||||
}
|
||||
|
||||
public void run() {
|
||||
|
||||
try (var client = new BrowserlessClient(browserlessURI)) {
|
||||
|
||||
while (!Thread.currentThread().isInterrupted()) {
|
||||
|
||||
try {
|
||||
// Grace sleep in case we're operating on an empty domain list
|
||||
TimeUnit.SECONDS.sleep(15);
|
||||
|
||||
syncDomains();
|
||||
var domains = db.getScheduledDomains();
|
||||
|
||||
for (var domain : domains) {
|
||||
updateDomain(client, domain);
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
logger.info("DomSampleService interrupted, stopping...");
|
||||
return;
|
||||
} catch (Exception e) {
|
||||
logger.error("Error in DomSampleService run loop", e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
private void updateDomain(BrowserlessClient client, String domain) {
|
||||
var rootUrl = "https://" + domain + "/";
|
||||
try {
|
||||
var content = client.annotatedContent(rootUrl, new BrowserlessClient.GotoOptions("load", Duration.ofSeconds(10).toMillis()));
|
||||
|
||||
if (content.isPresent()) {
|
||||
db.saveSample(domain, rootUrl, content.get());
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logger.error("Failed to process domain: " + domain, e);
|
||||
}
|
||||
finally {
|
||||
db.flagDomainAsFetched(domain);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@@ -0,0 +1,174 @@
|
||||
package nu.marginalia.domsample.db;
|
||||
|
||||
import nu.marginalia.WmsaHome;
|
||||
import org.jsoup.Jsoup;
|
||||
|
||||
import java.nio.file.Path;
|
||||
import java.sql.Connection;
|
||||
import java.sql.DriverManager;
|
||||
import java.sql.SQLException;
|
||||
import java.util.*;
|
||||
|
||||
public class DomSampleDb implements AutoCloseable {
|
||||
private static final String dbFileName = "dom-sample.db";
|
||||
private final Connection connection;
|
||||
|
||||
public DomSampleDb() throws SQLException{
|
||||
this(WmsaHome.getDataPath().resolve(dbFileName));
|
||||
}
|
||||
|
||||
public DomSampleDb(Path dbPath) throws SQLException {
|
||||
String dbUrl = "jdbc:sqlite:" + dbPath.toAbsolutePath();
|
||||
|
||||
connection = DriverManager.getConnection(dbUrl);
|
||||
|
||||
try (var stmt = connection.createStatement()) {
|
||||
stmt.executeUpdate("CREATE TABLE IF NOT EXISTS samples (url TEXT PRIMARY KEY, domain TEXT, sample BLOB, requests BLOB, accepted_popover BOOLEAN DEFAULT FALSE)");
|
||||
stmt.executeUpdate("CREATE INDEX IF NOT EXISTS domain_index ON samples (domain)");
|
||||
stmt.executeUpdate("CREATE TABLE IF NOT EXISTS schedule (domain TEXT PRIMARY KEY, last_fetch TIMESTAMP DEFAULT NULL)");
|
||||
stmt.execute("PRAGMA journal_mode=WAL");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public void syncDomains(Set<String> domains) {
|
||||
Set<String> currentDomains = new HashSet<>();
|
||||
try (var stmt = connection.prepareStatement("SELECT domain FROM schedule")) {
|
||||
var rs = stmt.executeQuery();
|
||||
while (rs.next()) {
|
||||
currentDomains.add(rs.getString("domain"));
|
||||
}
|
||||
} catch (SQLException e) {
|
||||
throw new RuntimeException("Failed to sync domains", e);
|
||||
}
|
||||
|
||||
Set<String> toRemove = new HashSet<>(currentDomains);
|
||||
Set<String> toAdd = new HashSet<>(domains);
|
||||
|
||||
toRemove.removeAll(domains);
|
||||
toAdd.removeAll(currentDomains);
|
||||
|
||||
try (var removeStmt = connection.prepareStatement("DELETE FROM schedule WHERE domain = ?");
|
||||
var addStmt = connection.prepareStatement("INSERT OR IGNORE INTO schedule (domain) VALUES (?)")
|
||||
) {
|
||||
for (String domain : toRemove) {
|
||||
removeStmt.setString(1, domain);
|
||||
removeStmt.executeUpdate();
|
||||
}
|
||||
|
||||
for (String domain : toAdd) {
|
||||
addStmt.setString(1, domain);
|
||||
addStmt.executeUpdate();
|
||||
}
|
||||
} catch (SQLException e) {
|
||||
throw new RuntimeException("Failed to remove domains", e);
|
||||
}
|
||||
}
|
||||
|
||||
public List<String> getScheduledDomains() {
|
||||
List<String> domains = new ArrayList<>();
|
||||
try (var stmt = connection.prepareStatement("SELECT domain FROM schedule ORDER BY last_fetch IS NULL DESC, last_fetch ASC")) {
|
||||
var rs = stmt.executeQuery();
|
||||
while (rs.next()) {
|
||||
domains.add(rs.getString("domain"));
|
||||
}
|
||||
} catch (SQLException e) {
|
||||
throw new RuntimeException("Failed to get scheduled domains", e);
|
||||
}
|
||||
return domains;
|
||||
}
|
||||
|
||||
public void flagDomainAsFetched(String domain) {
|
||||
try (var stmt = connection.prepareStatement("INSERT OR REPLACE INTO schedule (domain, last_fetch) VALUES (?, CURRENT_TIMESTAMP)")) {
|
||||
stmt.setString(1, domain);
|
||||
stmt.executeUpdate();
|
||||
} catch (SQLException e) {
|
||||
throw new RuntimeException("Failed to flag domain as fetched", e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public record Sample(String url, String domain, String sample, String requests, boolean acceptedPopover) {}
|
||||
|
||||
public List<Sample> getSamples(String domain) throws SQLException {
|
||||
List<Sample> samples = new ArrayList<>();
|
||||
|
||||
try (var stmt = connection.prepareStatement("""
|
||||
SELECT url, sample, requests, accepted_popover
|
||||
FROM samples
|
||||
WHERE domain = ?
|
||||
"""))
|
||||
{
|
||||
stmt.setString(1, domain);
|
||||
var rs = stmt.executeQuery();
|
||||
while (rs.next()) {
|
||||
samples.add(
|
||||
new Sample(
|
||||
rs.getString("url"),
|
||||
domain,
|
||||
rs.getString("sample"),
|
||||
rs.getString("requests"),
|
||||
rs.getBoolean("accepted_popover")
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
return samples;
|
||||
}
|
||||
|
||||
public void saveSample(String domain, String url, String rawContent) throws SQLException {
|
||||
var doc = Jsoup.parse(rawContent);
|
||||
|
||||
var networkRequests = doc.getElementById("marginalia-network-requests");
|
||||
|
||||
boolean acceptedPopover = false;
|
||||
|
||||
StringBuilder requestTsv = new StringBuilder();
|
||||
if (networkRequests != null) {
|
||||
|
||||
acceptedPopover = !networkRequests.getElementsByClass("marginalia-agreed-cookies").isEmpty();
|
||||
|
||||
for (var request : networkRequests.getElementsByClass("network-request")) {
|
||||
String method = request.attr("data-method");
|
||||
String urlAttr = request.attr("data-url");
|
||||
String timestamp = request.attr("data-timestamp");
|
||||
|
||||
requestTsv
|
||||
.append(method)
|
||||
.append('\t')
|
||||
.append(timestamp)
|
||||
.append('\t')
|
||||
.append(urlAttr.replace('\n', ' '))
|
||||
.append("\n");
|
||||
}
|
||||
|
||||
networkRequests.remove();
|
||||
}
|
||||
|
||||
doc.body().removeAttr("id");
|
||||
|
||||
String sample = doc.html();
|
||||
|
||||
saveSampleRaw(domain, url, sample, requestTsv.toString().trim(), acceptedPopover);
|
||||
|
||||
}
|
||||
|
||||
public void saveSampleRaw(String domain, String url, String sample, String requests, boolean acceptedPopover) throws SQLException {
|
||||
try (var stmt = connection.prepareStatement("""
|
||||
INSERT OR REPLACE
|
||||
INTO samples (domain, url, sample, requests, accepted_popover)
|
||||
VALUES (?, ?, ?, ?, ?)
|
||||
""")) {
|
||||
stmt.setString(1, domain);
|
||||
stmt.setString(2, url);
|
||||
stmt.setString(3, sample);
|
||||
stmt.setString(4, requests);
|
||||
stmt.setBoolean(5, acceptedPopover);
|
||||
stmt.executeUpdate();
|
||||
}
|
||||
}
|
||||
|
||||
public void close() throws SQLException {
|
||||
connection.close();
|
||||
}
|
||||
}
|
@@ -8,10 +8,13 @@ import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.net.URI;
import java.net.URLEncoder;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.List;
import java.util.Map;
import java.util.Optional;

@@ -60,6 +63,42 @@ public class BrowserlessClient implements AutoCloseable {
        return Optional.of(rsp.body());
    }

    /** Fetches content with a marginalia hack extension loaded that decorates the DOM with attributes for
     * certain CSS attributes, to be able to easier identify popovers and other nuisance elements.
     */
    public Optional<String> annotatedContent(String url, GotoOptions gotoOptions) throws IOException, InterruptedException {
        Map<String, Object> requestData = Map.of(
                "url", url,
                "userAgent", userAgent,
                "gotoOptions", gotoOptions,
                "waitForSelector", Map.of("selector", "#marginaliahack", "timeout", 15000)
        );

        // Launch parameters for the browserless instance to load the extension
        Map<String, Object> launchParameters = Map.of(
                "args", List.of("--load-extension=/dom-export")
        );

        String launchParametersStr = URLEncoder.encode(gson.toJson(launchParameters), StandardCharsets.UTF_8);

        var request = HttpRequest.newBuilder()
                .uri(browserlessURI.resolve("/content?token="+BROWSERLESS_TOKEN+"&launch="+launchParametersStr))
                .method("POST", HttpRequest.BodyPublishers.ofString(
                        gson.toJson(requestData)
                ))
                .header("Content-type", "application/json")
                .build();

        var rsp = httpClient.send(request, HttpResponse.BodyHandlers.ofString());

        if (rsp.statusCode() >= 300) {
            logger.info("Failed to fetch annotated content for {}, status {}", url, rsp.statusCode());
            return Optional.empty();
        }

        return Optional.of(rsp.body());
    }

    public byte[] screenshot(String url, GotoOptions gotoOptions, ScreenshotOptions screenshotOptions)
            throws IOException, InterruptedException {
@@ -126,7 +126,6 @@ public class LiveCaptureGrpcService
            }
            else {
                EdgeDomain domain = domainNameOpt.get();
                String domainNameStr = domain.toString();

                if (!isValidDomainForCapture(domain)) {
                    ScreenshotDbOperations.flagDomainAsFetched(conn, domain);
@@ -0,0 +1,113 @@
|
||||
package nu.marginalia.domsample.db;
|
||||
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.testcontainers.shaded.org.apache.commons.io.FileUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.*;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
class DomSampleDbTest {
|
||||
Path tempDir;
|
||||
|
||||
@BeforeEach
|
||||
void setUp() throws Exception {
|
||||
tempDir = Files.createTempDirectory("test");
|
||||
}
|
||||
|
||||
@AfterEach
|
||||
void tearDown() throws IOException {
|
||||
FileUtils.deleteDirectory(tempDir.toFile());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSetUp() {
|
||||
var dbPath = tempDir.resolve("test.db");
|
||||
try (var db = new DomSampleDb(dbPath)) {
|
||||
}
|
||||
catch (Exception e) {
|
||||
fail("Failed to set up database: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSyncDomains() {
|
||||
var dbPath = tempDir.resolve("test.db");
|
||||
try (var db = new DomSampleDb(dbPath)) {
|
||||
|
||||
db.syncDomains(Set.of("example.com", "test.com", "foobar.com"));
|
||||
assertEquals(Set.of("example.com", "test.com", "foobar.com"), new HashSet<>(db.getScheduledDomains()));
|
||||
db.syncDomains(Set.of("example.com", "test.com"));
|
||||
assertEquals(Set.of("example.com", "test.com"), new HashSet<>(db.getScheduledDomains()));
|
||||
db.syncDomains(Set.of("foobar.com", "test.com"));
|
||||
assertEquals(Set.of("foobar.com", "test.com"), new HashSet<>(db.getScheduledDomains()));
|
||||
}
|
||||
catch (Exception e) {
|
||||
fail("Failed to sync domains: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFetchDomains() {
|
||||
var dbPath = tempDir.resolve("test.db");
|
||||
try (var db = new DomSampleDb(dbPath)) {
|
||||
|
||||
db.syncDomains(Set.of("example.com", "test.com", "foobar.com"));
|
||||
db.flagDomainAsFetched("example.com");
|
||||
db.flagDomainAsFetched("test.com");
|
||||
db.flagDomainAsFetched("foobar.com");
|
||||
assertEquals(List.of("example.com", "test.com", "foobar.com"), db.getScheduledDomains());
|
||||
db.flagDomainAsFetched("test.com");
|
||||
assertEquals(List.of("example.com", "foobar.com", "test.com"), db.getScheduledDomains());
|
||||
}
|
||||
catch (Exception e) {
|
||||
fail("Failed to sync domains: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void saveLoadSingle() {
|
||||
var dbPath = tempDir.resolve("test.db");
|
||||
try (var db = new DomSampleDb(dbPath)) {
|
||||
db.saveSampleRaw("example.com", "http://example.com/sample", "sample data", "requests data", true);
|
||||
var samples = db.getSamples("example.com");
|
||||
assertEquals(1, samples.size());
|
||||
var sample = samples.getFirst();
|
||||
assertEquals("example.com", sample.domain());
|
||||
assertEquals("http://example.com/sample", sample.url());
|
||||
assertEquals("sample data", sample.sample());
|
||||
assertEquals("requests data", sample.requests());
|
||||
assertTrue(sample.acceptedPopover());
|
||||
}
|
||||
catch (Exception e) {
|
||||
fail("Failed to save/load sample: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void saveLoadTwo() {
|
||||
var dbPath = tempDir.resolve("test.db");
|
||||
try (var db = new DomSampleDb(dbPath)) {
|
||||
db.saveSampleRaw("example.com", "http://example.com/sample", "sample data", "r1", true);
|
||||
db.saveSampleRaw("example.com", "http://example.com/sample2", "sample data2", "r2", false);
|
||||
var samples = db.getSamples("example.com");
|
||||
assertEquals(2, samples.size());
|
||||
|
||||
Map<String, String> samplesByUrl = new HashMap<>();
|
||||
for (var sample : samples) {
|
||||
samplesByUrl.put(sample.url(), sample.sample());
|
||||
}
|
||||
|
||||
assertEquals("sample data", samplesByUrl.get("http://example.com/sample"));
|
||||
assertEquals("sample data2", samplesByUrl.get("http://example.com/sample2"));
|
||||
}
|
||||
catch (Exception e) {
|
||||
fail("Failed to save/load sample: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
@@ -3,17 +3,21 @@ package nu.marginalia.livecapture;
|
||||
import com.github.tomakehurst.wiremock.WireMockServer;
|
||||
import com.github.tomakehurst.wiremock.core.WireMockConfiguration;
|
||||
import nu.marginalia.WmsaHome;
|
||||
import nu.marginalia.domsample.db.DomSampleDb;
|
||||
import nu.marginalia.service.module.ServiceConfigurationModule;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Tag;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.testcontainers.containers.GenericContainer;
|
||||
import org.testcontainers.images.PullPolicy;
|
||||
import org.testcontainers.junit.jupiter.Testcontainers;
|
||||
import org.testcontainers.utility.DockerImageName;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Map;
|
||||
|
||||
import static com.github.tomakehurst.wiremock.client.WireMock.*;
|
||||
@@ -22,9 +26,14 @@ import static com.github.tomakehurst.wiremock.client.WireMock.*;
|
||||
@Testcontainers
|
||||
@Tag("slow")
|
||||
public class BrowserlessClientTest {
|
||||
static GenericContainer<?> container = new GenericContainer<>(DockerImageName.parse("browserless/chrome"))
|
||||
// Run gradle docker if this image is not available
|
||||
static GenericContainer<?> container = new GenericContainer<>(DockerImageName.parse("marginalia-browserless"))
|
||||
.withEnv(Map.of("TOKEN", "BROWSERLESS_TOKEN"))
|
||||
.withImagePullPolicy(PullPolicy.defaultPolicy())
|
||||
.withNetworkMode("bridge")
|
||||
.withLogConsumer(frame -> {
|
||||
System.out.print(frame.getUtf8String());
|
||||
})
|
||||
.withExposedPorts(3000);
|
||||
|
||||
static WireMockServer wireMockServer =
|
||||
@@ -34,6 +43,7 @@ public class BrowserlessClientTest {
|
||||
static String localIp;
|
||||
|
||||
static URI browserlessURI;
|
||||
static URI browserlessWssURI;
|
||||
|
||||
@BeforeAll
|
||||
public static void setup() throws IOException {
|
||||
@@ -44,6 +54,12 @@ public class BrowserlessClientTest {
|
||||
container.getMappedPort(3000))
|
||||
);
|
||||
|
||||
browserlessWssURI = URI.create(String.format("ws://%s:%d/?token=BROWSERLESS_TOKEN",
|
||||
container.getHost(),
|
||||
container.getMappedPort(3000))
|
||||
);
|
||||
|
||||
|
||||
wireMockServer.start();
|
||||
wireMockServer.stubFor(get("/").willReturn(aResponse().withStatus(200).withBody("Ok")));
|
||||
|
||||
@@ -85,6 +101,30 @@ public class BrowserlessClientTest {
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAnnotatedContent() throws Exception {
|
||||
|
||||
try (var client = new BrowserlessClient(browserlessURI);
|
||||
DomSampleDb dbop = new DomSampleDb(Path.of("/tmp/dom-sample.db"))
|
||||
) {
|
||||
var content = client.annotatedContent("https://marginalia.nu/", BrowserlessClient.GotoOptions.defaultValues()).orElseThrow();
|
||||
dbop.saveSample("marginalia.nu", "https://marginalia.nu/", content);
|
||||
System.out.println(content);
|
||||
Assertions.assertFalse(content.isBlank(), "Content should not be empty");
|
||||
|
||||
dbop.getSamples("marginalia.nu").forEach(sample -> {
|
||||
System.out.println("Sample URL: " + sample.url());
|
||||
System.out.println("Sample Content: " + sample.sample());
|
||||
System.out.println("Sample Requests: " + sample.requests());
|
||||
System.out.println("Accepted Popover: " + sample.acceptedPopover());
|
||||
});
|
||||
}
|
||||
finally {
|
||||
Files.deleteIfExists(Path.of("/tmp/dom-sample.db"));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testScreenshot() throws Exception {
|
||||
try (var client = new BrowserlessClient(browserlessURI)) {
|
||||
|
43	code/functions/nsfw-domain-filter/build.gradle	Normal file
@@ -0,0 +1,43 @@
plugins {
    id 'java'
    id 'jvm-test-suite'
}

java {
    toolchain {
        languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
    }
}

apply from: "$rootProject.projectDir/srcsets.gradle"

dependencies {

    implementation project(':code:common:config')
    implementation project(':code:common:model')
    implementation project(':code:common:db')

    implementation libs.bundles.slf4j
    implementation libs.prometheus
    implementation libs.guava
    implementation libs.commons.lang3
    implementation dependencies.create(libs.guice.get()) {
        exclude group: 'com.google.guava'
    }
    implementation libs.notnull
    implementation libs.fastutil
    implementation libs.bundles.mariadb

    testImplementation libs.bundles.slf4j.test
    testImplementation libs.bundles.junit
    testImplementation libs.mockito

    testImplementation platform('org.testcontainers:testcontainers-bom:1.17.4')
    testImplementation libs.commons.codec
    testImplementation project(':code:common:service')
    testImplementation 'org.testcontainers:mariadb:1.17.4'
    testImplementation 'org.testcontainers:junit-jupiter:1.17.4'
    testImplementation project(':code:libraries:test-helpers')
}
@@ -0,0 +1,192 @@
package nu.marginalia.nsfw;

import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.google.inject.name.Named;
import com.zaxxer.hikari.HikariDataSource;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.InputStreamReader;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.zip.GZIPInputStream;

@Singleton
public class NsfwDomainFilter {
    private final HikariDataSource dataSource;

    private final List<String> dangerLists;
    private final List<String> smutLists;

    private volatile IntOpenHashSet blockedDomainIdsTier1 = new IntOpenHashSet();
    private volatile IntOpenHashSet blockedDomainIdsTier2 = new IntOpenHashSet();

    private static final Logger logger = LoggerFactory.getLogger(NsfwDomainFilter.class);

    public static final int NSFW_DISABLE = 0;
    public static final int NSFW_BLOCK_DANGER = 1;
    public static final int NSFW_BLOCK_SMUT = 2;

    @Inject
    public NsfwDomainFilter(HikariDataSource dataSource,
                            @Named("nsfw.dangerLists") List<String> dangerLists,
                            @Named("nsfw.smutLists") List<String> smutLists
    ) {
        this.dataSource = dataSource;

        this.dangerLists = dangerLists;
        this.smutLists = smutLists;

        Thread.ofPlatform().daemon().name("NsfwDomainFilterSync").start(() -> {
            while (true) {
                sync();
                try {
                    TimeUnit.HOURS.sleep(1);
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    break; // Exit the loop if interrupted
                }
            }
        });
    }

    public boolean isBlocked(int domainId, int tier) {
        if (tier == 0)
            return false;

        if (tier >= 1 && blockedDomainIdsTier1.contains(domainId))
            return true;
        if (tier >= 2 && blockedDomainIdsTier2.contains(domainId))
            return true;

        return false;
    }

    private synchronized void sync() {
        try (var conn = dataSource.getConnection();
             var stmt = conn.prepareStatement("SELECT ID, TIER FROM NSFW_DOMAINS")
        ) {
            var rs = stmt.executeQuery();
            IntOpenHashSet tier1 = new IntOpenHashSet();
            IntOpenHashSet tier2 = new IntOpenHashSet();

            while (rs.next()) {
                int domainId = rs.getInt("ID");
                int tier = rs.getInt("TIER");

                switch (tier) {
                    case 1 -> tier1.add(domainId);
                    case 2 -> tier2.add(domainId);
                }
            }

            this.blockedDomainIdsTier1 = tier1;
            this.blockedDomainIdsTier2 = tier2;

            logger.info("NSFW domain filter synced: {} tier 1, {} tier 2", tier1.size(), tier2.size());

        }
        catch (SQLException ex) {
            logger.error("Failed to sync NSFW domain filter", ex);
        }
    }

    public synchronized void fetchLists() {
        try (var conn = dataSource.getConnection();
             HttpClient client = HttpClient.newBuilder()
                     .followRedirects(HttpClient.Redirect.ALWAYS)
                     .build();
             var stmt = conn.createStatement();
             var insertStmt = conn.prepareStatement("INSERT IGNORE INTO NSFW_DOMAINS_TMP (ID, TIER) SELECT ID, ? FROM EC_DOMAIN WHERE DOMAIN_NAME = ?")) {

            stmt.execute("DROP TABLE IF EXISTS NSFW_DOMAINS_TMP");
            stmt.execute("CREATE TABLE NSFW_DOMAINS_TMP LIKE NSFW_DOMAINS");

            List<String> combinedDangerList = new ArrayList<>(10_000);
            for (var dangerListUrl : dangerLists) {
                combinedDangerList.addAll(fetchList(client, dangerListUrl));
            }

            for (String domain : combinedDangerList) {
                insertStmt.setInt(1, NSFW_BLOCK_DANGER);
                insertStmt.setString(2, domain);
                insertStmt.execute();
            }

            List<String> combinedSmutList = new ArrayList<>(10_000);
            for (var smutListUrl : smutLists) {
                combinedSmutList.addAll(fetchList(client, smutListUrl));
            }

            for (String domain : combinedSmutList) {
                insertStmt.setInt(1, NSFW_BLOCK_SMUT);
                insertStmt.setString(2, domain);
                insertStmt.addBatch();
                insertStmt.execute();
            }

            stmt.execute("""
                    DROP TABLE IF EXISTS NSFW_DOMAINS
                    """);
            stmt.execute("""
                    RENAME TABLE NSFW_DOMAINS_TMP TO NSFW_DOMAINS
                    """);
            sync();
        }
        catch (SQLException ex) {
            logger.error("Failed to fetch NSFW domain lists", ex);
        }
    }

    public List<String> fetchList(HttpClient client, String url) {

        logger.info("Fetching NSFW domain list from {}", url);

        var request = HttpRequest.newBuilder()
                .uri(java.net.URI.create(url))
                .build();

        try {
            if (url.endsWith(".gz")) {
                var response = client.send(request, HttpResponse.BodyHandlers.ofByteArray());

                byte[] body = response.body();

                try (var reader = new BufferedReader(new InputStreamReader(new GZIPInputStream(new ByteArrayInputStream(body))))) {
                    return reader.lines()
                            .filter(StringUtils::isNotEmpty)
                            .toList();
                } catch (Exception e) {
                    logger.error("Error reading GZIP response from {}", url, e);
                }
            } else {
                var response = client.send(request, HttpResponse.BodyHandlers.ofString());
                if (response.statusCode() == 200) {

                    return Arrays.stream(StringUtils.split(response.body(), "\n"))
                            .filter(StringUtils::isNotEmpty)
                            .toList();
                } else {
                    logger.warn("Failed to fetch list from {}: HTTP {}", url, response.statusCode());
                }
            }
        }
        catch (Exception e) {
            logger.error("Error fetching NSFW domain list from {}", url, e);
        }

        return List.of();
    }
}
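
A minimal usage sketch of the filter introduced above (not part of the diff; the data source module and domain id are illustrative assumptions):

    // Sketch only: dataSourceModule is assumed to bind HikariDataSource, as the test below does.
    NsfwDomainFilter filter = Guice.createInjector(new NsfwFilterModule(), dataSourceModule)
            .getInstance(NsfwDomainFilter.class);

    filter.fetchLists(); // download the configured block lists and populate NSFW_DOMAINS
    boolean hide = filter.isBlocked(domainId, NsfwDomainFilter.NSFW_BLOCK_SMUT); // tier 2 checks both the danger and smut sets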
@@ -0,0 +1,30 @@
package nu.marginalia.nsfw;

import com.google.inject.AbstractModule;
import com.google.inject.Provides;
import jakarta.inject.Named;

import java.util.List;

public class NsfwFilterModule extends AbstractModule {

    @Provides
    @Named("nsfw.dangerLists")
    public List<String> nsfwDomainLists1() {
        return List.of(
                "https://raw.githubusercontent.com/olbat/ut1-blacklists/refs/heads/master/blacklists/cryptojacking/domains",
                "https://raw.githubusercontent.com/olbat/ut1-blacklists/refs/heads/master/blacklists/malware/domains",
                "https://raw.githubusercontent.com/olbat/ut1-blacklists/refs/heads/master/blacklists/phishing/domains"
        );
    }
    @Provides
    @Named("nsfw.smutLists")
    public List<String> nsfwDomainLists2() {
        return List.of(
                "https://github.com/olbat/ut1-blacklists/raw/refs/heads/master/blacklists/adult/domains.gz",
                "https://raw.githubusercontent.com/olbat/ut1-blacklists/refs/heads/master/blacklists/gambling/domains"
        );
    }

    public void configure() {}
}
@@ -0,0 +1,108 @@
package nu.marginalia.nsfw;

import com.google.inject.AbstractModule;
import com.google.inject.Guice;
import com.google.inject.Provides;
import com.zaxxer.hikari.HikariConfig;
import com.zaxxer.hikari.HikariDataSource;
import jakarta.inject.Named;
import nu.marginalia.test.TestMigrationLoader;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
import org.testcontainers.containers.MariaDBContainer;
import org.testcontainers.junit.jupiter.Container;
import org.testcontainers.junit.jupiter.Testcontainers;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;

import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;

@Tag("slow")
@Testcontainers
class NsfwDomainFilterTest extends AbstractModule {

    @Container
    static MariaDBContainer<?> mariaDBContainer = new MariaDBContainer<>("mariadb")
            .withDatabaseName("WMSA_prod")
            .withUsername("wmsa")
            .withPassword("wmsa")
            .withNetworkAliases("mariadb");

    static HikariDataSource dataSource;
    static Path tempDir;

    @BeforeAll
    public static void setUpDb() throws IOException {
        tempDir = Files.createTempDirectory(NsfwDomainFilterTest.class.getSimpleName());

        System.setProperty("system.homePath", tempDir.toString());

        HikariConfig config = new HikariConfig();
        config.setJdbcUrl(mariaDBContainer.getJdbcUrl());
        config.setUsername("wmsa");
        config.setPassword("wmsa");

        dataSource = new HikariDataSource(config);

        TestMigrationLoader.flywayMigration(dataSource);

        try (var conn = dataSource.getConnection();
             var stmt = conn.prepareStatement("INSERT INTO EC_DOMAIN (DOMAIN_NAME, DOMAIN_TOP, NODE_AFFINITY) VALUES (?, ?, 1)")
        ) {

            // Ensure the database is ready
            conn.createStatement().execute("SELECT 1");

            stmt.setString(1, "www.google.com");
            stmt.setString(2, "google.com");
            stmt.executeUpdate();
            stmt.setString(1, "www.bing.com");
            stmt.setString(2, "bing.com");
            stmt.executeUpdate();
        } catch (Exception e) {
            throw new RuntimeException("Failed to connect to the database", e);
        }
    }

    @Provides
    @Named("nsfw.dangerLists")
    public List<String> nsfwDomainLists1() {
        return List.of(
                "https://downloads.marginalia.nu/test/list1"
        );
    }

    @Provides
    @Named("nsfw.smutLists")
    public List<String> nsfwDomainLists2() {
        return List.of(
                "https://downloads.marginalia.nu/test/list2.gz"
        );
    }

    public void configure() {
        bind(HikariDataSource.class).toInstance(dataSource);
    }

    @Test
    public void test() {
        var filter = Guice
                .createInjector(this)
                .getInstance(NsfwDomainFilter.class);

        filter.fetchLists();

        assertTrue(filter.isBlocked(1, NsfwDomainFilter.NSFW_BLOCK_DANGER));
        assertTrue(filter.isBlocked(1, NsfwDomainFilter.NSFW_BLOCK_SMUT));
        assertFalse(filter.isBlocked(2, NsfwDomainFilter.NSFW_BLOCK_DANGER));
        assertTrue(filter.isBlocked(2, NsfwDomainFilter.NSFW_BLOCK_SMUT));
    }

}
@@ -1,9 +1,6 @@
package nu.marginalia.api.searchquery;

import nu.marginalia.api.searchquery.model.query.ProcessedQuery;
import nu.marginalia.api.searchquery.model.query.QueryParams;
import nu.marginalia.api.searchquery.model.query.QueryResponse;
import nu.marginalia.api.searchquery.model.query.SearchSpecification;
import nu.marginalia.api.searchquery.model.query.*;
import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
import nu.marginalia.api.searchquery.model.results.PrototypeRankingParameters;
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
@@ -32,6 +29,8 @@ public class QueryProtobufCodec {
        builder.setSearchSetIdentifier(query.specs.searchSetIdentifier);
        builder.setHumanQuery(request.getHumanQuery());

        builder.setNsfwFilterTierValue(request.getNsfwFilterTierValue());

        builder.setQuality(IndexProtobufCodec.convertSpecLimit(query.specs.quality));
        builder.setYear(IndexProtobufCodec.convertSpecLimit(query.specs.year));
        builder.setSize(IndexProtobufCodec.convertSpecLimit(query.specs.size));
@@ -78,6 +77,8 @@ public class QueryProtobufCodec {
        builder.setSearchSetIdentifier(query.specs.searchSetIdentifier);
        builder.setHumanQuery(humanQuery);

        builder.setNsfwFilterTier(RpcIndexQuery.NSFW_FILTER_TIER.DANGER);

        builder.setQuality(IndexProtobufCodec.convertSpecLimit(query.specs.quality));
        builder.setYear(IndexProtobufCodec.convertSpecLimit(query.specs.year));
        builder.setSize(IndexProtobufCodec.convertSpecLimit(query.specs.size));
@@ -112,6 +113,7 @@ public class QueryProtobufCodec {
                request.getSearchSetIdentifier(),
                QueryStrategy.valueOf(request.getQueryStrategy()),
                RpcTemporalBias.Bias.valueOf(request.getTemporalBias().getBias().name()),
                NsfwFilterTier.fromCodedValue(request.getNsfwFilterTierValue()),
                request.getPagination().getPage()
        );
    }
@@ -327,6 +329,7 @@ public class QueryProtobufCodec {
                .setRank(IndexProtobufCodec.convertSpecLimit(params.rank()))
                .setSearchSetIdentifier(params.identifier())
                .setQueryStrategy(params.queryStrategy().name())
                .setNsfwFilterTierValue(params.filterTier().getCodedValue())
                .setTemporalBias(RpcTemporalBias.newBuilder()
                        .setBias(RpcTemporalBias.Bias.valueOf(params.temporalBias().name()))
                        .build())
@@ -0,0 +1,26 @@
package nu.marginalia.api.searchquery.model.query;

public enum NsfwFilterTier {
    OFF(0),
    DANGER(1),
    PORN_AND_GAMBLING(2);

    private final int codedValue; // same as ordinal() for now, but can be changed later if needed

    NsfwFilterTier(int codedValue) {
        this.codedValue = codedValue;
    }

    public static NsfwFilterTier fromCodedValue(int codedValue) {
        for (NsfwFilterTier tier : NsfwFilterTier.values()) {
            if (tier.codedValue == codedValue) {
                return tier;
            }
        }
        throw new IllegalArgumentException("Invalid coded value for NsfwFilterTier: " + codedValue);
    }

    public int getCodedValue() {
        return codedValue;
    }
}
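
For reference, a short sketch of how the coded value round-trips; the numbering deliberately matches the NSFW_FILTER_TIER proto enums added further down in this diff:

    int coded = NsfwFilterTier.PORN_AND_GAMBLING.getCodedValue(); // 2
    NsfwFilterTier tier = NsfwFilterTier.fromCodedValue(coded);   // PORN_AND_GAMBLING
    // OFF/DANGER/PORN_AND_GAMBLING map to 0/1/2, the same values as NONE/DANGER/PORN_AND_GAMBLING
    // in the .proto enums, which is why QueryProtobufCodec can pass the raw integer through with
    // setNsfwFilterTierValue(...) / getNsfwFilterTierValue().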
@@ -25,10 +25,11 @@ public record QueryParams(
        String identifier,
        QueryStrategy queryStrategy,
        RpcTemporalBias.Bias temporalBias,
        NsfwFilterTier filterTier,
        int page
)
{
    public QueryParams(String query, RpcQueryLimits limits, String identifier) {
    public QueryParams(String query, RpcQueryLimits limits, String identifier, NsfwFilterTier filterTier) {
        this(query, null,
                List.of(),
                List.of(),
@@ -43,6 +44,7 @@ public record QueryParams(
                identifier,
                QueryStrategy.AUTO,
                RpcTemporalBias.Bias.NONE,
                filterTier,
                1 // page
        );
    }
@@ -32,6 +32,14 @@ message RpcQsQuery {
    RpcTemporalBias temporalBias = 16;

    RpcQsQueryPagination pagination = 17;

    NSFW_FILTER_TIER nsfwFilterTier = 18;

    enum NSFW_FILTER_TIER {
        NONE = 0;
        DANGER = 1;
        PORN_AND_GAMBLING = 2;
    };
}

/* Query service query response */
@@ -78,8 +86,17 @@ message RpcIndexQuery {
    RpcQueryLimits queryLimits = 10;
    string queryStrategy = 11; // Named query configuration
    RpcResultRankingParameters parameters = 12;

    NSFW_FILTER_TIER nsfwFilterTier = 13;

    enum NSFW_FILTER_TIER {
        NONE = 0;
        DANGER = 1;
        PORN_AND_GAMBLING = 2;
    };
}

/* A tagged union encoding some limit on a field */
message RpcSpecLimit {
    int32 value = 1;
@@ -19,6 +19,7 @@ dependencies {
    implementation project(':code:common:model')
    implementation project(':code:common:service')

    implementation project(':code:functions:nsfw-domain-filter')
    implementation project(':code:functions:search-query:api')

    implementation project(':code:index:query')
@@ -11,6 +11,7 @@ import nu.marginalia.api.searchquery.model.query.QueryParams;
import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
import nu.marginalia.api.searchquery.model.results.PrototypeRankingParameters;
import nu.marginalia.index.api.IndexClient;
import nu.marginalia.nsfw.NsfwDomainFilter;
import nu.marginalia.service.server.DiscoverableService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -34,13 +35,16 @@ public class QueryGRPCService

    private final QueryFactory queryFactory;
    private final NsfwDomainFilter nsfwDomainFilter;
    private final IndexClient indexClient;

    @Inject
    public QueryGRPCService(QueryFactory queryFactory,
                            NsfwDomainFilter nsfwDomainFilter,
                            IndexClient indexClient)
    {
        this.queryFactory = queryFactory;
        this.nsfwDomainFilter = nsfwDomainFilter;
        this.indexClient = indexClient;
    }

@@ -3,6 +3,7 @@ package nu.marginalia.query.svc;
import nu.marginalia.WmsaHome;
import nu.marginalia.api.searchquery.RpcQueryLimits;
import nu.marginalia.api.searchquery.RpcTemporalBias;
import nu.marginalia.api.searchquery.model.query.NsfwFilterTier;
import nu.marginalia.api.searchquery.model.query.QueryParams;
import nu.marginalia.api.searchquery.model.query.SearchSpecification;
import nu.marginalia.functions.searchquery.QueryFactory;
@@ -58,6 +59,7 @@ public class QueryFactoryTest {
                "NONE",
                QueryStrategy.AUTO,
                RpcTemporalBias.Bias.NONE,
                NsfwFilterTier.OFF,
                0), null).specs;
    }

@@ -17,6 +17,7 @@ dependencies {
    implementation project(':code:common:service')
    implementation project(':code:common:db')
    implementation project(':code:libraries:message-queue')
    implementation project(':code:functions:nsfw-domain-filter')
    implementation project(':code:functions:search-query:api')

    implementation libs.bundles.slf4j
@@ -2,11 +2,13 @@ package nu.marginalia.index.api;

import com.google.inject.Inject;
import com.google.inject.Singleton;
import io.prometheus.client.Counter;
import nu.marginalia.api.searchquery.IndexApiGrpc;
import nu.marginalia.api.searchquery.RpcDecoratedResultItem;
import nu.marginalia.api.searchquery.RpcIndexQuery;
import nu.marginalia.db.DomainBlacklistImpl;
import nu.marginalia.model.id.UrlIdCodec;
import nu.marginalia.nsfw.NsfwDomainFilter;
import nu.marginalia.service.client.GrpcChannelPoolFactory;
import nu.marginalia.service.client.GrpcMultiNodeChannelPool;
import nu.marginalia.service.discovery.property.ServiceKey;
@@ -28,14 +30,26 @@ public class IndexClient {
    private static final Logger logger = LoggerFactory.getLogger(IndexClient.class);
    private final GrpcMultiNodeChannelPool<IndexApiGrpc.IndexApiBlockingStub> channelPool;
    private final DomainBlacklistImpl blacklist;
    private final NsfwDomainFilter nsfwDomainFilter;

    Counter wmsa_index_query_count = Counter.build()
            .name("wmsa_nsfw_filter_result_count")
            .labelNames("tier")
            .help("Count of results filtered by NSFW tier")
            .register();

    private static final ExecutorService executor = Executors.newCachedThreadPool();

    @Inject
    public IndexClient(GrpcChannelPoolFactory channelPoolFactory, DomainBlacklistImpl blacklist) {
    public IndexClient(GrpcChannelPoolFactory channelPoolFactory,
                       DomainBlacklistImpl blacklist,
                       NsfwDomainFilter nsfwDomainFilter
                       ) {
        this.channelPool = channelPoolFactory.createMulti(
                ServiceKey.forGrpcApi(IndexApiGrpc.class, ServicePartition.multi()),
                IndexApiGrpc::newBlockingStub);
        this.blacklist = blacklist;
        this.nsfwDomainFilter = nsfwDomainFilter;
    }

    private static final Comparator<RpcDecoratedResultItem> comparator =
@@ -52,7 +66,7 @@ public class IndexClient {
    public AggregateQueryResponse executeQueries(RpcIndexQuery indexRequest, Pagination pagination) {

        final int requestedMaxResults = indexRequest.getQueryLimits().getResultsTotal();

        int filterTier = indexRequest.getNsfwFilterTierValue();
        AtomicInteger totalNumResults = new AtomicInteger(0);

        List<RpcDecoratedResultItem> results =
@@ -74,7 +88,7 @@ public class IndexClient {
                    }
                })
                .flatMap(List::stream)
                .filter(item -> !isBlacklisted(item))
                .filter(item -> !isBlacklisted(item, filterTier))
                .sorted(comparator)
                .skip(Math.max(0, (pagination.page - 1) * pagination.pageSize))
                .limit(pagination.pageSize)
@@ -83,8 +97,23 @@ public class IndexClient {
        return new AggregateQueryResponse(results, pagination.page(), totalNumResults.get());
    }

    private boolean isBlacklisted(RpcDecoratedResultItem item) {
        return blacklist.isBlacklisted(UrlIdCodec.getDomainId(item.getRawItem().getCombinedId()));
    static String[] tierNames = {
            "OFF",
            "DANGER",
            "NSFW"
    };

    private boolean isBlacklisted(RpcDecoratedResultItem item, int filterTier) {
        int domainId = UrlIdCodec.getDomainId(item.getRawItem().getCombinedId());

        if (blacklist.isBlacklisted(domainId)) {
            return true;
        }
        if (nsfwDomainFilter.isBlocked(domainId, filterTier)) {
            wmsa_index_query_count.labels(tierNames[filterTier]).inc();
            return true;
        }
        return false;
    }

}
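
A small worked example of the pagination window in executeQueries above (the values are illustrative):

    int page = 2, pageSize = 10;                    // illustrative values
    long skip = Math.max(0, (page - 1) * pageSize); // 10
    // the merged, sorted, filtered stream then takes pageSize items, i.e. results 10..19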
@@ -84,7 +84,7 @@ public class ForwardIndexConverter {

        LongArray docFileData = LongArrayFactory.mmapForWritingConfined(outputFileDocsData, ForwardIndexParameters.ENTRY_SIZE * docsFileId.size());

        ByteBuffer workArea = ByteBuffer.allocate(65536);
        ByteBuffer workArea = ByteBuffer.allocate(1024*1024*100);
        for (var instance : journal.pages()) {
            try (var slopTable = new SlopTable(instance.baseDir(), instance.page()))
            {
@@ -15,6 +15,10 @@ dependencies {

    implementation libs.bundles.slf4j
    implementation libs.opencsv
    implementation libs.guava
    implementation dependencies.create(libs.guice.get()) {
        exclude group: 'com.google.guava'
    }

    testImplementation libs.bundles.slf4j.test
    testImplementation libs.bundles.junit
@@ -1,5 +1,6 @@
package nu.marginalia.geoip;

import com.google.inject.Singleton;
import nu.marginalia.WmsaHome;
import nu.marginalia.geoip.sources.AsnMapping;
import nu.marginalia.geoip.sources.AsnTable;
@@ -10,6 +11,7 @@ import org.slf4j.LoggerFactory;
import java.net.InetAddress;
import java.util.Optional;

@Singleton
public class GeoIpDictionary {
    private volatile IP2LocationMapping ip2locMapping = null;
    private volatile AsnTable asnTable = null;
@@ -76,7 +78,7 @@ public class GeoIpDictionary {
    }

    public Optional<AsnTable.AsnInfo> getAsnInfo(int ipAddress) {
        if (null == asnTable) { // not loaded yet or failed to load
        if (null == asnMapping || null == asnTable) { // not loaded yet or failed to load
            return Optional.empty();
        }

|
||||
"",
|
||||
body.getBytes(StandardCharsets.UTF_8),
|
||||
false,
|
||||
-1,
|
||||
null,
|
||||
null
|
||||
);
|
||||
|
@@ -2002,12 +2002,11 @@ public class HeadingAwarePDFTextStripper extends LegacyPDFStreamEngine
        float minFontWeight = Integer.MAX_VALUE;
        for (var word : line)
        {
            int i = 0;
            for (var textPosition : word.getTextPositions())
            {
                if (word.text.charAt(i++) == ' ') {
                    continue;
                }
                // Skip empty text positions as they may have a different font
                if (word.text.isBlank()) continue;

                var font = textPosition.getFont();
                if (font == null) continue;
                var descriptor = font.getFontDescriptor();
@@ -148,6 +148,7 @@ public class ConvertingIntegrationTest {
                "",
                readClassPathFile(p.toString()).getBytes(),
                false,
                -1,
                null,
                null
        );
@@ -50,7 +50,7 @@ class PdfDocumentProcessorPluginTest {
        ));
    }
    public AbstractDocumentProcessorPlugin.DetailsWithWords testPdfFile(byte[] pdfBytes) throws Exception {
        var doc = new CrawledDocument("test", "https://www.example.com/sample.pdf", "application/pdf", Instant.now().toString(), 200, "OK", "OK", "", pdfBytes, false, null, null);
        var doc = new CrawledDocument("test", "https://www.example.com/sample.pdf", "application/pdf", Instant.now().toString(), 200, "OK", "OK", "", pdfBytes, false, -1, null, null);
        return plugin.createDetails(doc, new LinkTexts(), DocumentClass.NORMAL);
    }

@@ -58,6 +58,7 @@ dependencies {
    implementation libs.jsoup
    implementation libs.opencsv
    implementation libs.fastutil
    implementation libs.bundles.curator

    implementation libs.bundles.mariadb
    implementation libs.bundles.httpcomponents
@@ -25,9 +25,12 @@ import nu.marginalia.mq.MessageQueueFactory;
import nu.marginalia.process.ProcessConfiguration;
import nu.marginalia.process.ProcessConfigurationModule;
import nu.marginalia.process.ProcessMainClass;
import nu.marginalia.process.control.ProcessEventLog;
import nu.marginalia.process.control.ProcessHeartbeatImpl;
import nu.marginalia.process.log.WorkLog;
import nu.marginalia.service.discovery.ServiceRegistryIf;
import nu.marginalia.service.module.DatabaseModule;
import nu.marginalia.service.module.ServiceDiscoveryModule;
import nu.marginalia.slop.SlopCrawlDataRecord;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageId;
@@ -54,6 +57,7 @@ public class CrawlerMain extends ProcessMainClass {

    private final UserAgent userAgent;
    private final ProcessHeartbeatImpl heartbeat;
    private final ProcessEventLog eventLog;
    private final DomainProber domainProber;
    private final FileStorageService fileStorageService;
    private final AnchorTagsSourceFactory anchorTagsSourceFactory;
@@ -61,6 +65,7 @@ public class CrawlerMain extends ProcessMainClass {
    private final HikariDataSource dataSource;
    private final DomainBlacklist blacklist;
    private final int node;
    private final ServiceRegistryIf serviceRegistry;
    private final SimpleBlockingThreadPool pool;

    private final DomainLocks domainLocks = new DomainLocks();
@@ -84,6 +89,7 @@ public class CrawlerMain extends ProcessMainClass {
    public CrawlerMain(UserAgent userAgent,
                       HttpFetcherImpl httpFetcher,
                       ProcessHeartbeatImpl heartbeat,
                       ProcessEventLog eventLog,
                       MessageQueueFactory messageQueueFactory, DomainProber domainProber,
                       FileStorageService fileStorageService,
                       ProcessConfiguration processConfiguration,
@@ -91,6 +97,7 @@ public class CrawlerMain extends ProcessMainClass {
                       WarcArchiverFactory warcArchiverFactory,
                       HikariDataSource dataSource,
                       DomainBlacklist blacklist,
                       ServiceRegistryIf serviceRegistry,
                       Gson gson) throws InterruptedException {

        super(messageQueueFactory, processConfiguration, gson, CRAWLER_INBOX);
@@ -98,6 +105,7 @@ public class CrawlerMain extends ProcessMainClass {
        this.userAgent = userAgent;
        this.fetcher = httpFetcher;
        this.heartbeat = heartbeat;
        this.eventLog = eventLog;
        this.domainProber = domainProber;
        this.fileStorageService = fileStorageService;
        this.anchorTagsSourceFactory = anchorTagsSourceFactory;
@@ -105,6 +113,7 @@ public class CrawlerMain extends ProcessMainClass {
        this.dataSource = dataSource;
        this.blacklist = blacklist;
        this.node = processConfiguration.node();
        this.serviceRegistry = serviceRegistry;

        SimpleBlockingThreadPool.ThreadType threadType;
        if (Boolean.getBoolean("crawler.useVirtualThreads")) {
@@ -147,12 +156,17 @@ public class CrawlerMain extends ProcessMainClass {
        Injector injector = Guice.createInjector(
                new CrawlerModule(),
                new ProcessConfigurationModule("crawler"),
                new ServiceDiscoveryModule(),
                new DatabaseModule(false)
        );
        var crawler = injector.getInstance(CrawlerMain.class);

        var instructions = crawler.fetchInstructions(nu.marginalia.mqapi.crawling.CrawlRequest.class);

        crawler.serviceRegistry.registerProcess("crawler", crawler.node);

        try {
            crawler.eventLog.logEvent("CRAWLER-INFO", "Crawling started");
            var req = instructions.value();
            if (req.targetDomainName != null) {
                crawler.runForSingleDomain(req.targetDomainName, req.crawlStorage);
@@ -160,11 +174,15 @@ public class CrawlerMain extends ProcessMainClass {
            else {
                crawler.runForDatabaseDomains(req.crawlStorage);
            }
            crawler.eventLog.logEvent("CRAWLER-INFO", "Crawl completed successfully");
            instructions.ok();
        } catch (Exception ex) {
            logger.error("Crawler failed", ex);
            instructions.err();
        }
        finally {
            crawler.serviceRegistry.deregisterProcess("crawler", crawler.node);
        }

        TimeUnit.SECONDS.sleep(5);
    }
@@ -36,6 +36,7 @@ import org.apache.hc.core5.http.io.support.ClassicRequestBuilder;
import org.apache.hc.core5.http.message.MessageSupport;
import org.apache.hc.core5.http.protocol.HttpContext;
import org.apache.hc.core5.pool.PoolStats;
import org.apache.hc.core5.ssl.SSLContextBuilder;
import org.apache.hc.core5.util.TimeValue;
import org.apache.hc.core5.util.Timeout;
import org.jsoup.Jsoup;
@@ -48,11 +49,15 @@ import org.slf4j.MarkerFactory;

import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLException;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import java.io.IOException;
import java.net.SocketTimeoutException;
import java.net.URISyntaxException;
import java.net.UnknownHostException;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.security.cert.X509Certificate;
import java.time.Duration;
import java.time.Instant;
import java.util.*;
@@ -87,18 +92,49 @@ public class HttpFetcherImpl implements HttpFetcher, HttpRequestRetryStrategy {
        return connectionManager.getTotalStats();
    }

    private CloseableHttpClient createClient() throws NoSuchAlgorithmException {
    private CloseableHttpClient createClient() throws NoSuchAlgorithmException, KeyManagementException {
        final ConnectionConfig connectionConfig = ConnectionConfig.custom()
                .setSocketTimeout(10, TimeUnit.SECONDS)
                .setConnectTimeout(30, TimeUnit.SECONDS)
                .setValidateAfterInactivity(TimeValue.ofSeconds(5))
                .build();

        // No-op up front validation of server certificates.
        //
        // We will validate certificates later, after the connection is established
        // as we want to store the certificate chain and validation
        // outcome to the database.

        var trustMeBro = new X509TrustManager() {
            private X509Certificate[] lastServerCertChain;

            @Override
            public void checkClientTrusted(X509Certificate[] chain, String authType) {
            }

            @Override
            public void checkServerTrusted(X509Certificate[] chain, String authType) {
                this.lastServerCertChain = chain.clone();
            }

            @Override
            public X509Certificate[] getAcceptedIssuers() {
                return new X509Certificate[0];
            }

            public X509Certificate[] getLastServerCertChain() {
                return lastServerCertChain != null ? lastServerCertChain.clone() : null;
            }
        };

        SSLContext sslContext = SSLContextBuilder.create().build();
        sslContext.init(null, new TrustManager[]{trustMeBro}, null);

        connectionManager = PoolingHttpClientConnectionManagerBuilder.create()
                .setMaxConnPerRoute(2)
                .setMaxConnTotal(5000)
                .setDefaultConnectionConfig(connectionConfig)
                .setTlsSocketStrategy(new DefaultClientTlsStrategy(SSLContext.getDefault()))
                .setTlsSocketStrategy(new DefaultClientTlsStrategy(sslContext))
                .build();

        connectionManager.setDefaultSocketConfig(SocketConfig.custom()
@@ -183,6 +219,8 @@ public class HttpFetcherImpl implements HttpFetcher, HttpRequestRetryStrategy {
            this.client = createClient();
        } catch (NoSuchAlgorithmException e) {
            throw new RuntimeException(e);
        } catch (KeyManagementException e) {
            throw new RuntimeException(e);
        }
        this.userAgentString = userAgent.uaString();
        this.userAgentIdentifier = userAgent.uaIdentifier();
@@ -193,6 +231,8 @@ public class HttpFetcherImpl implements HttpFetcher, HttpRequestRetryStrategy {
            this.client = createClient();
        } catch (NoSuchAlgorithmException e) {
            throw new RuntimeException(e);
        } catch (KeyManagementException e) {
            throw new RuntimeException(e);
        }
        this.userAgentString = userAgent;
        this.userAgentIdentifier = userAgent;
@@ -10,6 +10,7 @@ import java.net.http.HttpClient;
import java.net.http.HttpHeaders;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.*;
import java.util.stream.Collectors;

@@ -90,8 +91,8 @@ public class WarcProtocolReconstructor {
        return "HTTP/" + version + " " + statusCode + " " + statusMessage + "\r\n" + headerString + "\r\n\r\n";
    }

    static String getResponseHeader(ClassicHttpResponse response, long size) {
        String headerString = getHeadersAsString(response.getHeaders(), size);
    static String getResponseHeader(ClassicHttpResponse response, Duration responseDuration, long size) {
        String headerString = getHeadersAsString(response.getHeaders(), responseDuration, size);

        return response.getVersion().format() + " " + response.getCode() + " " + response.getReasonPhrase() + "\r\n" + headerString + "\r\n\r\n";
    }
@@ -160,7 +161,7 @@ public class WarcProtocolReconstructor {

    static private String getHeadersAsString(Header[] headers, long responseSize) {
    static private String getHeadersAsString(Header[] headers, Duration responseDuration, long responseSize) {
        StringJoiner joiner = new StringJoiner("\r\n");

        for (var header : headers) {
@@ -176,6 +177,7 @@ public class WarcProtocolReconstructor {
            if (headerCapitalized.equals("Content-Encoding"))
                continue;

            // Since we're transparently decoding gzip, we need to update the Content-Length header
            // to reflect the actual size of the response body. We'll do this at the end.
            if (headerCapitalized.equals("Content-Length"))
@@ -184,6 +186,7 @@ public class WarcProtocolReconstructor {
            joiner.add(headerCapitalized + ": " + header.getValue());
        }

        joiner.add("X-Marginalia-Response-Time: " + responseDuration.toMillis());
        joiner.add("Content-Length: " + responseSize);

        return joiner.toString();
@@ -93,7 +93,7 @@ public class WarcRecorder implements AutoCloseable {
        WarcDigestBuilder responseDigestBuilder = new WarcDigestBuilder();
        WarcDigestBuilder payloadDigestBuilder = new WarcDigestBuilder();

        Instant date = Instant.now();
        Instant requestDate = Instant.now();

        // Not entirely sure why we need to do this, but keeping it due to Chesterton's Fence
        Map<String, List<String>> extraHeaders = new HashMap<>(request.getHeaders().length);
@@ -108,6 +108,8 @@ public class WarcRecorder implements AutoCloseable {
        try (WarcInputBuffer inputBuffer = WarcInputBuffer.forResponse(response, request, timeout);
             InputStream inputStream = inputBuffer.read()) {

            Instant responseDate = Instant.now();

            cookies.updateCookieStore(response);

            // Build and write the request
@@ -126,7 +128,7 @@ public class WarcRecorder implements AutoCloseable {

            WarcRequest warcRequest = new WarcRequest.Builder(requestUri)
                    .blockDigest(requestDigestBuilder.build())
                    .date(date)
                    .date(requestDate)
                    .body(MediaType.HTTP_REQUEST, httpRequestString)
                    .build();

@@ -138,7 +140,9 @@ public class WarcRecorder implements AutoCloseable {
                response.addHeader("X-Has-Cookies", 1);
            }

            byte[] responseHeaders = WarcProtocolReconstructor.getResponseHeader(response, inputBuffer.size()).getBytes(StandardCharsets.UTF_8);
            byte[] responseHeaders = WarcProtocolReconstructor.getResponseHeader(response,
                    Duration.between(requestDate, responseDate),
                    inputBuffer.size()).getBytes(StandardCharsets.UTF_8);

            ResponseDataBuffer responseDataBuffer = new ResponseDataBuffer(inputBuffer.size() + responseHeaders.length);

@@ -169,7 +173,7 @@ public class WarcRecorder implements AutoCloseable {

            WarcResponse.Builder responseBuilder = new WarcResponse.Builder(responseUri)
                    .blockDigest(responseDigestBuilder.build())
                    .date(date)
                    .date(responseDate)
                    .concurrentTo(warcRequest.id())
                    .body(MediaType.HTTP_RESPONSE, responseDataBuffer.copyBytes());

@@ -184,7 +188,7 @@ public class WarcRecorder implements AutoCloseable {
            warcResponse.http(); // force HTTP header to be parsed before body is consumed so that caller can use it
            writer.write(warcResponse);

            if (Duration.between(date, Instant.now()).compareTo(Duration.ofSeconds(9)) > 0
            if (Duration.between(requestDate, Instant.now()).compareTo(Duration.ofSeconds(9)) > 0
                && inputBuffer.size() < 2048
                && !requestUri.getPath().endsWith("robots.txt")) // don't bail on robots.txt
            {
@@ -196,7 +200,7 @@ public class WarcRecorder implements AutoCloseable {

                logger.warn("URL {} took too long to fetch ({}s) and was too small for the effort ({}b)",
                        requestUri,
                        Duration.between(date, Instant.now()).getSeconds(),
                        Duration.between(requestDate, Instant.now()).getSeconds(),
                        inputBuffer.size()
                );

@@ -148,6 +148,7 @@ public class ParquetSerializableCrawlDataStream implements AutoCloseable, Serial
                nextRecord.body,
                // this field isn't actually used, maybe we can skip calculating it?
                nextRecord.cookies,
                -1,
                lastModified,
                etag));
    }

@@ -166,6 +166,7 @@ public class SlopSerializableCrawlDataStream implements AutoCloseable, Serializa
                nextRecord.body(),
                // this field isn't actually used, maybe we can skip calculating it?
                nextRecord.cookies(),
                nextRecord.requestTimeMs(),
                null,
                null));
    }
@@ -23,6 +23,7 @@ public final class CrawledDocument implements SerializableCrawlData {

    public String crawlerStatus;
    public String crawlerStatusDesc;
    public int requestTimeMs;

    @Nullable
    public String headers;
@@ -82,7 +83,7 @@ public final class CrawledDocument implements SerializableCrawlData {
    public String lastModifiedMaybe;
    public String etagMaybe;

    public CrawledDocument(String crawlId, String url, String contentType, String timestamp, int httpStatus, String crawlerStatus, String crawlerStatusDesc, @Nullable String headers, byte[] documentBodyBytes, Boolean hasCookies, String lastModifiedMaybe, String etagMaybe) {
    public CrawledDocument(String crawlId, String url, String contentType, String timestamp, int httpStatus, String crawlerStatus, String crawlerStatusDesc, @Nullable String headers, byte[] documentBodyBytes, Boolean hasCookies, int requestTimeMs, String lastModifiedMaybe, String etagMaybe) {
        this.crawlId = crawlId;
        this.url = url;
        this.contentType = contentType;
@@ -94,6 +95,7 @@ public final class CrawledDocument implements SerializableCrawlData {
        this.documentBodyBytes = Objects.requireNonNullElse(documentBodyBytes, new byte[] {});
        this.hasCookies = hasCookies;
        this.lastModifiedMaybe = lastModifiedMaybe;
        this.requestTimeMs = requestTimeMs;
        this.etagMaybe = etagMaybe;
    }

@@ -173,6 +175,7 @@ public final class CrawledDocument implements SerializableCrawlData {
        private byte[] documentBodyBytes = new byte[0];
        private String recrawlState;
        private Boolean hasCookies;
        private int requestTimeMs;
        private String lastModifiedMaybe;
        private String etagMaybe;

@@ -248,8 +251,13 @@ public final class CrawledDocument implements SerializableCrawlData {
            return this;
        }

        public CrawledDocumentBuilder requestTimeMs(int requestTimeMs) {
            this.requestTimeMs = requestTimeMs;
            return this;
        }

        public CrawledDocument build() {
            return new CrawledDocument(this.crawlId, this.url, this.contentType, this.timestamp, this.httpStatus, this.crawlerStatus, this.crawlerStatusDesc, this.headers, this.documentBodyBytes, this.hasCookies, this.lastModifiedMaybe, this.etagMaybe);
            return new CrawledDocument(this.crawlId, this.url, this.contentType, this.timestamp, this.httpStatus, this.crawlerStatus, this.crawlerStatusDesc, this.headers, this.documentBodyBytes, this.hasCookies, this.requestTimeMs, this.lastModifiedMaybe, this.etagMaybe);
        }

        public String toString() {
@@ -9,6 +9,7 @@ import nu.marginalia.parquet.crawldata.CrawledDocumentParquetRecord;
import nu.marginalia.parquet.crawldata.CrawledDocumentParquetRecordFileReader;
import nu.marginalia.slop.column.array.ByteArrayColumn;
import nu.marginalia.slop.column.primitive.ByteColumn;
import nu.marginalia.slop.column.primitive.IntColumn;
import nu.marginalia.slop.column.primitive.LongColumn;
import nu.marginalia.slop.column.primitive.ShortColumn;
import nu.marginalia.slop.column.string.EnumColumn;
@@ -39,6 +40,7 @@ public record SlopCrawlDataRecord(String domain,
                                  long timestamp,
                                  String contentType,
                                  byte[] body,
                                  int requestTimeMs,
                                  String headers)
{
    private static final EnumColumn domainColumn = new EnumColumn("domain", StandardCharsets.UTF_8, StorageType.ZSTD);
@@ -49,6 +51,7 @@ public record SlopCrawlDataRecord(String domain,
    private static final LongColumn timestampColumn = new LongColumn("timestamp");
    private static final EnumColumn contentTypeColumn = new EnumColumn("contentType", StandardCharsets.UTF_8);
    private static final ByteArrayColumn bodyColumn = new ByteArrayColumn("body", StorageType.ZSTD);
    private static final ShortColumn requestTimeColumn = new ShortColumn("requestTimeMs");
    private static final StringColumn headerColumn = new StringColumn("header", StandardCharsets.UTF_8, StorageType.ZSTD);

    public SlopCrawlDataRecord(CrawledDocumentParquetRecord parquetRecord) {
@@ -60,6 +63,7 @@ public record SlopCrawlDataRecord(String domain,
                parquetRecord.timestamp.toEpochMilli(),
                parquetRecord.contentType,
                parquetRecord.body,
                -1,
                parquetRecord.headers
        );
    }
@@ -74,6 +78,7 @@ public record SlopCrawlDataRecord(String domain,
                date.toEpochMilli(),
                "x-marginalia/advisory;state=redirect",
                new byte[0],
                -1,
                ""
        );
    }
@@ -87,6 +92,7 @@ public record SlopCrawlDataRecord(String domain,
                date.toEpochMilli(),
                "x-marginalia/advisory;state=error",
                errorStatus.getBytes(),
                -1,
                ""
        );
    }
@@ -100,6 +106,7 @@ public record SlopCrawlDataRecord(String domain,
                date.toEpochMilli(),
                errorStatus,
                new byte[0],
                -1,
                ""
        );
    }
@@ -321,6 +328,7 @@ public record SlopCrawlDataRecord(String domain,
        private final LongColumn.Writer timestampColumnWriter;
        private final EnumColumn.Writer contentTypeColumnWriter;
        private final ByteArrayColumn.Writer bodyColumnWriter;
        private final ShortColumn.Writer requestTimeColumnWriter;
        private final StringColumn.Writer headerColumnWriter;

        public Writer(Path path) throws IOException {
@@ -334,6 +342,7 @@ public record SlopCrawlDataRecord(String domain,
            timestampColumnWriter = timestampColumn.create(this);
            contentTypeColumnWriter = contentTypeColumn.create(this);
            bodyColumnWriter = bodyColumn.create(this);
            requestTimeColumnWriter = requestTimeColumn.create(this);
            headerColumnWriter = headerColumn.create(this);
        }

@@ -346,6 +355,7 @@ public record SlopCrawlDataRecord(String domain,
            timestampColumnWriter.put(record.timestamp);
            contentTypeColumnWriter.put(record.contentType);
            bodyColumnWriter.put(record.body);
            requestTimeColumnWriter.put((short) record.requestTimeMs);
            headerColumnWriter.put(record.headers);
        }

@@ -391,10 +401,20 @@ public record SlopCrawlDataRecord(String domain,

            String headersStr;
            StringJoiner headersStrBuilder = new StringJoiner("\n");
            int requestTimeMs = -1;
            for (var header : headers) {
                if (header.getName().equalsIgnoreCase("X-Cookies") && "1".equals(header.getValue())) {
                    hasCookies = true;
                }
                if (header.getName().equals("X-Marginalia-Response-Time")) {
                    try {
                        requestTimeMs = Integer.parseInt(header.getValue());
                    }
                    catch (NumberFormatException ex) {
                        logger.warn("Failed to parse X-Marginalia-Response-Time header: {}", header.getValue());
                    }
                    continue;
                }
                headersStrBuilder.add(header.getName() + ": " + header.getValue());
            }
            headersStr = headersStrBuilder.toString();
@@ -409,6 +429,7 @@ public record SlopCrawlDataRecord(String domain,
                    response.date().toEpochMilli(),
                    contentType,
                    bodyBytes,
                    requestTimeMs,
                    headersStr
                    )
            );
@@ -461,6 +482,7 @@ public record SlopCrawlDataRecord(String domain,
        private final LongColumn.Reader timestampColumnReader;
        private final EnumColumn.Reader contentTypeColumnReader;
        private final ByteArrayColumn.Reader bodyColumnReader;
        private final ShortColumn.Reader requestTimeColumnReader;
        private final StringColumn.Reader headerColumnReader;

        public Reader(Path path) throws IOException {
@@ -475,6 +497,17 @@ public record SlopCrawlDataRecord(String domain,
            contentTypeColumnReader = contentTypeColumn.open(this);
            bodyColumnReader = bodyColumn.open(this);
            headerColumnReader = headerColumn.open(this);

            // FIXME: After 2025-06-XX, we can remove this migration workaround
            ShortColumn.Reader timeColumnReader;
            try {
                timeColumnReader = requestTimeColumn.open(this);
            }
            catch (Exception ex) {
                // Migration workaround
                timeColumnReader = null;
            }
            requestTimeColumnReader = timeColumnReader;
        }

        public SlopCrawlDataRecord get() throws IOException {
@@ -487,6 +520,7 @@ public record SlopCrawlDataRecord(String domain,
                    timestampColumnReader.get(),
                    contentTypeColumnReader.get(),
                    bodyColumnReader.get(),
                    requestTimeColumnReader != null ? requestTimeColumnReader.get() : -1,
                    headerColumnReader.get()
            );
        }
@@ -506,6 +540,7 @@ public record SlopCrawlDataRecord(String domain,
        private final LongColumn.Reader timestampColumnReader;
        private final EnumColumn.Reader contentTypeColumnReader;
        private final ByteArrayColumn.Reader bodyColumnReader;
        private final ShortColumn.Reader requestTimeColumnReader;
        private final StringColumn.Reader headerColumnReader;

        private SlopCrawlDataRecord next = null;
@@ -522,6 +557,17 @@ public record SlopCrawlDataRecord(String domain,
            contentTypeColumnReader = contentTypeColumn.open(this);
            bodyColumnReader = bodyColumn.open(this);
            headerColumnReader = headerColumn.open(this);

            // FIXME: After 2025-06-XX, we can remove this migration workaround
            ShortColumn.Reader timeColumnReader;
            try {
                timeColumnReader = requestTimeColumn.open(this);
            }
            catch (Exception ex) {
                // Migration workaround
                timeColumnReader = null;
            }
            requestTimeColumnReader = timeColumnReader;
        }

        public abstract boolean filter(String url, int status, String contentType);
@@ -548,6 +594,7 @@ public record SlopCrawlDataRecord(String domain,
            boolean cookies = cookiesColumnReader.get() == 1;
            int status = statusColumnReader.get();
            long timestamp = timestampColumnReader.get();
            int requestTimeMs = requestTimeColumnReader != null ? requestTimeColumnReader.get() : -1;
            String contentType = contentTypeColumnReader.get();

            LargeItem<byte[]> body = bodyColumnReader.getLarge();
@@ -555,7 +602,7 @@ public record SlopCrawlDataRecord(String domain,

            if (filter(url, status, contentType)) {
                next = new SlopCrawlDataRecord(
                        domain, url, ip, cookies, status, timestamp, contentType, body.get(), headers.get()
                        domain, url, ip, cookies, status, timestamp, contentType, body.get(), requestTimeMs, headers.get()
                );
                return true;
            }
@@ -195,6 +195,7 @@ public class LiveCrawlDataSet implements AutoCloseable {
                headers,
                body,
                false,
                -1,
                "",
                ""
        ));
71	code/processes/ping-process/build.gradle	Normal file
@@ -0,0 +1,71 @@
plugins {
    id 'java'

    id 'application'
    id 'jvm-test-suite'
}

java {
    toolchain {
        languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
    }
}

application {
    mainClass = 'nu.marginalia.ping.PingMain'
    applicationName = 'ping-process'
}

tasks.distZip.enabled = false

apply from: "$rootProject.projectDir/srcsets.gradle"

dependencies {

    implementation project(':code:common:db')
    implementation project(':code:common:model')
    implementation project(':code:common:config')
    implementation project(':code:common:service')

    implementation project(':code:libraries:geo-ip')
    implementation project(':code:libraries:message-queue')

    implementation project(':code:processes:process-mq-api')
    implementation libs.bundles.slf4j
    implementation libs.notnull
    implementation libs.guava

    implementation dependencies.create(libs.guice.get()) {
        exclude group: 'com.google.guava'
    }
    implementation libs.gson
    implementation libs.zstd
    implementation libs.bucket4j
    implementation libs.crawlercommons
    implementation libs.jsoup
    implementation libs.fastutil
    implementation libs.bundles.curator
    implementation libs.bundles.mariadb
    implementation libs.bundles.httpcomponents
    implementation libs.commons.lang3

    implementation 'org.bouncycastle:bcprov-jdk18on:1.80'
    implementation 'org.bouncycastle:bcpkix-jdk18on:1.80'
    implementation 'dnsjava:dnsjava:3.5.2'

    testImplementation libs.bundles.slf4j.test
    testImplementation libs.bundles.junit
    testImplementation libs.mockito

    testImplementation libs.wiremock

    testImplementation platform('org.testcontainers:testcontainers-bom:1.17.4')
    testImplementation libs.commons.codec
    testImplementation 'org.testcontainers:mariadb:1.17.4'
    testImplementation 'org.testcontainers:junit-jupiter:1.17.4'
    testImplementation project(':code:libraries:test-helpers')

    testImplementation project(':code:processes:test-data')
}
@@ -0,0 +1,84 @@
package nu.marginalia.ping;

import com.google.inject.Inject;
import nu.marginalia.ping.model.ErrorClassification;

import java.time.Duration;
import java.util.Map;
import java.util.concurrent.ThreadLocalRandom;

public class BackoffStrategy {

    private final Map<ErrorClassification, Duration> baseIntervals;
    private final Map<ErrorClassification, Duration> maxIntervals;
    private final Duration okInterval;

    @Inject
    public BackoffStrategy(PingIntervalsConfiguration pingIntervalsConfiguration) {
        this.baseIntervals = pingIntervalsConfiguration.baseIntervals();
        this.maxIntervals = pingIntervalsConfiguration.maxIntervals();
        this.okInterval = baseIntervals.get(ErrorClassification.NONE);
    }

    public Duration getOkInterval() {
        return okInterval;
    }

    public Duration getUpdateTime(Duration currentDuration,
                                  ErrorClassification errorClassification,
                                  int backoffConsecutiveFailures) {

        Duration nextBackoff = calculateBackoff(errorClassification, currentDuration, backoffConsecutiveFailures + 1);
        nextBackoff = addJitter(nextBackoff);

        return nextBackoff;
    }

    private Duration calculateBackoff(ErrorClassification errorClassification,
                                      Duration currentDuration,
                                      int backoffConsecutiveFailures) {

        if (currentDuration == null) {
            return baseIntervals.get(errorClassification);
        }

        Duration baseInterval = baseIntervals.get(errorClassification);
        Duration maxInterval = maxIntervals.get(errorClassification);

        if (currentDuration.compareTo(maxInterval) >= 0) {
            return maxInterval;
        }

        double multiplier = switch(errorClassification) {
            case ErrorClassification.UNKNOWN -> 1.5;
            case ErrorClassification.TIMEOUT -> 2.5;
            case ErrorClassification.CONNECTION_ERROR -> 2.0;
            case ErrorClassification.HTTP_CLIENT_ERROR -> 1.7;
            case ErrorClassification.HTTP_SERVER_ERROR -> 2.0;
            case ErrorClassification.SSL_ERROR -> 1.8;
            case ErrorClassification.DNS_ERROR -> 1.5;
            default -> 2.0; // Default multiplier for any other classification
        };

        double backoffMinutes = baseInterval.toMinutes()
                * Math.pow(multiplier, backoffConsecutiveFailures - 1);

        Duration newDuration = Duration.ofMinutes(Math.round(0.5+backoffMinutes));
        if (newDuration.compareTo(maxInterval) > 0) {
            return maxInterval;
        }

        return newDuration;
    }

    private Duration addJitter(Duration duration) {
        // Add ±15% jitter to prevent synchronized retries
        double jitterPercent = 0.15;
        long baseMinutes = duration.toMinutes();
        long jitterRange = (long) (baseMinutes * jitterPercent * 2);
        long jitterOffset = ThreadLocalRandom.current().nextLong(jitterRange + 1) - (jitterRange / 2);

        long finalMinutes = Math.max(1, baseMinutes + jitterOffset);
        return Duration.ofMinutes(finalMinutes);
    }
}
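
A hedged worked example of the backoff arithmetic above; the 10-minute base and 120-minute cap for TIMEOUT are assumed figures, since PingIntervalsConfiguration is not shown in this diff:

    // getUpdateTime passes backoffConsecutiveFailures + 1 to calculateBackoff, so after two
    // consecutive timeouts: 10 min * 2.5^(3 - 1) = 62.5 -> rounded up to 63 minutes, which
    // addJitter() then shifts by roughly ±15% (whole minutes, never below 1).
    Duration next = backoffStrategy.getUpdateTime(Duration.ofMinutes(25), ErrorClassification.TIMEOUT, 2);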
270
code/processes/ping-process/java/nu/marginalia/ping/PingDao.java
Normal file
@@ -0,0 +1,270 @@
|
||||
package nu.marginalia.ping;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import nu.marginalia.model.gson.GsonFactory;
|
||||
import nu.marginalia.ping.model.*;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.time.Instant;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
@Singleton
|
||||
public class PingDao {
|
||||
private final HikariDataSource dataSource;
|
||||
private static final Gson gson = GsonFactory.get();
|
||||
private static final Logger logger = LoggerFactory.getLogger(PingDao.class);
|
||||
|
||||
@Inject
|
||||
public PingDao(HikariDataSource dataSource) {
|
||||
this.dataSource = dataSource;
|
||||
}
|
||||
|
||||
public void write(WritableModel model) {
|
||||
write(List.of(model));
|
||||
}
|
||||
|
||||
public void write(Collection<WritableModel> models) {
|
||||
logger.debug("Writing: {}", models);
|
||||
|
||||
try (var conn = dataSource.getConnection()) {
|
||||
|
||||
// Don't bother with a transaction if there's only one model to write.
|
||||
if (models.size() <= 1) {
|
||||
for (WritableModel model : models) {
|
||||
model.write(conn);
|
||||
}
|
||||
}
|
||||
else { // If there are multiple models, use a transaction to ensure atomicity.
|
||||
conn.setAutoCommit(false);
|
||||
try {
|
||||
for (WritableModel model : models) {
|
||||
model.write(conn);
|
||||
}
|
||||
conn.commit();
|
||||
} catch (SQLException e) {
|
||||
conn.rollback();
|
||||
throw e;
|
||||
} finally {
|
||||
conn.setAutoCommit(true);
|
||||
}
|
||||
}
|
||||
} catch (SQLException e) {
|
||||
throw new RuntimeException("Failed to write model", e);
|
||||
}
|
||||
}
|
||||
|
||||
public void scheduleDnsUpdate(String rootDomainName, Instant timestamp, int priority) throws SQLException {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var ps = conn.prepareStatement("""
|
||||
UPDATE DOMAIN_DNS_INFORMATION
|
||||
SET TS_NEXT_DNS_CHECK = ?, DNS_CHECK_PRIORITY = ?
|
||||
WHERE ROOT_DOMAIN_NAME = ?
|
||||
""")) {
|
||||
|
||||
ps.setTimestamp(1, java.sql.Timestamp.from(timestamp));
|
||||
ps.setInt(2, priority);
|
||||
ps.setString(3, rootDomainName);
|
||||
ps.executeUpdate();
|
||||
}
|
||||
}
|
||||
|
||||
public List<DomainReference> getNewDomains(int nodeId, int cnt) throws SQLException {
|
||||
List<DomainReference> domains = new ArrayList<>();
|
||||
try (var conn = dataSource.getConnection();
|
||||
var ps = conn.prepareStatement("""
|
||||
SELECT domain_id, domain_name
|
||||
FROM EC_DOMAIN
|
||||
LEFT JOIN DOMAIN_AVAILABILITY_INFORMATION
|
||||
ON EC_DOMAIN.domain_id = DOMAIN_AVAILABILITY_INFORMATION.domain_id
|
||||
WHERE DOMAIN_AVAILABILITY_INFORMATION.server_available IS NULL
|
||||
AND EC_DOMAIN.NODE_ID = ?
|
||||
LIMIT ?
|
||||
"""))
|
||||
{
|
||||
ps.setInt(1, nodeId);
|
||||
ps.setInt(2, cnt);
|
||||
|
||||
ResultSet rs = ps.executeQuery();
|
||||
|
||||
while (rs.next()) {
|
||||
domains.add(new DomainReference(rs.getInt("domain_id"), nodeId, rs.getString("domain_name").toLowerCase()));
|
||||
}
|
||||
}
|
||||
|
||||
return domains;
|
||||
}
|
||||
|
||||
public DomainAvailabilityRecord getDomainPingStatus(int domainId) throws SQLException {
|
||||
|
||||
try (var conn = dataSource.getConnection();
|
||||
var ps = conn.prepareStatement("SELECT * FROM DOMAIN_AVAILABILITY_INFORMATION WHERE domain_id = ?")) {
|
||||
|
||||
ps.setInt(1, domainId);
|
||||
ResultSet rs = ps.executeQuery();
|
||||
if (rs.next()) {
|
||||
return new DomainAvailabilityRecord(rs);
|
||||
} else {
|
||||
return null; // or throw an exception if preferred
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public DomainSecurityRecord getDomainSecurityInformation(int domainId) throws SQLException {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var ps = conn.prepareStatement("SELECT * FROM DOMAIN_SECURITY_INFORMATION WHERE domain_id = ?")) {
|
||||
|
||||
ps.setInt(1, domainId);
|
||||
ResultSet rs = ps.executeQuery();
|
||||
if (rs.next()) {
|
||||
return new DomainSecurityRecord(rs);
|
||||
} else {
|
||||
return null; // or throw an exception if preferred
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public DomainDnsRecord getDomainDnsRecord(int dnsRootDomainId) throws SQLException {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var ps = conn.prepareStatement("SELECT * FROM DOMAIN_DNS_INFORMATION WHERE DNS_ROOT_DOMAIN_ID = ?")) {
|
||||
|
||||
ps.setObject(1, dnsRootDomainId, java.sql.Types.INTEGER);
|
||||
ResultSet rs = ps.executeQuery();
|
||||
if (rs.next()) {
|
||||
return new DomainDnsRecord(rs);
|
||||
} else {
|
||||
return null; // or throw an exception if preferred
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public DomainDnsRecord getDomainDnsRecord(String rootDomainName) throws SQLException {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var ps = conn.prepareStatement("SELECT * FROM DOMAIN_DNS_INFORMATION WHERE ROOT_DOMAIN_NAME = ?")) {
|
||||
|
||||
ps.setString(1, rootDomainName);
|
||||
ResultSet rs = ps.executeQuery();
|
||||
if (rs.next()) {
|
||||
return new DomainDnsRecord(rs);
|
||||
} else {
|
||||
return null; // or throw an exception if preferred
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public List<HistoricalAvailabilityData> getNextDomainPingStatuses(int count, int nodeId) throws SQLException {
|
||||
List<HistoricalAvailabilityData> domainAvailabilityRecords = new ArrayList<>(count);
|
||||
|
||||
var query = """
|
||||
SELECT DOMAIN_AVAILABILITY_INFORMATION.*, DOMAIN_SECURITY_INFORMATION.*, EC_DOMAIN.DOMAIN_NAME FROM DOMAIN_AVAILABILITY_INFORMATION
|
||||
LEFT JOIN DOMAIN_SECURITY_INFORMATION
|
||||
ON DOMAIN_AVAILABILITY_INFORMATION.DOMAIN_ID = DOMAIN_SECURITY_INFORMATION.DOMAIN_ID
|
||||
INNER JOIN EC_DOMAIN ON EC_DOMAIN.ID = DOMAIN_AVAILABILITY_INFORMATION.DOMAIN_ID
|
||||
WHERE NEXT_SCHEDULED_UPDATE <= ? AND DOMAIN_AVAILABILITY_INFORMATION.NODE_ID = ?
|
||||
ORDER BY NEXT_SCHEDULED_UPDATE ASC
|
||||
LIMIT ?
|
||||
""";
|
||||
try (var conn = dataSource.getConnection();
|
||||
var ps = conn.prepareStatement(query)) {
|
||||
// Use Java time since this is how we generate the timestamps in the ping process
|
||||
// to avoid timezone weirdness.
|
||||
ps.setTimestamp(1, java.sql.Timestamp.from(Instant.now()));
|
||||
ps.setInt(2, nodeId);
|
||||
ps.setInt(3, count);
|
||||
ResultSet rs = ps.executeQuery();
|
||||
while (rs.next()) {
|
||||
String domainName = rs.getString("EC_DOMAIN.DOMAIN_NAME");
|
||||
var domainAvailabilityRecord = new DomainAvailabilityRecord(rs);
|
||||
if (rs.getObject("DOMAIN_SECURITY_INFORMATION.DOMAIN_ID", Integer.class) != null) {
|
||||
var securityRecord = new DomainSecurityRecord(rs);
|
||||
domainAvailabilityRecords.add(
|
||||
new HistoricalAvailabilityData.AvailabilityAndSecurity(domainName, domainAvailabilityRecord, securityRecord)
|
||||
);
|
||||
} else {
|
||||
domainAvailabilityRecords.add(new HistoricalAvailabilityData.JustAvailability(domainName, domainAvailabilityRecord));
|
||||
}
|
||||
}
|
||||
}
|
||||
return domainAvailabilityRecords;
|
||||
}
|
||||
|
||||
public List<DomainDnsRecord> getNextDnsDomainRecords(int count, int nodeId) throws SQLException {
|
||||
List<DomainDnsRecord> domainDnsRecords = new ArrayList<>(count);
|
||||
|
||||
var query = """
|
||||
SELECT * FROM DOMAIN_DNS_INFORMATION
|
||||
WHERE TS_NEXT_DNS_CHECK <= ? AND NODE_AFFINITY = ?
|
||||
ORDER BY DNS_CHECK_PRIORITY ASC, TS_NEXT_DNS_CHECK ASC
|
||||
LIMIT ?
|
||||
""";
|
||||
try (var conn = dataSource.getConnection();
|
||||
var ps = conn.prepareStatement(query)) {
|
||||
ps.setTimestamp(1, java.sql.Timestamp.from(Instant.now()));
|
||||
ps.setInt(2, nodeId);
|
||||
ps.setInt(3, count);
|
||||
ResultSet rs = ps.executeQuery();
|
||||
while (rs.next()) {
|
||||
domainDnsRecords.add(new DomainDnsRecord(rs));
|
||||
}
|
||||
}
|
||||
return domainDnsRecords;
|
||||
}
|
||||
|
||||
public List<DomainReference> getOrphanedDomains(int nodeId) {
|
||||
List<DomainReference> orphanedDomains = new ArrayList<>();
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
SELECT e.DOMAIN_NAME, e.ID
|
||||
FROM EC_DOMAIN e
|
||||
LEFT JOIN DOMAIN_AVAILABILITY_INFORMATION d ON e.ID = d.DOMAIN_ID
|
||||
WHERE d.DOMAIN_ID IS NULL AND e.NODE_AFFINITY = ?;
|
||||
""")) {
|
||||
stmt.setInt(1, nodeId);
|
||||
stmt.setFetchSize(10_000);
|
||||
ResultSet rs = stmt.executeQuery();
|
||||
while (rs.next()) {
|
||||
String domainName = rs.getString("DOMAIN_NAME");
|
||||
int domainId = rs.getInt("ID");
|
||||
|
||||
orphanedDomains.add(new DomainReference(domainId, nodeId, domainName));
|
||||
}
|
||||
}
|
||||
catch (SQLException e) {
|
||||
throw new RuntimeException("Failed to retrieve orphaned domains", e);
|
||||
}
|
||||
|
||||
return orphanedDomains;
|
||||
}
|
||||
|
||||
public List<String> getOrphanedRootDomains(int nodeId) {
|
||||
List<String> orphanedDomains = new ArrayList<>();
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
SELECT DISTINCT(DOMAIN_TOP)
|
||||
FROM EC_DOMAIN e
|
||||
LEFT JOIN DOMAIN_DNS_INFORMATION d ON e.DOMAIN_TOP = d.ROOT_DOMAIN_NAME
|
||||
WHERE d.ROOT_DOMAIN_NAME IS NULL AND e.NODE_AFFINITY = ?;
|
||||
""")) {
|
||||
stmt.setInt(1, nodeId);
|
||||
stmt.setFetchSize(10_000);
|
||||
ResultSet rs = stmt.executeQuery();
|
||||
while (rs.next()) {
|
||||
String domainName = rs.getString("DOMAIN_TOP");
|
||||
orphanedDomains.add(domainName.toLowerCase());
|
||||
}
|
||||
}
|
||||
catch (SQLException e) {
|
||||
throw new RuntimeException("Failed to retrieve orphaned domains", e);
|
||||
}
|
||||
|
||||
return orphanedDomains;
|
||||
}
|
||||
}
|
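PingDao.write() commits a batch of WritableModel instances in a single transaction so that related availability, security and event rows land together or not at all. The sketch below shows just that pattern in isolation; the WritableModel interface here is a simplified stand-in for the project's own, and the connection handling is plain JDBC:

import java.sql.Connection;
import java.sql.SQLException;
import java.util.List;

// The WritableModel here is a simplified stand-in; the point is the commit/rollback pattern.
public class AtomicWriteSketch {
    interface WritableModel {
        void write(Connection conn) throws SQLException;
    }

    static void writeAll(Connection conn, List<WritableModel> models) throws SQLException {
        conn.setAutoCommit(false);
        try {
            for (WritableModel model : models) {
                model.write(conn);
            }
            conn.commit();      // all rows become visible together
        } catch (SQLException e) {
            conn.rollback();    // discard any partial writes
            throw e;
        } finally {
            conn.setAutoCommit(true);
        }
    }
}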
@@ -0,0 +1,13 @@
|
||||
package nu.marginalia.ping;
|
||||
|
||||
import nu.marginalia.ping.model.ErrorClassification;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.util.Map;
|
||||
|
||||
public record PingIntervalsConfiguration(
|
||||
Duration dnsUpdateInterval,
|
||||
Map<ErrorClassification, Duration> baseIntervals,
|
||||
Map<ErrorClassification, Duration> maxIntervals
|
||||
) {
|
||||
}
|
@@ -0,0 +1,363 @@
|
||||
package nu.marginalia.ping;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import nu.marginalia.ping.model.*;
|
||||
import nu.marginalia.ping.svc.DnsPingService;
|
||||
import nu.marginalia.ping.svc.HttpPingService;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ArrayBlockingQueue;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/** PingJobScheduler is responsible for scheduling and processing ping jobs
|
||||
* for both HTTP pings and DNS lookups. It manages a queue of jobs and processes them
|
||||
* in separate threads, ensuring that domains are pinged and DNS records are updated
|
||||
* efficiently.
|
||||
*/
|
||||
public class PingJobScheduler {
|
||||
private final HttpPingService httpPingService;
|
||||
private final DnsPingService dnsPingService;
|
||||
private final PingDao pingDao;
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(PingJobScheduler.class);
|
||||
|
||||
sealed interface DnsJob {
|
||||
Object reference();
|
||||
|
||||
record DnsFetch(String rootDomain) implements DnsJob {
|
||||
@Override
|
||||
public Object reference() {
|
||||
return rootDomain;
|
||||
}
|
||||
}
|
||||
record DnsRefresh(DomainDnsRecord oldRecord) implements DnsJob {
|
||||
@Override
|
||||
public Object reference() {
|
||||
return oldRecord.rootDomainName();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sealed interface AvailabilityJob {
|
||||
Object reference();
|
||||
|
||||
record Availability(DomainReference domainReference) implements AvailabilityJob {
|
||||
@Override
|
||||
public Object reference() {
|
||||
return domainReference.domainName();
|
||||
}
|
||||
}
|
||||
record AvailabilityRefresh(String domain, @NotNull DomainAvailabilityRecord availability, @Nullable DomainSecurityRecord securityRecord) implements AvailabilityJob {
|
||||
@Override
|
||||
public Object reference() {
|
||||
return domain;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Keeps track of ongoing ping and DNS processing to avoid duplicate work,
|
||||
// which is mainly a scenario that will occur when there is not a lot of data
|
||||
// in the database. In real-world scenarios, the queues will be full most
|
||||
// of the time, and prevent this from being an issue.
|
||||
|
||||
private static final ConcurrentHashMap<Object, Boolean> processingDomainsAvailability = new ConcurrentHashMap<>();
|
||||
private static final ConcurrentHashMap<Object, Boolean> processingDomainsDns = new ConcurrentHashMap<>();
|
||||
|
||||
private static final ArrayBlockingQueue<DnsJob> dnsJobQueue = new ArrayBlockingQueue<>(8);
|
||||
private static final ArrayBlockingQueue<AvailabilityJob> availabilityJobQueue = new ArrayBlockingQueue<>(8);
|
||||
|
||||
public volatile Integer nodeId = null;
|
||||
public volatile boolean running = false;
|
||||
|
||||
private final List<Thread> allThreads = new ArrayList<>();
|
||||
|
||||
@Inject
|
||||
public PingJobScheduler(HttpPingService httpPingService,
|
||||
DnsPingService dnsPingService,
|
||||
PingDao pingDao)
|
||||
{
|
||||
this.httpPingService = httpPingService;
|
||||
this.dnsPingService = dnsPingService;
|
||||
this.pingDao = pingDao;
|
||||
}
|
||||
|
||||
public synchronized void start(boolean startPaused) {
|
||||
if (running)
|
||||
return;
|
||||
|
||||
nodeId = null;
|
||||
|
||||
running = true;
|
||||
|
||||
allThreads.add(Thread.ofPlatform().daemon().name("new-dns").start(this::fetchNewDnsRecords));
|
||||
allThreads.add(Thread.ofPlatform().daemon().name("new-pings").start(this::fetchNewAvailabilityJobs));
|
||||
allThreads.add(Thread.ofPlatform().daemon().name("update-pings").start(this::updateAvailabilityJobs));
|
||||
allThreads.add(Thread.ofPlatform().daemon().name("update-dns").start(this::updateDnsJobs));
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
allThreads.add(Thread.ofPlatform().daemon().name("ping-job-consumer-" + i).start(this::availabilityJobConsumer));
|
||||
}
|
||||
for (int i = 0; i < 4; i++) {
|
||||
allThreads.add(Thread.ofPlatform().daemon().name("dns-job-consumer-" + i).start(this::dnsJobConsumer));
|
||||
}
|
||||
}
|
||||
|
||||
public void stop() {
|
||||
running = false;
|
||||
for (Thread thread : allThreads) {
|
||||
try {
|
||||
thread.interrupt();
|
||||
thread.join();
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
logger.error("Failed to join thread: " + thread.getName(), e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void pause(int nodeId) {
|
||||
if (this.nodeId != null && this.nodeId != nodeId) {
|
||||
logger.warn("Attempted to pause PingJobScheduler with mismatched nodeId: expected {}, got {}", this.nodeId, nodeId);
|
||||
return;
|
||||
}
|
||||
this.nodeId = null;
|
||||
logger.info("PingJobScheduler paused");
|
||||
}
|
||||
|
||||
public synchronized void resume(int nodeId) {
|
||||
if (this.nodeId != null) {
|
||||
logger.warn("Attempted to resume PingJobScheduler with mismatched nodeId: expected null, got {}", this.nodeId, nodeId);
|
||||
return;
|
||||
}
|
||||
this.nodeId = nodeId;
|
||||
|
||||
notifyAll();
|
||||
logger.info("PingJobScheduler resumed");
|
||||
}
|
||||
|
||||
public synchronized void waitForResume() throws InterruptedException {
|
||||
while (nodeId == null) {
|
||||
wait();
|
||||
}
|
||||
}
|
||||
|
||||
private void availabilityJobConsumer() {
|
||||
while (running) {
|
||||
try {
|
||||
AvailabilityJob job = availabilityJobQueue.poll(1, TimeUnit.SECONDS);
|
||||
if (job == null) {
|
||||
continue; // No job available, continue to the next iteration
|
||||
}
|
||||
|
||||
try {
|
||||
switch (job) {
|
||||
case AvailabilityJob.Availability(DomainReference reference) -> {
|
||||
logger.info("Availability check: {}", reference.domainName());
|
||||
pingDao.write(httpPingService.pingDomain(reference, null, null));
|
||||
}
|
||||
case AvailabilityJob.AvailabilityRefresh(String domain, DomainAvailabilityRecord availability, DomainSecurityRecord security) -> {
|
||||
logger.info("Availability check with reference: {}", domain);
|
||||
pingDao.write(httpPingService.pingDomain(
|
||||
new DomainReference(availability.domainId(), availability.nodeId(), domain),
|
||||
availability,
|
||||
security));
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (Exception e) {
|
||||
logger.error("Error processing availability job for domain: " + job.reference(), e);
|
||||
}
|
||||
finally {
|
||||
// Remove the domain from the processing map
|
||||
processingDomainsAvailability.remove(job.reference());
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
logger.error("Availability job consumer interrupted", e);
|
||||
break;
|
||||
} catch (Exception e) {
|
||||
logger.error("Error processing availability job", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void dnsJobConsumer() {
|
||||
while (running) {
|
||||
try {
|
||||
DnsJob job = dnsJobQueue.poll(1, TimeUnit.SECONDS);
|
||||
if (job == null) {
|
||||
continue; // No job available, continue to the next iteration
|
||||
}
|
||||
|
||||
try {
|
||||
switch (job) {
|
||||
case DnsJob.DnsFetch(String rootDomain) -> {
|
||||
logger.info("Fetching DNS records for root domain: {}", rootDomain);
|
||||
pingDao.write(dnsPingService.pingDomain(rootDomain, null));
|
||||
}
|
||||
case DnsJob.DnsRefresh(DomainDnsRecord oldRecord) -> {
|
||||
logger.info("Refreshing DNS records for domain: {}", oldRecord.rootDomainName());
|
||||
pingDao.write(dnsPingService.pingDomain(oldRecord.rootDomainName(), oldRecord));
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (Exception e) {
|
||||
logger.error("Error processing DNS job for domain: " + job.reference(), e);
|
||||
}
|
||||
finally {
|
||||
// Remove the domain from the processing map
|
||||
processingDomainsDns.remove(job.reference());
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
logger.error("DNS job consumer interrupted", e);
|
||||
break;
|
||||
} catch (Exception e) {
|
||||
logger.error("Error processing DNS job", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void fetchNewAvailabilityJobs() {
|
||||
try {
|
||||
while (running) {
|
||||
|
||||
Integer nid = nodeId;
|
||||
if (nid == null) {
|
||||
waitForResume();
|
||||
continue; // re-fetch the records after resuming
|
||||
}
|
||||
|
||||
List<DomainReference> domains = pingDao.getOrphanedDomains(nid);
|
||||
for (DomainReference domain : domains) {
|
||||
|
||||
if (nodeId == null) {
|
||||
waitForResume();
|
||||
break; // re-fetch the records after resuming
|
||||
}
|
||||
|
||||
try {
|
||||
availabilityJobQueue.put(new AvailabilityJob.Availability(domain));
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
logger.error("Failed to add new ping job for domain: " + domain, e);
|
||||
}
|
||||
}
|
||||
|
||||
// This is an incredibly expensive operation, so we only do it once a day
|
||||
try {
|
||||
TimeUnit.HOURS.sleep(24);
|
||||
} catch (InterruptedException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (Exception e) {
|
||||
logger.error("Error fetching new ping jobs", e);
|
||||
}
|
||||
}
|
||||
|
||||
private void fetchNewDnsRecords() {
|
||||
try {
|
||||
while (running) {
|
||||
Integer nid = nodeId;
|
||||
if (nid == null) {
|
||||
waitForResume();
|
||||
continue; // re-fetch the records after resuming
|
||||
}
|
||||
|
||||
List<String> rootDomains = pingDao.getOrphanedRootDomains(nid);
|
||||
for (String rootDomain : rootDomains) {
|
||||
|
||||
if (nodeId == null) {
|
||||
waitForResume();
|
||||
break; // re-fetch the records after resuming
|
||||
}
|
||||
|
||||
try {
|
||||
dnsJobQueue.put(new DnsJob.DnsFetch(rootDomain));
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
logger.error("Failed to add new DNS job for root domain: " + rootDomain, e);
|
||||
}
|
||||
}
|
||||
// This is an incredibly expensive operation, so we only do it once a day
|
||||
TimeUnit.HOURS.sleep(24);
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
logger.error("DNS job fetch interrupted", e);
|
||||
}
|
||||
}
|
||||
|
||||
private void updateAvailabilityJobs() {
|
||||
|
||||
while (running) {
|
||||
try {
|
||||
Integer nid = nodeId;
|
||||
if (nid == null) {
|
||||
waitForResume();
|
||||
continue; // re-fetch the records after resuming
|
||||
}
|
||||
|
||||
var statuses = pingDao.getNextDomainPingStatuses(100, nid);
|
||||
|
||||
if (nodeId == null) {
|
||||
waitForResume();
|
||||
break; // re-fetch the records after resuming
|
||||
}
|
||||
|
||||
for (var status : statuses) {
|
||||
var job = switch (status) {
|
||||
case HistoricalAvailabilityData.JustAvailability(String domain, DomainAvailabilityRecord record)
|
||||
-> new AvailabilityJob.AvailabilityRefresh(domain, record, null);
|
||||
case HistoricalAvailabilityData.AvailabilityAndSecurity(String domain, DomainAvailabilityRecord availability, DomainSecurityRecord security)
|
||||
-> new AvailabilityJob.AvailabilityRefresh(domain, availability, security);
|
||||
};
|
||||
|
||||
if (processingDomainsAvailability.putIfAbsent(job.reference(), true) == null) {
|
||||
availabilityJobQueue.put(job);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
catch (Exception e) {
|
||||
logger.error("Error fetching next domain ping statuses", e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void updateDnsJobs() {
|
||||
while (running) {
|
||||
try {
|
||||
Integer nid = nodeId;
|
||||
if (nid == null) {
|
||||
waitForResume();
|
||||
continue; // re-fetch the records after resuming
|
||||
}
|
||||
|
||||
var dnsRecords = pingDao.getNextDnsDomainRecords(1000, nid);
|
||||
for (var record : dnsRecords) {
|
||||
if (nodeId == null) {
|
||||
waitForResume();
|
||||
break; // re-fetch the records after resuming
|
||||
}
|
||||
if (processingDomainsDns.putIfAbsent(record.rootDomainName(), true) == null) {
|
||||
dnsJobQueue.put(new DnsJob.DnsRefresh(record));
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (Exception e) {
|
||||
logger.error("Error fetching next domain DNS records", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
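The scheduler above pairs small bounded queues with ConcurrentHashMap.putIfAbsent so that a domain is never queued twice while a consumer is still working on it, and so that producers block when consumers fall behind. A minimal sketch of that pattern, with plain strings standing in for the real job records:

import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;

// Plain strings stand in for the scheduler's job records; the structure is the same:
// a small bounded queue plus an "in flight" set keyed on the domain.
public class DedupQueueSketch {
    private final ArrayBlockingQueue<String> queue = new ArrayBlockingQueue<>(8);
    private final ConcurrentHashMap<String, Boolean> inFlight = new ConcurrentHashMap<>();

    void produce(List<String> domains) throws InterruptedException {
        for (String domain : domains) {
            // only enqueue if no consumer is already working on this domain
            if (inFlight.putIfAbsent(domain, Boolean.TRUE) == null) {
                queue.put(domain); // blocks when full, throttling the producer
            }
        }
    }

    void consume() throws InterruptedException {
        while (true) {
            String domain = queue.poll(1, TimeUnit.SECONDS);
            if (domain == null) continue;
            try {
                System.out.println("pinging " + domain); // stand-in for the real work
            } finally {
                inFlight.remove(domain); // allow the domain to be scheduled again
            }
        }
    }
}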
@@ -0,0 +1,167 @@
|
||||
package nu.marginalia.ping;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import com.google.inject.Guice;
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Injector;
|
||||
import nu.marginalia.WmsaHome;
|
||||
import nu.marginalia.geoip.GeoIpDictionary;
|
||||
import nu.marginalia.mq.MessageQueueFactory;
|
||||
import nu.marginalia.mqapi.ProcessInboxNames;
|
||||
import nu.marginalia.mqapi.ping.PingRequest;
|
||||
import nu.marginalia.nodecfg.NodeConfigurationService;
|
||||
import nu.marginalia.nodecfg.model.NodeConfiguration;
|
||||
import nu.marginalia.process.ProcessConfiguration;
|
||||
import nu.marginalia.process.ProcessConfigurationModule;
|
||||
import nu.marginalia.process.ProcessMainClass;
|
||||
import nu.marginalia.service.discovery.ServiceRegistryIf;
|
||||
import nu.marginalia.service.module.DatabaseModule;
|
||||
import nu.marginalia.service.module.ServiceDiscoveryModule;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.security.Security;
|
||||
import java.util.List;
|
||||
|
||||
public class PingMain extends ProcessMainClass {
|
||||
private static final Logger log = LoggerFactory.getLogger(PingMain.class);
|
||||
|
||||
private final PingJobScheduler pingJobScheduler;
|
||||
private final ServiceRegistryIf serviceRegistry;
|
||||
private final NodeConfigurationService nodeConfigurationService;
|
||||
private final int node;
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(PingMain.class);
|
||||
|
||||
@Inject
|
||||
public PingMain(MessageQueueFactory messageQueueFactory,
|
||||
ProcessConfiguration config,
|
||||
Gson gson,
|
||||
PingJobScheduler pingJobScheduler,
|
||||
ServiceRegistryIf serviceRegistry,
|
||||
NodeConfigurationService nodeConfigurationService,
|
||||
ProcessConfiguration processConfiguration
|
||||
) {
|
||||
super(messageQueueFactory, config, gson, ProcessInboxNames.PING_INBOX);
|
||||
|
||||
this.pingJobScheduler = pingJobScheduler;
|
||||
this.serviceRegistry = serviceRegistry;
|
||||
this.nodeConfigurationService = nodeConfigurationService;
|
||||
this.node = processConfiguration.node();
|
||||
}
|
||||
|
||||
public void runPrimary() {
|
||||
log.info("Starting PingMain...");
|
||||
|
||||
// Start the ping job scheduler
|
||||
pingJobScheduler.start(true);
|
||||
|
||||
// Watch the crawler process to suspend/resume the ping job scheduler
|
||||
try {
|
||||
serviceRegistry.watchProcess("crawler", node, (running) -> {
|
||||
if (running) {
|
||||
log.info("Crawler process is running, suspending ping job scheduler.");
|
||||
pingJobScheduler.pause(node);
|
||||
} else {
|
||||
log.warn("Crawler process is not running, resuming ping job scheduler.");
|
||||
pingJobScheduler.resume(node);
|
||||
}
|
||||
});
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new RuntimeException("Failed to watch crawler process", e);
|
||||
}
|
||||
|
||||
log.info("PingMain started successfully.");
|
||||
}
|
||||
|
||||
|
||||
public void runSecondary() {
|
||||
log.info("Starting PingMain...");
|
||||
|
||||
List<Integer> crawlerNodes = nodeConfigurationService.getAll()
|
||||
.stream()
|
||||
.filter(node -> !node.disabled())
|
||||
.filter(node -> node.profile().permitBatchCrawl())
|
||||
.map(NodeConfiguration::node)
|
||||
.toList()
|
||||
;
|
||||
|
||||
// Start the ping job scheduler
|
||||
pingJobScheduler.start(true);
|
||||
|
||||
// Watch the crawler process to suspend/resume the ping job scheduler
|
||||
try {
|
||||
serviceRegistry.watchProcessAnyNode("crawler", crawlerNodes, (running, n) -> {
|
||||
if (running) {
|
||||
log.info("Crawler process is running on node {} taking over ", n);
|
||||
pingJobScheduler.resume(n);
|
||||
} else {
|
||||
log.warn("Crawler process stopped, resuming ping job scheduler.");
|
||||
pingJobScheduler.pause(n);
|
||||
}
|
||||
});
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new RuntimeException("Failed to watch crawler process", e);
|
||||
}
|
||||
|
||||
log.info("PingMain started successfully.");
|
||||
}
|
||||
|
||||
public static void main(String... args) throws Exception {
|
||||
// Prevent Java from caching DNS lookups forever (filling up the system RAM as a result)
|
||||
Security.setProperty("networkaddress.cache.ttl" , "3600");
|
||||
|
||||
// This must run *early*
|
||||
System.setProperty("http.agent", WmsaHome.getUserAgent().uaString());
|
||||
|
||||
// If these aren't set properly, the JVM will hang forever on some requests
|
||||
System.setProperty("sun.net.client.defaultConnectTimeout", "30000");
|
||||
System.setProperty("sun.net.client.defaultReadTimeout", "30000");
|
||||
|
||||
// Set the maximum number of connections to keep alive in the connection pool
|
||||
System.setProperty("jdk.httpclient.idleTimeout", "15"); // 15 seconds
|
||||
System.setProperty("jdk.httpclient.connectionPoolSize", "256");
|
||||
|
||||
// We don't want to use too much memory caching sessions for https
|
||||
System.setProperty("javax.net.ssl.sessionCacheSize", "2048");
|
||||
|
||||
|
||||
Injector injector = Guice.createInjector(
|
||||
new PingModule(),
|
||||
new ServiceDiscoveryModule(),
|
||||
new ProcessConfigurationModule("ping"),
|
||||
new DatabaseModule(false)
|
||||
);
|
||||
|
||||
GeoIpDictionary geoIpDictionary = injector.getInstance(GeoIpDictionary.class);
|
||||
|
||||
geoIpDictionary.waitReady(); // Ensure the GeoIpDictionary is ready before proceeding
|
||||
|
||||
PingMain main = injector.getInstance(PingMain.class);
|
||||
|
||||
var instructions = main.fetchInstructions(PingRequest.class);
|
||||
|
||||
try {
|
||||
switch (instructions.value().runClass) {
|
||||
case "primary":
|
||||
log.info("Running as primary node");
|
||||
main.runPrimary();
|
||||
break;
|
||||
case "secondary":
|
||||
log.info("Running as secondary node");
|
||||
main.runSecondary();
|
||||
break;
|
||||
default:
|
||||
throw new IllegalArgumentException("Invalid runClass: " + instructions.value().runClass);
|
||||
}
|
||||
// Keep the main thread alive; the scheduler's daemon threads do the actual work
for (;;) Thread.sleep(10_000);
|
||||
}
|
||||
catch (Throwable ex) {
|
||||
logger.error("Error running ping process", ex);
|
||||
instructions.err();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@@ -0,0 +1,82 @@
|
||||
package nu.marginalia.ping;
|
||||
|
||||
import com.google.inject.AbstractModule;
|
||||
import com.google.inject.Provides;
|
||||
import com.google.inject.name.Named;
|
||||
import nu.marginalia.ping.io.HttpClientProvider;
|
||||
import nu.marginalia.ping.model.ErrorClassification;
|
||||
import org.apache.hc.client5.http.classic.HttpClient;
|
||||
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.time.Duration;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class PingModule extends AbstractModule {
|
||||
|
||||
public PingModule() throws NoSuchAlgorithmException {
|
||||
}
|
||||
|
||||
public static PingIntervalsConfiguration createPingIntervalsConfiguration() {
|
||||
Map<ErrorClassification, Duration> initialTimeouts = new HashMap<>();
|
||||
Map<ErrorClassification, Duration> maxTimeouts = new HashMap<>();
|
||||
|
||||
for (var classification : ErrorClassification.values()) {
|
||||
switch (classification) {
|
||||
case CONNECTION_ERROR -> {
|
||||
initialTimeouts.put(classification, Duration.ofMinutes(15));
|
||||
maxTimeouts.put(classification, Duration.ofDays(1));
|
||||
}
|
||||
case HTTP_CLIENT_ERROR -> {
|
||||
initialTimeouts.put(classification, Duration.ofMinutes(15));
|
||||
maxTimeouts.put(classification, Duration.ofDays(1));
|
||||
}
|
||||
case HTTP_SERVER_ERROR -> {
|
||||
initialTimeouts.put(classification, Duration.ofMinutes(8));
|
||||
maxTimeouts.put(classification, Duration.ofHours(6));
|
||||
}
|
||||
case SSL_ERROR -> {
|
||||
initialTimeouts.put(classification, Duration.ofMinutes(45));
|
||||
maxTimeouts.put(classification, Duration.ofDays(1));
|
||||
}
|
||||
case DNS_ERROR -> {
|
||||
initialTimeouts.put(classification, Duration.ofMinutes(60));
|
||||
maxTimeouts.put(classification, Duration.ofDays(7));
|
||||
}
|
||||
case TIMEOUT -> {
|
||||
initialTimeouts.put(classification, Duration.ofMinutes(5));
|
||||
maxTimeouts.put(classification, Duration.ofHours(6));
|
||||
}
|
||||
case UNKNOWN -> {
|
||||
initialTimeouts.put(classification, Duration.ofMinutes(30));
|
||||
maxTimeouts.put(classification, Duration.ofDays(1));
|
||||
}
|
||||
case NONE -> {
|
||||
initialTimeouts.put(classification, Duration.ofHours(6));
|
||||
maxTimeouts.put(classification, Duration.ofDays(6));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new PingIntervalsConfiguration(
|
||||
Duration.ofHours(3),
|
||||
initialTimeouts,
|
||||
maxTimeouts
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void configure() {
|
||||
bind(HttpClient.class).toProvider(HttpClientProvider.class);
|
||||
|
||||
bind(PingIntervalsConfiguration.class).toInstance(createPingIntervalsConfiguration());
|
||||
}
|
||||
|
||||
@Provides
|
||||
@Named("ping.nameservers")
|
||||
public List<String> providePingNameservers() {
|
||||
// Google's public DNS servers currently have the best rate limiting
|
||||
return List.of("8.8.8.8", "8.8.4.4");
|
||||
}
|
||||
}
|
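A hedged usage sketch for the module above: it assumes PingModule can be instantiated on its own, outside the full process wiring in PingMain, just to read back the interval configuration it binds:

import com.google.inject.Guice;
import nu.marginalia.ping.PingIntervalsConfiguration;
import nu.marginalia.ping.PingModule;
import nu.marginalia.ping.model.ErrorClassification;

public class PingModuleUsageSketch {
    public static void main(String[] args) throws Exception {
        var injector = Guice.createInjector(new PingModule());
        PingIntervalsConfiguration intervals = injector.getInstance(PingIntervalsConfiguration.class);

        // e.g. 8 minutes base / 6 hours max for HTTP server errors, per the switch above
        System.out.println("HTTP_SERVER_ERROR base interval: "
                + intervals.baseIntervals().get(ErrorClassification.HTTP_SERVER_ERROR));
    }
}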
@@ -0,0 +1,95 @@
|
||||
package nu.marginalia.ping.fetcher;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.name.Named;
|
||||
import nu.marginalia.ping.model.SingleDnsRecord;
|
||||
import org.xbill.DNS.ExtendedResolver;
|
||||
import org.xbill.DNS.Lookup;
|
||||
import org.xbill.DNS.TextParseException;
|
||||
import org.xbill.DNS.Type;
|
||||
|
||||
import java.net.UnknownHostException;
|
||||
import java.time.Duration;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.*;
|
||||
|
||||
public class PingDnsFetcher {
|
||||
private final ThreadLocal<ExtendedResolver> resolver;
|
||||
private static final ExecutorService digExecutor = Executors.newFixedThreadPool(100);
|
||||
|
||||
private static final int[] RECORD_TYPES = {
|
||||
Type.A, Type.AAAA, Type.NS, Type.MX, Type.TXT,
|
||||
Type.SOA, Type.CNAME, Type.CAA, Type.SPF
|
||||
};
|
||||
|
||||
@Inject
|
||||
public PingDnsFetcher(@Named("ping.nameservers")
|
||||
List<String> nameservers) throws UnknownHostException
|
||||
{
|
||||
resolver = ThreadLocal.withInitial(() -> createResolver(nameservers));
|
||||
}
|
||||
|
||||
private ExtendedResolver createResolver(List<String> nameservers) {
|
||||
try {
|
||||
ExtendedResolver r = new ExtendedResolver(
|
||||
nameservers.toArray(new String[0])
|
||||
);
|
||||
r.setLoadBalance(true);
|
||||
r.setTimeout(Duration.ofSeconds(5));
|
||||
return r;
|
||||
}
|
||||
catch (UnknownHostException e) {
|
||||
throw new RuntimeException("Failed to create DNS resolver", e);
|
||||
}
|
||||
}
|
||||
|
||||
private List<SingleDnsRecord> query(String domainName, int recordType) throws TextParseException {
|
||||
var resolver = this.resolver.get();
|
||||
var query = new Lookup(domainName, recordType);
|
||||
query.setResolver(resolver);
|
||||
|
||||
var result = query.run();
|
||||
|
||||
if (result == null || result.length == 0) {
|
||||
return List.of();
|
||||
}
|
||||
|
||||
List<SingleDnsRecord> records = new ArrayList<>(result.length);
|
||||
|
||||
for (var record : result) {
|
||||
if (record == null) continue;
|
||||
records.add(new SingleDnsRecord(
|
||||
Type.string(recordType),
|
||||
record.toString())
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
return records;
|
||||
}
|
||||
|
||||
public List<SingleDnsRecord> dig(String domainName) {
|
||||
List<Callable<List<SingleDnsRecord>>> tasks = new ArrayList<>(RECORD_TYPES.length);
|
||||
for (var recordType : RECORD_TYPES) {
|
||||
tasks.add(() -> query(domainName, recordType));
|
||||
}
|
||||
List<SingleDnsRecord> results = new ArrayList<>(RECORD_TYPES.length);
|
||||
try {
|
||||
List<Future<List<SingleDnsRecord>>> futures = digExecutor.invokeAll(tasks);
|
||||
for (Future<List<SingleDnsRecord>> future : futures) {
|
||||
try {
|
||||
results.addAll(future.get(1, TimeUnit.MINUTES));
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
System.err.println("Error fetching DNS records: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
System.err.println("DNS query interrupted: " + e.getMessage());
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
}
|
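For reference, a minimal standalone use of the dnsjava calls the fetcher wraps; the domain and record type are only examples, and the system default resolver is used instead of the configured nameservers:

import org.xbill.DNS.Lookup;
import org.xbill.DNS.Type;

public class DigSketch {
    public static void main(String[] args) throws Exception {
        var lookup = new Lookup("marginalia.nu", Type.MX); // example domain and record type
        var records = lookup.run();

        if (records == null) {
            System.out.println("no MX records: " + lookup.getErrorString());
            return;
        }
        for (var record : records) {
            System.out.println(Type.string(Type.MX) + " " + record);
        }
    }
}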
@@ -0,0 +1,90 @@
|
||||
package nu.marginalia.ping.fetcher;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import nu.marginalia.UserAgent;
|
||||
import nu.marginalia.WmsaHome;
|
||||
import nu.marginalia.ping.fetcher.response.*;
|
||||
import org.apache.hc.client5.http.classic.HttpClient;
|
||||
import org.apache.hc.client5.http.protocol.HttpClientContext;
|
||||
import org.apache.hc.core5.http.Header;
|
||||
import org.apache.hc.core5.http.io.entity.EntityUtils;
|
||||
import org.apache.hc.core5.http.io.support.ClassicRequestBuilder;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.SocketTimeoutException;
|
||||
import java.time.Duration;
|
||||
import java.time.Instant;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class PingHttpFetcher {
|
||||
private final UserAgent userAgent = WmsaHome.getUserAgent();
|
||||
private final HttpClient client;
|
||||
|
||||
@Inject
|
||||
public PingHttpFetcher(HttpClient client) {
|
||||
this.client = client;
|
||||
}
|
||||
|
||||
public PingRequestResponse fetchUrl(String url, Method method, String etag, String lastModified) {
|
||||
|
||||
var builder = ClassicRequestBuilder.create(method.name())
|
||||
.setUri(url)
|
||||
.addHeader("Accept", "text/*, */*;q=0.9")
|
||||
.addHeader("User-Agent", userAgent.uaString())
|
||||
.addHeader("Accept-Encoding", "gzip");
|
||||
if (etag != null) {
|
||||
builder.addHeader("If-None-Match", etag);
|
||||
}
|
||||
if (lastModified != null) {
|
||||
builder.addHeader("If-Modified-Since", lastModified);
|
||||
}
|
||||
|
||||
var req = builder.build();
|
||||
|
||||
HttpClientContext context = HttpClientContext.create();
|
||||
try {
|
||||
Instant start = Instant.now();
|
||||
return client.execute(req, context, (rsp) -> {
|
||||
|
||||
var entity = rsp.getEntity();
|
||||
|
||||
try {
|
||||
|
||||
Header[] rawHeaders = rsp.getHeaders();
|
||||
Map<String, List<String>> headers = new HashMap<>(rawHeaders.length);
|
||||
for (Header header : rawHeaders) {
|
||||
headers.computeIfAbsent(header.getName(), k -> new ArrayList<>())
|
||||
.add(header.getValue());
|
||||
}
|
||||
|
||||
if (method == Method.GET && entity == null) {
|
||||
return new ProtocolError("GET request returned no content");
|
||||
}
|
||||
|
||||
byte[] body = entity != null ? EntityUtils.toByteArray(entity) : null;
|
||||
|
||||
Duration responseTime = Duration.between(start, Instant.now());
|
||||
|
||||
return PingRequestResponse.of(
|
||||
rsp.getVersion(),
|
||||
rsp.getCode(),
|
||||
body,
|
||||
headers,
|
||||
responseTime,
|
||||
context.getSSLSession()
|
||||
);
|
||||
} finally {
|
||||
EntityUtils.consume(entity);
|
||||
}
|
||||
});
|
||||
} catch (SocketTimeoutException ex) {
|
||||
return new TimeoutResponse(ex.getMessage());
|
||||
} catch (IOException e) {
|
||||
return new ConnectionError(e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
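The fetcher's If-None-Match / If-Modified-Since headers implement ordinary HTTP conditional requests: an unchanged resource answers 304 with no body. A small sketch of the same idea using only the JDK's HttpClient, with an example URL:

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class ConditionalGetSketch {
    public static void main(String[] args) throws Exception {
        HttpClient client = HttpClient.newHttpClient();
        URI uri = URI.create("https://www.marginalia.nu/"); // example URL

        HttpResponse<String> first = client.send(
                HttpRequest.newBuilder(uri).build(),
                HttpResponse.BodyHandlers.ofString());

        String etag = first.headers().firstValue("ETag").orElse(null);
        if (etag == null) {
            System.out.println("server sent no ETag; nothing to revalidate against");
            return;
        }

        HttpResponse<Void> second = client.send(
                HttpRequest.newBuilder(uri).header("If-None-Match", etag).build(),
                HttpResponse.BodyHandlers.discarding());

        System.out.println("revalidation status: " + second.statusCode()); // 304 if unchanged
    }
}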
@@ -0,0 +1,4 @@
|
||||
package nu.marginalia.ping.fetcher.response;
|
||||
|
||||
public record ConnectionError(String errorMessage) implements PingRequestResponse {
|
||||
}
|
@@ -0,0 +1,18 @@
|
||||
package nu.marginalia.ping.fetcher.response;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public record Headers(Map<String, List<String>> headers) {
|
||||
public List<String> get(String name) {
|
||||
return headers.getOrDefault(name, List.of());
|
||||
}
|
||||
|
||||
public String getFirst(String name) {
|
||||
return headers.getOrDefault(name, List.of()).stream().findFirst().orElse(null);
|
||||
}
|
||||
|
||||
public boolean contains(String name) {
|
||||
return headers.containsKey(name);
|
||||
}
|
||||
}
|
@@ -0,0 +1,12 @@
|
||||
package nu.marginalia.ping.fetcher.response;
|
||||
|
||||
import java.time.Duration;
|
||||
|
||||
public record HttpResponse(
|
||||
String version,
|
||||
int httpStatus,
|
||||
byte[] body,
|
||||
Headers headers,
|
||||
Duration httpResponseTime
|
||||
) implements PingRequestResponse {
|
||||
}
|
@@ -0,0 +1,15 @@
|
||||
package nu.marginalia.ping.fetcher.response;
|
||||
|
||||
import java.security.cert.Certificate;
|
||||
import java.time.Duration;
|
||||
|
||||
public record HttpsResponse(
|
||||
String version,
|
||||
int httpStatus,
|
||||
byte[] body,
|
||||
Headers headers,
|
||||
Certificate[] sslCertificates,
|
||||
SslMetadata sslMetadata,
|
||||
Duration httpResponseTime
|
||||
) implements PingRequestResponse {
|
||||
}
|
@@ -0,0 +1,5 @@
|
||||
package nu.marginalia.ping.fetcher.response;
|
||||
|
||||
public enum Method {
|
||||
GET, HEAD
|
||||
}
|
@@ -0,0 +1,22 @@
|
||||
package nu.marginalia.ping.fetcher.response;
|
||||
|
||||
import org.apache.hc.core5.http.ProtocolVersion;
|
||||
|
||||
import javax.net.ssl.SSLPeerUnverifiedException;
|
||||
import javax.net.ssl.SSLSession;
|
||||
import java.time.Duration;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public sealed interface PingRequestResponse
|
||||
permits HttpResponse, HttpsResponse, TimeoutResponse, ConnectionError, ProtocolError, UnknownHostError {
|
||||
static PingRequestResponse of(ProtocolVersion version, int httpStatus, byte[] body, Map<String, List<String>> headers, Duration time, SSLSession sslSession) throws SSLPeerUnverifiedException {
|
||||
|
||||
if (sslSession == null) {
|
||||
return new HttpResponse(version.toString(), httpStatus, body, new Headers(headers), time);
|
||||
} else {
|
||||
return new HttpsResponse(version.toString(), httpStatus, body, new Headers(headers), sslSession.getPeerCertificates(), new SslMetadata(sslSession), time);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@@ -0,0 +1,4 @@
|
||||
package nu.marginalia.ping.fetcher.response;
|
||||
|
||||
public record ProtocolError(String errorMessage) implements PingRequestResponse {
|
||||
}
|
@@ -0,0 +1,14 @@
|
||||
package nu.marginalia.ping.fetcher.response;
|
||||
|
||||
import javax.net.ssl.SSLSession;
|
||||
|
||||
public record SslMetadata(
|
||||
String cipherSuite,
|
||||
String protocol) {
|
||||
public SslMetadata(SSLSession session) {
|
||||
this(
|
||||
session.getCipherSuite(),
|
||||
session.getProtocol()
|
||||
);
|
||||
}
|
||||
}
|
@@ -0,0 +1,4 @@
|
||||
package nu.marginalia.ping.fetcher.response;
|
||||
|
||||
public record TimeoutResponse(String errorMessage) implements PingRequestResponse {
|
||||
}
|
@@ -0,0 +1,4 @@
|
||||
package nu.marginalia.ping.fetcher.response;
|
||||
|
||||
public record UnknownHostError() implements PingRequestResponse {
|
||||
}
|
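Since PingRequestResponse is sealed, callers can classify a result with an exhaustive switch over the record types. The mapping below is an illustrative guess, not the project's actual classification logic:

import nu.marginalia.ping.fetcher.response.*;
import nu.marginalia.ping.model.ErrorClassification;

public class ResponseClassifierSketch {
    static ErrorClassification classify(PingRequestResponse response) {
        return switch (response) {
            case HttpResponse r  when r.httpStatus() >= 500 -> ErrorClassification.HTTP_SERVER_ERROR;
            case HttpResponse r  when r.httpStatus() >= 400 -> ErrorClassification.HTTP_CLIENT_ERROR;
            case HttpResponse r  -> ErrorClassification.NONE;
            case HttpsResponse r when r.httpStatus() >= 500 -> ErrorClassification.HTTP_SERVER_ERROR;
            case HttpsResponse r when r.httpStatus() >= 400 -> ErrorClassification.HTTP_CLIENT_ERROR;
            case HttpsResponse r -> ErrorClassification.NONE;
            case TimeoutResponse r   -> ErrorClassification.TIMEOUT;
            case ConnectionError r   -> ErrorClassification.CONNECTION_ERROR;
            case ProtocolError r     -> ErrorClassification.UNKNOWN;
            case UnknownHostError r  -> ErrorClassification.DNS_ERROR;
        };
    }
}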
@@ -0,0 +1,129 @@
|
||||
package nu.marginalia.ping.io;
|
||||
|
||||
import com.google.inject.Provider;
|
||||
import org.apache.hc.client5.http.ConnectionKeepAliveStrategy;
|
||||
import org.apache.hc.client5.http.classic.HttpClient;
|
||||
import org.apache.hc.client5.http.config.ConnectionConfig;
|
||||
import org.apache.hc.client5.http.config.RequestConfig;
|
||||
import org.apache.hc.client5.http.cookie.StandardCookieSpec;
|
||||
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
|
||||
import org.apache.hc.client5.http.impl.classic.HttpClients;
|
||||
import org.apache.hc.client5.http.impl.io.PoolingHttpClientConnectionManager;
|
||||
import org.apache.hc.client5.http.impl.io.PoolingHttpClientConnectionManagerBuilder;
|
||||
import org.apache.hc.client5.http.ssl.DefaultClientTlsStrategy;
|
||||
import org.apache.hc.client5.http.ssl.NoopHostnameVerifier;
|
||||
import org.apache.hc.core5.http.HeaderElement;
|
||||
import org.apache.hc.core5.http.HeaderElements;
|
||||
import org.apache.hc.core5.http.HttpResponse;
|
||||
import org.apache.hc.core5.http.io.SocketConfig;
|
||||
import org.apache.hc.core5.http.message.MessageSupport;
|
||||
import org.apache.hc.core5.http.protocol.HttpContext;
|
||||
import org.apache.hc.core5.util.TimeValue;
|
||||
import org.apache.hc.core5.util.Timeout;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import javax.net.ssl.SSLContext;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.util.Iterator;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
public class HttpClientProvider implements Provider<HttpClient> {
|
||||
private static final HttpClient client;
|
||||
private static PoolingHttpClientConnectionManager connectionManager;
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(HttpClientProvider.class);
|
||||
|
||||
static {
|
||||
try {
|
||||
client = createClient();
|
||||
} catch (NoSuchAlgorithmException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private static CloseableHttpClient createClient() throws NoSuchAlgorithmException {
|
||||
final ConnectionConfig connectionConfig = ConnectionConfig.custom()
|
||||
.setSocketTimeout(30, TimeUnit.SECONDS)
|
||||
.setConnectTimeout(30, TimeUnit.SECONDS)
|
||||
.setValidateAfterInactivity(TimeValue.ofSeconds(5))
|
||||
.build();
|
||||
|
||||
connectionManager = PoolingHttpClientConnectionManagerBuilder.create()
|
||||
.setMaxConnPerRoute(2)
|
||||
.setMaxConnTotal(5000)
|
||||
.setDefaultConnectionConfig(connectionConfig)
|
||||
.setTlsSocketStrategy(
|
||||
new DefaultClientTlsStrategy(SSLContext.getDefault(), NoopHostnameVerifier.INSTANCE))
|
||||
.build();
|
||||
|
||||
connectionManager.setDefaultSocketConfig(SocketConfig.custom()
|
||||
.setSoLinger(TimeValue.ofSeconds(-1))
|
||||
.setSoTimeout(Timeout.ofSeconds(10))
|
||||
.build()
|
||||
);
|
||||
|
||||
Thread.ofPlatform().daemon(true).start(() -> {
|
||||
try {
|
||||
for (;;) {
|
||||
TimeUnit.SECONDS.sleep(15);
|
||||
logger.info("Connection pool stats: {}", connectionManager.getTotalStats());
|
||||
}
|
||||
}
|
||||
catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
}
|
||||
});
|
||||
|
||||
final RequestConfig defaultRequestConfig = RequestConfig.custom()
|
||||
.setCookieSpec(StandardCookieSpec.RELAXED)
|
||||
.setResponseTimeout(10, TimeUnit.SECONDS)
|
||||
.setConnectionRequestTimeout(5, TimeUnit.MINUTES)
|
||||
.build();
|
||||
|
||||
return HttpClients.custom()
|
||||
.setConnectionManager(connectionManager)
|
||||
.setRetryStrategy(new RetryStrategy())
|
||||
.setKeepAliveStrategy(new ConnectionKeepAliveStrategy() {
|
||||
// Default keep-alive duration is 3 minutes, but this is too long for us,
|
||||
// as we are either going to re-use it fairly quickly or close it for a long time.
|
||||
//
|
||||
// So we set it to 30 seconds or clamp the server-provided value to a minimum of 10 seconds.
|
||||
private static final TimeValue defaultValue = TimeValue.ofSeconds(30);
|
||||
|
||||
@Override
|
||||
public TimeValue getKeepAliveDuration(HttpResponse response, HttpContext context) {
|
||||
final Iterator<HeaderElement> it = MessageSupport.iterate(response, HeaderElements.KEEP_ALIVE);
|
||||
|
||||
while (it.hasNext()) {
|
||||
final HeaderElement he = it.next();
|
||||
final String param = he.getName();
|
||||
final String value = he.getValue();
|
||||
|
||||
if (value == null)
|
||||
continue;
|
||||
if (!"timeout".equalsIgnoreCase(param))
|
||||
continue;
|
||||
|
||||
try {
|
||||
long timeout = Long.parseLong(value);
|
||||
timeout = Math.clamp(timeout, 10, defaultValue.toSeconds()); // clamp into [10s, 30s] as described above
|
||||
return TimeValue.ofSeconds(timeout);
|
||||
} catch (final NumberFormatException ignore) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return defaultValue;
|
||||
}
|
||||
})
|
||||
.disableRedirectHandling()
|
||||
.setDefaultRequestConfig(defaultRequestConfig)
|
||||
.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
public HttpClient get() {
|
||||
return client;
|
||||
}
|
||||
}
|
||||
|
@@ -0,0 +1,70 @@
|
||||
package nu.marginalia.ping.io;
|
||||
|
||||
import org.apache.hc.client5.http.HttpRequestRetryStrategy;
|
||||
import org.apache.hc.core5.http.HttpRequest;
|
||||
import org.apache.hc.core5.http.HttpResponse;
|
||||
import org.apache.hc.core5.http.protocol.HttpContext;
|
||||
import org.apache.hc.core5.util.TimeValue;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import javax.net.ssl.SSLException;
|
||||
import java.io.IOException;
|
||||
import java.net.SocketTimeoutException;
|
||||
import java.net.UnknownHostException;
|
||||
|
||||
public class RetryStrategy implements HttpRequestRetryStrategy {
|
||||
private static final Logger logger = LoggerFactory.getLogger(RetryStrategy.class);
|
||||
|
||||
@Override
|
||||
public boolean retryRequest(HttpRequest request, IOException exception, int executionCount, HttpContext context) {
|
||||
return switch (exception) {
|
||||
case SocketTimeoutException ste -> false;
|
||||
case SSLException ssle -> false;
|
||||
case UnknownHostException uhe -> false;
|
||||
default -> executionCount <= 3;
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean retryRequest(HttpResponse response, int executionCount, HttpContext context) {
|
||||
return switch (response.getCode()) {
|
||||
case 500, 503 -> executionCount <= 2;
|
||||
case 429 -> executionCount <= 3;
|
||||
default -> false;
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public TimeValue getRetryInterval(HttpRequest request, IOException exception, int executionCount, HttpContext context) {
|
||||
return TimeValue.ofSeconds(1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public TimeValue getRetryInterval(HttpResponse response, int executionCount, HttpContext context) {
|
||||
|
||||
int statusCode = response.getCode();
|
||||
|
||||
// Give 503 a bit more time
|
||||
if (statusCode == 503) return TimeValue.ofSeconds(5);
|
||||
|
||||
if (statusCode == 429) {
|
||||
// get the Retry-After header
|
||||
var retryAfterHeader = response.getFirstHeader("Retry-After");
String retryAfter = retryAfterHeader != null ? retryAfterHeader.getValue() : null;
|
||||
if (retryAfter == null) {
|
||||
return TimeValue.ofSeconds(2);
|
||||
}
|
||||
|
||||
try {
|
||||
int retryAfterTime = Integer.parseInt(retryAfter);
|
||||
retryAfterTime = Math.clamp(retryAfterTime, 1, 5);
|
||||
|
||||
return TimeValue.ofSeconds(retryAfterTime);
|
||||
} catch (NumberFormatException e) {
|
||||
logger.warn("Invalid Retry-After header: {}", retryAfter);
|
||||
}
|
||||
}
|
||||
|
||||
return TimeValue.ofSeconds(2);
|
||||
}
|
||||
}
|
@@ -0,0 +1,29 @@
|
||||
package nu.marginalia.ping.model;
|
||||
|
||||
public enum AvailabilityOutageType {
|
||||
NONE,
|
||||
TIMEOUT,
|
||||
SSL_ERROR,
|
||||
DNS_ERROR,
|
||||
CONNECTION_ERROR,
|
||||
HTTP_CLIENT_ERROR,
|
||||
HTTP_SERVER_ERROR,
|
||||
UNKNOWN;
|
||||
|
||||
public static AvailabilityOutageType fromErrorClassification(ErrorClassification errorClassification) {
|
||||
if (null == errorClassification) {
|
||||
return UNKNOWN;
|
||||
}
|
||||
|
||||
return switch (errorClassification) {
|
||||
case NONE -> NONE;
|
||||
case TIMEOUT -> TIMEOUT;
|
||||
case SSL_ERROR -> SSL_ERROR;
|
||||
case DNS_ERROR -> DNS_ERROR;
|
||||
case CONNECTION_ERROR -> CONNECTION_ERROR;
|
||||
case HTTP_CLIENT_ERROR -> HTTP_CLIENT_ERROR;
|
||||
case HTTP_SERVER_ERROR -> HTTP_SERVER_ERROR;
|
||||
case UNKNOWN -> UNKNOWN;
|
||||
};
|
||||
}
|
||||
}
|
@@ -0,0 +1,49 @@
|
||||
package nu.marginalia.ping.model;
|
||||
|
||||
import java.sql.Connection;
|
||||
import java.sql.SQLException;
|
||||
import java.time.Instant;
|
||||
|
||||
public record DomainAvailabilityEvent(
|
||||
int domainId,
|
||||
int nodeId,
|
||||
boolean available,
|
||||
AvailabilityOutageType outageType, // e.g., 'TIMEOUT', 'DNS_ERROR', etc.
|
||||
Integer httpStatusCode, // Nullable, as it may not always be applicable
|
||||
String errorMessage, // Specific error details
|
||||
Instant tsUpdate // Timestamp of the last update
|
||||
) implements WritableModel {
|
||||
|
||||
@Override
|
||||
public void write(Connection conn) throws SQLException {
|
||||
try (var ps = conn.prepareStatement("""
|
||||
INSERT INTO DOMAIN_AVAILABILITY_EVENTS (
|
||||
domain_id,
|
||||
node_id,
|
||||
available,
|
||||
outage_type,
|
||||
http_status_code,
|
||||
error_message,
|
||||
ts_change
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?)
|
||||
"""))
|
||||
{
|
||||
ps.setInt(1, domainId());
|
||||
ps.setInt(2, nodeId());
|
||||
ps.setBoolean(3, available());
|
||||
ps.setString(4, outageType().name());
|
||||
if (httpStatusCode() == null) {
|
||||
ps.setNull(5, java.sql.Types.INTEGER);
|
||||
} else {
|
||||
ps.setInt(5, httpStatusCode());
|
||||
}
|
||||
if (errorMessage() == null) {
|
||||
ps.setNull(6, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(6, errorMessage());
|
||||
}
|
||||
ps.setTimestamp(7, java.sql.Timestamp.from(tsUpdate()));
|
||||
ps.executeUpdate();
|
||||
}
|
||||
}
|
||||
}
|
@@ -0,0 +1,356 @@
|
||||
package nu.marginalia.ping.model;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.sql.Connection;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.time.Duration;
|
||||
import java.time.Instant;
|
||||
|
||||
public record DomainAvailabilityRecord(
|
||||
int domainId,
|
||||
int nodeId,
|
||||
boolean serverAvailable,
|
||||
@Nullable byte[] serverIp,
|
||||
@Nullable Integer asn,
|
||||
@Nullable Long dataHash,
|
||||
@Nullable Long securityConfigHash,
|
||||
@Nullable HttpSchema httpSchema,
|
||||
@Nullable String httpEtag,
|
||||
@Nullable String httpLastModified,
|
||||
@Nullable Integer httpStatus,
|
||||
@Nullable String httpLocation,
|
||||
@Nullable Duration httpResponseTime,
|
||||
@Nullable ErrorClassification errorClassification,
|
||||
@Nullable String errorMessage,
|
||||
|
||||
@Nullable Instant tsLastPing,
|
||||
@Nullable Instant tsLastAvailable,
|
||||
@Nullable Instant tsLastError,
|
||||
|
||||
Instant nextScheduledUpdate,
|
||||
int backoffConsecutiveFailures,
|
||||
Duration backoffFetchInterval
|
||||
)
|
||||
implements WritableModel
|
||||
{
|
||||
public DomainAvailabilityRecord(ResultSet rs) throws SQLException {
|
||||
this(
|
||||
rs.getInt("DOMAIN_AVAILABILITY_INFORMATION.DOMAIN_ID"),
|
||||
rs.getInt("DOMAIN_AVAILABILITY_INFORMATION.NODE_ID"),
|
||||
rs.getBoolean("DOMAIN_AVAILABILITY_INFORMATION.SERVER_AVAILABLE"),
|
||||
rs.getBytes("DOMAIN_AVAILABILITY_INFORMATION.SERVER_IP"),
|
||||
rs.getObject("DOMAIN_AVAILABILITY_INFORMATION.SERVER_IP_ASN", Integer.class),
|
||||
rs.getObject("DOMAIN_AVAILABILITY_INFORMATION.DATA_HASH", Long.class),
|
||||
rs.getObject("DOMAIN_AVAILABILITY_INFORMATION.SECURITY_CONFIG_HASH", Long.class),
|
||||
httpSchemaFromString(rs.getObject("DOMAIN_AVAILABILITY_INFORMATION.HTTP_SCHEMA", String.class)),
|
||||
rs.getString("DOMAIN_AVAILABILITY_INFORMATION.HTTP_ETAG"),
|
||||
rs.getString("DOMAIN_AVAILABILITY_INFORMATION.HTTP_LAST_MODIFIED"),
|
||||
rs.getObject("DOMAIN_AVAILABILITY_INFORMATION.HTTP_STATUS", Integer.class),
|
||||
rs.getString("DOMAIN_AVAILABILITY_INFORMATION.HTTP_LOCATION"),
|
||||
durationFromMillis(rs.getObject("DOMAIN_AVAILABILITY_INFORMATION.HTTP_RESPONSE_TIME_MS", Integer.class)),
|
||||
errorClassificationFromString(rs.getObject("DOMAIN_AVAILABILITY_INFORMATION.ERROR_CLASSIFICATION", String.class)),
|
||||
rs.getString("DOMAIN_AVAILABILITY_INFORMATION.ERROR_MESSAGE"),
|
||||
rs.getObject("DOMAIN_AVAILABILITY_INFORMATION.TS_LAST_PING", Instant.class),
|
||||
rs.getObject("DOMAIN_AVAILABILITY_INFORMATION.TS_LAST_AVAILABLE", Instant.class),
|
||||
rs.getObject("DOMAIN_AVAILABILITY_INFORMATION.TS_LAST_ERROR", Instant.class),
|
||||
rs.getObject("DOMAIN_AVAILABILITY_INFORMATION.NEXT_SCHEDULED_UPDATE", Instant.class),
|
||||
rs.getInt("DOMAIN_AVAILABILITY_INFORMATION.BACKOFF_CONSECUTIVE_FAILURES"),
|
||||
Duration.ofSeconds(rs.getInt("DOMAIN_AVAILABILITY_INFORMATION.BACKOFF_FETCH_INTERVAL"))
|
||||
);
|
||||
}
|
||||
|
||||
private static HttpSchema httpSchemaFromString(@Nullable String schema) {
|
||||
return schema == null ? null : HttpSchema.valueOf(schema);
|
||||
}
|
||||
private static ErrorClassification errorClassificationFromString(@Nullable String classification) {
|
||||
return classification == null ? null : ErrorClassification.valueOf(classification);
|
||||
}
|
||||
private static Duration durationFromMillis(@Nullable Integer millis) {
|
||||
return millis == null ? null : Duration.ofMillis(millis);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(Connection connection) throws SQLException {
|
||||
try (var ps = connection.prepareStatement(
|
||||
"""
|
||||
REPLACE INTO DOMAIN_AVAILABILITY_INFORMATION (
|
||||
domain_id,
|
||||
node_id,
|
||||
server_available,
|
||||
server_ip,
|
||||
data_hash,
|
||||
security_config_hash,
|
||||
http_schema,
|
||||
http_etag,
|
||||
http_last_modified,
|
||||
http_status,
|
||||
http_location,
|
||||
http_response_time_ms,
|
||||
error_classification,
|
||||
error_message,
|
||||
ts_last_ping,
|
||||
ts_last_available,
|
||||
ts_last_error,
|
||||
next_scheduled_update,
|
||||
backoff_consecutive_failures,
|
||||
backoff_fetch_interval,
|
||||
server_ip_asn)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,?)
|
||||
""")) {
|
||||
|
||||
ps.setInt(1, domainId());
|
||||
ps.setInt(2, nodeId());
|
||||
ps.setBoolean(3, serverAvailable());
|
||||
if (serverIp() == null) {
|
||||
ps.setNull(4, java.sql.Types.BINARY);
|
||||
} else {
|
||||
ps.setBytes(4, serverIp());
|
||||
}
|
||||
if (dataHash() == null) {
|
||||
ps.setNull(5, java.sql.Types.BIGINT);
|
||||
} else {
|
||||
ps.setLong(5, dataHash());
|
||||
}
|
||||
if (securityConfigHash() == null) {
|
||||
ps.setNull(6, java.sql.Types.BIGINT);
|
||||
} else {
|
||||
ps.setLong(6, securityConfigHash());
|
||||
}
|
||||
if (httpSchema() == null) {
|
||||
ps.setNull(7, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(7, httpSchema().name());
|
||||
}
|
||||
if (httpEtag() == null) {
|
||||
ps.setNull(8, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(8, httpEtag());
|
||||
}
|
||||
if (httpLastModified() == null) {
|
||||
ps.setNull(9, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(9, httpLastModified());
|
||||
}
|
||||
if (httpStatus() == null) {
|
||||
ps.setNull(10, java.sql.Types.INTEGER);
|
||||
}
|
||||
else {
|
||||
ps.setInt(10, httpStatus());
|
||||
}
|
||||
if (httpLocation() == null) {
|
||||
ps.setNull(11, java.sql.Types.VARCHAR);
|
||||
}
|
||||
else {
|
||||
ps.setString(11, httpLocation());
|
||||
}
|
||||
|
||||
if (httpResponseTime() == null) {
|
||||
ps.setNull(12, java.sql.Types.SMALLINT);
|
||||
}
|
||||
else {
|
||||
ps.setShort(12, (short) httpResponseTime().toMillis());
|
||||
}
|
||||
|
||||
if (errorClassification() == null) {
|
||||
ps.setNull(13, java.sql.Types.VARCHAR);
|
||||
}
|
||||
else {
|
||||
ps.setString(13, errorClassification().name());
|
||||
}
|
||||
|
||||
if (errorMessage() == null) {
|
||||
ps.setNull(14, java.sql.Types.VARCHAR);
|
||||
}
|
||||
else {
|
||||
ps.setString(14, errorMessage());
|
||||
}
|
||||
|
||||
ps.setTimestamp(15, java.sql.Timestamp.from(tsLastPing()));
|
||||
|
||||
if (tsLastAvailable() == null) {
|
||||
ps.setNull(16, java.sql.Types.TIMESTAMP);
|
||||
}
|
||||
else {
|
||||
ps.setTimestamp(16, java.sql.Timestamp.from(tsLastAvailable()));
|
||||
}
|
||||
if (tsLastError() == null) {
|
||||
ps.setNull(17, java.sql.Types.TIMESTAMP);
|
||||
}
|
||||
else {
|
||||
ps.setTimestamp(17, java.sql.Timestamp.from(tsLastError()));
|
||||
}
|
||||
|
||||
ps.setTimestamp(18, java.sql.Timestamp.from(nextScheduledUpdate()));
|
||||
ps.setInt(19, backoffConsecutiveFailures());
|
||||
ps.setInt(20, (int) backoffFetchInterval().getSeconds());
|
||||
|
||||
if (asn() == null) {
|
||||
ps.setNull(21, java.sql.Types.INTEGER);
|
||||
} else {
|
||||
ps.setInt(21, asn());
|
||||
}
|
||||
|
||||
ps.executeUpdate();
|
||||
}
|
||||
}
|
||||
|
||||
public static class Builder {
|
||||
private int domainId;
|
||||
private int nodeId;
|
||||
private boolean serverAvailable;
|
||||
private byte[] serverIp;
|
||||
private Integer serverIpAsn;
|
||||
private Long dataHash;
|
||||
private Long securityConfigHash;
|
||||
private HttpSchema httpSchema;
|
||||
private String httpEtag;
|
||||
private String httpLastModified;
|
||||
private Integer httpStatus;
|
||||
private String httpLocation;
|
||||
private Duration httpResponseTime;
|
||||
private ErrorClassification errorClassification;
|
||||
private String errorMessage;
|
||||
private Instant tsLastPing;
|
||||
private Instant tsLastAvailable;
|
||||
private Instant tsLastError;
|
||||
private Instant nextScheduledUpdate;
|
||||
private int backoffConsecutiveFailures;
|
||||
private Duration backoffFetchInterval;
|
||||
|
||||
public Builder domainId(int domainId) {
|
||||
this.domainId = domainId;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder nodeId(int nodeId) {
|
||||
this.nodeId = nodeId;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder serverAvailable(boolean serverAvailable) {
|
||||
this.serverAvailable = serverAvailable;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder serverIp(byte[] serverIp) {
|
||||
this.serverIp = serverIp;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder serverIpAsn(Integer asn) {
|
||||
this.serverIpAsn = asn;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder dataHash(Long dataHash) {
|
||||
this.dataHash = dataHash;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder securityConfigHash(Long securityConfigHash) {
|
||||
this.securityConfigHash = securityConfigHash;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder httpSchema(HttpSchema httpSchema) {
|
||||
this.httpSchema = httpSchema;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder httpEtag(String httpEtag) {
|
||||
this.httpEtag = httpEtag;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder httpLastModified(String httpLastModified) {
|
||||
this.httpLastModified = httpLastModified;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder httpStatus(Integer httpStatus) {
|
||||
this.httpStatus = httpStatus;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder httpLocation(String httpLocation) {
|
||||
this.httpLocation = httpLocation;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder httpResponseTime(Duration httpResponseTime) {
|
||||
this.httpResponseTime = httpResponseTime;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder errorClassification(ErrorClassification errorClassification) {
|
||||
this.errorClassification = errorClassification;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder errorMessage(String errorMessage) {
|
||||
this.errorMessage = errorMessage;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder tsLastPing(Instant tsLastPing) {
|
||||
this.tsLastPing = tsLastPing;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder tsLastAvailable(Instant tsLastAvailable) {
|
||||
this.tsLastAvailable = tsLastAvailable;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder tsLastError(Instant tsLastError) {
|
||||
this.tsLastError = tsLastError;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder nextScheduledUpdate(Instant nextScheduledUpdate) {
|
||||
this.nextScheduledUpdate = nextScheduledUpdate;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder backoffConsecutiveFailures(int backoffConsecutiveFailures) {
|
||||
this.backoffConsecutiveFailures = backoffConsecutiveFailures;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder backoffFetchInterval(Duration backoffFetchInterval) {
|
||||
this.backoffFetchInterval = backoffFetchInterval;
|
||||
return this;
|
||||
}
|
||||
|
||||
public DomainAvailabilityRecord build() {
|
||||
return new DomainAvailabilityRecord(
|
||||
domainId,
|
||||
nodeId,
|
||||
serverAvailable,
|
||||
serverIp,
|
||||
serverIpAsn,
|
||||
dataHash,
|
||||
securityConfigHash,
|
||||
httpSchema,
|
||||
httpEtag,
|
||||
httpLastModified,
|
||||
httpStatus,
|
||||
httpLocation,
|
||||
httpResponseTime,
|
||||
errorClassification,
|
||||
errorMessage,
|
||||
tsLastPing,
|
||||
tsLastAvailable,
|
||||
tsLastError,
|
||||
nextScheduledUpdate,
|
||||
backoffConsecutiveFailures,
|
||||
backoffFetchInterval
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
public static Builder builder() {
|
||||
return new Builder();
|
||||
}
|
||||
}
|
@@ -0,0 +1,364 @@
|
||||
package nu.marginalia.ping.model;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import nu.marginalia.model.gson.GsonFactory;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.sql.Connection;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.time.Instant;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public record DomainDnsRecord(
|
||||
Integer dnsRootDomainId,
|
||||
String rootDomainName,
|
||||
int nodeAffinity,
|
||||
@Nullable List<String> aRecords,
|
||||
@Nullable List<String> aaaaRecords,
|
||||
@Nullable String cnameRecord,
|
||||
@Nullable List<String> mxRecords,
|
||||
@Nullable List<String> caaRecords,
|
||||
@Nullable List<String> txtRecords,
|
||||
@Nullable List<String> nsRecords,
|
||||
@Nullable String soaRecord,
|
||||
Instant tsLastUpdate,
|
||||
Instant tsNextScheduledUpdate,
|
||||
int dnsCheckPriority)
|
||||
implements WritableModel
|
||||
{
|
||||
private static final Gson gson = GsonFactory.get();
|
||||
|
||||
public DomainDnsRecord(ResultSet rs) throws SQLException {
|
||||
this(
|
||||
rs.getObject("DNS_ROOT_DOMAIN_ID", Integer.class),
|
||||
rs.getString("ROOT_DOMAIN_NAME"),
|
||||
rs.getInt("NODE_AFFINITY"),
|
||||
deserializeJsonArray(rs.getString("DNS_A_RECORDS")),
|
||||
deserializeJsonArray(rs.getString("DNS_AAAA_RECORDS")),
|
||||
rs.getString("DNS_CNAME_RECORD"),
|
||||
deserializeJsonArray(rs.getString("DNS_MX_RECORDS")),
|
||||
deserializeJsonArray(rs.getString("DNS_CAA_RECORDS")),
|
||||
deserializeJsonArray(rs.getString("DNS_TXT_RECORDS")),
|
||||
deserializeJsonArray(rs.getString("DNS_NS_RECORDS")),
|
||||
rs.getString("DNS_SOA_RECORD"),
|
||||
rs.getObject("TS_LAST_DNS_CHECK", Instant.class),
|
||||
rs.getObject("TS_NEXT_DNS_CHECK", Instant.class),
|
||||
rs.getInt("DNS_CHECK_PRIORITY")
|
||||
);
|
||||
}
|
||||
|
||||
static List<String> deserializeJsonArray(@Nullable String json) {
|
||||
if (json == null || json.isEmpty()) {
|
||||
return List.of();
|
||||
}
|
||||
return gson.fromJson(json, List.class);
|
||||
}
|
||||
|
||||
public static Builder builder() {
|
||||
return new Builder();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(Connection connection) throws SQLException {
|
||||
|
||||
if (dnsRootDomainId() != null) {
|
||||
update(connection);
|
||||
return;
|
||||
}
|
||||
|
||||
try (var ps = connection.prepareStatement("""
|
||||
REPLACE INTO DOMAIN_DNS_INFORMATION (
|
||||
ROOT_DOMAIN_NAME,
|
||||
NODE_AFFINITY,
|
||||
DNS_A_RECORDS,
|
||||
DNS_AAAA_RECORDS,
|
||||
DNS_CNAME_RECORD,
|
||||
DNS_MX_RECORDS,
|
||||
DNS_CAA_RECORDS,
|
||||
DNS_TXT_RECORDS,
|
||||
DNS_NS_RECORDS,
|
||||
DNS_SOA_RECORD,
|
||||
TS_LAST_DNS_CHECK,
|
||||
TS_NEXT_DNS_CHECK,
|
||||
DNS_CHECK_PRIORITY
|
||||
) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)
|
||||
""")) {
|
||||
|
||||
ps.setString(1, rootDomainName());
|
||||
ps.setInt(2, nodeAffinity());
|
||||
|
||||
if (aRecords() == null) {
|
||||
ps.setNull(3, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(3, gson.toJson(aRecords()));
|
||||
}
|
||||
if (aaaaRecords() == null) {
|
||||
ps.setNull(4, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(4, gson.toJson(aaaaRecords()));
|
||||
}
|
||||
if (cnameRecord() == null) {
|
||||
ps.setNull(5, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(5, cnameRecord());
|
||||
}
|
||||
if (mxRecords() == null) {
|
||||
ps.setNull(6, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(6, gson.toJson(mxRecords()));
|
||||
}
|
||||
if (caaRecords() == null) {
|
||||
ps.setNull(7, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(7, gson.toJson(caaRecords()));
|
||||
}
|
||||
if (txtRecords() == null) {
|
||||
ps.setNull(8, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(8, gson.toJson(txtRecords()));
|
||||
}
|
||||
if (nsRecords() == null) {
|
||||
ps.setNull(9, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(9, gson.toJson(nsRecords()));
|
||||
}
|
||||
if (soaRecord() == null) {
    ps.setNull(10, java.sql.Types.VARCHAR);
} else {
    ps.setString(10, soaRecord());
}
|
||||
ps.setTimestamp(11, java.sql.Timestamp.from(tsLastUpdate()));
|
||||
ps.setTimestamp(12, java.sql.Timestamp.from(tsNextScheduledUpdate()));
|
||||
ps.setInt(13, dnsCheckPriority());
|
||||
ps.executeUpdate();
|
||||
}
|
||||
}
|
||||
|
||||
public void update(Connection connection) throws SQLException {
|
||||
|
||||
try (var ps = connection.prepareStatement("""
|
||||
REPLACE INTO DOMAIN_DNS_INFORMATION (
|
||||
DNS_ROOT_DOMAIN_ID,
|
||||
ROOT_DOMAIN_NAME,
|
||||
NODE_AFFINITY,
|
||||
DNS_A_RECORDS,
|
||||
DNS_AAAA_RECORDS,
|
||||
DNS_CNAME_RECORD,
|
||||
DNS_MX_RECORDS,
|
||||
DNS_CAA_RECORDS,
|
||||
DNS_TXT_RECORDS,
|
||||
DNS_NS_RECORDS,
|
||||
DNS_SOA_RECORD,
|
||||
TS_LAST_DNS_CHECK,
|
||||
TS_NEXT_DNS_CHECK,
|
||||
DNS_CHECK_PRIORITY
|
||||
) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)
|
||||
""")) {
|
||||
|
||||
ps.setObject(1, dnsRootDomainId(), java.sql.Types.INTEGER);
|
||||
ps.setString(2, rootDomainName());
|
||||
ps.setInt(3, nodeAffinity());
|
||||
|
||||
if (aRecords() == null) {
|
||||
ps.setNull(4, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(4, gson.toJson(aRecords()));
|
||||
}
|
||||
if (aaaaRecords() == null) {
|
||||
ps.setNull(5, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(5, gson.toJson(aaaaRecords()));
|
||||
}
|
||||
if (cnameRecord() == null) {
|
||||
ps.setNull(6, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(6, cnameRecord());
|
||||
}
|
||||
if (mxRecords() == null) {
|
||||
ps.setNull(7, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(7, gson.toJson(mxRecords()));
|
||||
}
|
||||
if (caaRecords() == null) {
|
||||
ps.setNull(8, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(8, gson.toJson(caaRecords()));
|
||||
}
|
||||
if (txtRecords() == null) {
|
||||
ps.setNull(9, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(9, gson.toJson(txtRecords()));
|
||||
}
|
||||
if (nsRecords() == null) {
|
||||
ps.setNull(10, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(10, gson.toJson(nsRecords()));
|
||||
}
|
||||
if (soaRecord() == null) {
|
||||
ps.setNull(11, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(11, soaRecord());
|
||||
}
|
||||
ps.setTimestamp(12, java.sql.Timestamp.from(tsLastUpdate()));
|
||||
ps.setTimestamp(13, java.sql.Timestamp.from(tsNextScheduledUpdate()));
|
||||
ps.setInt(14, dnsCheckPriority());
|
||||
ps.executeUpdate();
|
||||
}
|
||||
}
|
||||
|
||||
public static class Builder {
|
||||
private Integer dnsRootDomainId;
|
||||
private String rootDomainName;
|
||||
private int nodeAffinity;
|
||||
private List<String> aRecords;
|
||||
private List<String> aaaaRecords;
|
||||
private String cnameRecord;
|
||||
private List<String> mxRecords;
|
||||
private List<String> caaRecords;
|
||||
private List<String> txtRecords;
|
||||
private List<String> nsRecords;
|
||||
private String soaRecord;
|
||||
private Instant tsLastUpdate;
|
||||
private Instant tsNextScheduledUpdate;
|
||||
private int dnsCheckPriority;
|
||||
|
||||
public Builder dnsRootDomainId(Integer dnsRootDomainId) {
|
||||
this.dnsRootDomainId = dnsRootDomainId;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder rootDomainName(String rootDomainName) {
|
||||
this.rootDomainName = rootDomainName;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder nodeAffinity(int nodeAffinity) {
|
||||
this.nodeAffinity = nodeAffinity;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder addARecord(String aRecord) {
|
||||
if (this.aRecords == null) {
|
||||
this.aRecords = new ArrayList<>();
|
||||
}
|
||||
this.aRecords.add(aRecord);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder aRecords(List<String> aRecords) {
|
||||
this.aRecords = aRecords;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder addAaaaRecord(String aaaaRecord) {
|
||||
if (this.aaaaRecords == null) {
|
||||
this.aaaaRecords = new ArrayList<>();
|
||||
}
|
||||
this.aaaaRecords.add(aaaaRecord);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder aaaaRecords(List<String> aaaaRecords) {
|
||||
this.aaaaRecords = aaaaRecords;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder cnameRecord(String cnameRecord) {
|
||||
this.cnameRecord = cnameRecord;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder addMxRecord(String mxRecord) {
|
||||
if (this.mxRecords == null) {
|
||||
this.mxRecords = new ArrayList<>();
|
||||
}
|
||||
this.mxRecords.add(mxRecord);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder mxRecords(List<String> mxRecords) {
|
||||
this.mxRecords = mxRecords;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder addCaaRecord(String caaRecord) {
|
||||
if (this.caaRecords == null) {
|
||||
this.caaRecords = new ArrayList<>();
|
||||
}
|
||||
this.caaRecords.add(caaRecord);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder caaRecords(List<String> caaRecords) {
|
||||
this.caaRecords = caaRecords;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder addTxtRecord(String txtRecord) {
|
||||
if (this.txtRecords == null) {
|
||||
this.txtRecords = new ArrayList<>();
|
||||
}
|
||||
this.txtRecords.add(txtRecord);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder txtRecords(List<String> txtRecords) {
|
||||
this.txtRecords = txtRecords;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder addNsRecord(String nsRecord) {
|
||||
if (this.nsRecords == null) {
|
||||
this.nsRecords = new ArrayList<>();
|
||||
}
|
||||
this.nsRecords.add(nsRecord);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder nsRecords(List<String> nsRecords) {
|
||||
this.nsRecords = nsRecords;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder soaRecord(String soaRecord) {
|
||||
this.soaRecord = soaRecord;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder tsLastUpdate(Instant tsLastUpdate) {
|
||||
this.tsLastUpdate = tsLastUpdate;
|
||||
return this;
|
||||
}
|
||||
public Builder tsNextScheduledUpdate(Instant nextScheduledUpdate) {
|
||||
this.tsNextScheduledUpdate = nextScheduledUpdate;
|
||||
return this;
|
||||
}
|
||||
public Builder dnsCheckPriority(int dnsCheckPriority) {
|
||||
this.dnsCheckPriority = dnsCheckPriority;
|
||||
return this;
|
||||
}
|
||||
|
||||
public DomainDnsRecord build() {
|
||||
return new DomainDnsRecord(
|
||||
dnsRootDomainId,
|
||||
rootDomainName,
|
||||
nodeAffinity,
|
||||
aRecords,
|
||||
aaaaRecords,
|
||||
cnameRecord,
|
||||
mxRecords,
|
||||
caaRecords,
|
||||
txtRecords,
|
||||
nsRecords,
|
||||
soaRecord,
|
||||
tsLastUpdate,
|
||||
tsNextScheduledUpdate,
|
||||
dnsCheckPriority
|
||||
);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
@@ -0,0 +1,3 @@
package nu.marginalia.ping.model;

public record DomainReference(int domainId, int nodeId, String domainName) { }
@@ -0,0 +1,81 @@
|
||||
package nu.marginalia.ping.model;
|
||||
|
||||
import nu.marginalia.ping.util.JsonObject;
|
||||
|
||||
import java.sql.Connection;
|
||||
import java.sql.SQLException;
|
||||
import java.time.Duration;
|
||||
import java.time.Instant;
|
||||
|
||||
public record DomainSecurityEvent(
|
||||
int domainId,
|
||||
int nodeId,
|
||||
Instant tsChange,
|
||||
boolean asnChanged,
|
||||
boolean certificateFingerprintChanged,
|
||||
boolean certificateProfileChanged,
|
||||
boolean certificateSanChanged,
|
||||
boolean certificatePublicKeyChanged,
|
||||
Duration oldCertificateTimeToExpiry,
|
||||
boolean securityHeadersChanged,
|
||||
boolean ipChanged,
|
||||
boolean softwareChanged,
|
||||
JsonObject<DomainSecurityRecord> securitySignatureBefore,
|
||||
JsonObject<DomainSecurityRecord> securitySignatureAfter
|
||||
) implements WritableModel {
|
||||
|
||||
@Override
|
||||
public void write(Connection connection) throws SQLException {
|
||||
try (var ps = connection.prepareStatement("""
|
||||
INSERT INTO DOMAIN_SECURITY_EVENTS (
|
||||
domain_id,
|
||||
node_id,
|
||||
ts_change,
|
||||
change_asn,
|
||||
change_certificate_fingerprint,
|
||||
change_certificate_profile,
|
||||
change_certificate_san,
|
||||
change_certificate_public_key,
|
||||
change_security_headers,
|
||||
change_ip_address,
|
||||
change_software,
|
||||
old_cert_time_to_expiry,
|
||||
security_signature_before,
|
||||
security_signature_after
|
||||
) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)
|
||||
"""))
|
||||
{
|
||||
|
||||
ps.setInt(1, domainId());
|
||||
ps.setInt(2, nodeId());
|
||||
ps.setTimestamp(3, java.sql.Timestamp.from(tsChange()));
|
||||
ps.setBoolean(4, asnChanged());
|
||||
ps.setBoolean(5, certificateFingerprintChanged());
|
||||
ps.setBoolean(6, certificateProfileChanged());
|
||||
ps.setBoolean(7, certificateSanChanged());
|
||||
ps.setBoolean(8, certificatePublicKeyChanged());
|
||||
ps.setBoolean(9, securityHeadersChanged());
|
||||
ps.setBoolean(10, ipChanged());
|
||||
ps.setBoolean(11, softwareChanged());
|
||||
|
||||
if (oldCertificateTimeToExpiry() == null) {
|
||||
ps.setNull(12, java.sql.Types.BIGINT);
|
||||
} else {
|
||||
ps.setLong(12, oldCertificateTimeToExpiry().toHours());
|
||||
}
|
||||
|
||||
if (securitySignatureBefore() == null) {
|
||||
ps.setNull(13, java.sql.Types.BLOB);
|
||||
} else {
|
||||
ps.setBytes(13, securitySignatureBefore().compressed());
|
||||
}
|
||||
if (securitySignatureAfter() == null) {
|
||||
ps.setNull(14, java.sql.Types.BLOB);
|
||||
} else {
|
||||
ps.setBytes(14, securitySignatureAfter().compressed());
|
||||
}
|
||||
|
||||
ps.executeUpdate();
|
||||
}
|
||||
}
|
||||
}
|
@@ -0,0 +1,544 @@
|
||||
package nu.marginalia.ping.model;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.sql.Connection;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.Types;
|
||||
import java.time.Instant;
|
||||
import java.util.Objects;
|
||||
|
||||
public record DomainSecurityRecord(
|
||||
int domainId,
|
||||
int nodeId,
|
||||
@Nullable Integer asn,
|
||||
@Nullable HttpSchema httpSchema,
|
||||
@Nullable String httpVersion,
|
||||
@Nullable String httpCompression,
|
||||
@Nullable String httpCacheControl,
|
||||
@Nullable Instant sslCertNotBefore,
|
||||
@Nullable Instant sslCertNotAfter,
|
||||
@Nullable String sslCertIssuer,
|
||||
@Nullable String sslCertSubject,
|
||||
@Nullable byte[] sslCertPublicKeyHash,
|
||||
@Nullable String sslCertSerialNumber,
|
||||
@Nullable byte[] sslCertFingerprintSha256,
|
||||
@Nullable String sslCertSan,
|
||||
boolean sslCertWildcard,
|
||||
@Nullable String sslProtocol,
|
||||
@Nullable String sslCipherSuite,
|
||||
@Nullable String sslKeyExchange,
|
||||
@Nullable Integer sslCertificateChainLength,
|
||||
boolean sslCertificateValid,
|
||||
@Nullable String headerCorsAllowOrigin,
|
||||
boolean headerCorsAllowCredentials,
|
||||
@Nullable Integer headerContentSecurityPolicyHash,
|
||||
@Nullable String headerStrictTransportSecurity,
|
||||
@Nullable String headerReferrerPolicy,
|
||||
@Nullable String headerXFrameOptions,
|
||||
@Nullable String headerXContentTypeOptions,
|
||||
@Nullable String headerXXssProtection,
|
||||
@Nullable String headerServer,
|
||||
@Nullable String headerXPoweredBy,
|
||||
@Nullable Instant tsLastUpdate
|
||||
)
|
||||
implements WritableModel
|
||||
{
|
||||
|
||||
public int certificateProfileHash() {
|
||||
return Objects.hash(
|
||||
sslCertIssuer,
|
||||
sslCertSubject,
|
||||
sslCipherSuite,
|
||||
sslKeyExchange
|
||||
);
|
||||
}
|
||||
|
||||
public int securityHeadersHash() {
|
||||
return Objects.hash(
|
||||
headerCorsAllowOrigin,
|
||||
headerCorsAllowCredentials,
|
||||
headerContentSecurityPolicyHash,
|
||||
headerStrictTransportSecurity,
|
||||
headerReferrerPolicy,
|
||||
headerXFrameOptions,
|
||||
headerXContentTypeOptions,
|
||||
headerXXssProtection
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
public DomainSecurityRecord(ResultSet rs) throws SQLException {
|
||||
this(rs.getInt("DOMAIN_SECURITY_INFORMATION.DOMAIN_ID"),
|
||||
rs.getInt("DOMAIN_SECURITY_INFORMATION.NODE_ID"),
|
||||
rs.getObject("DOMAIN_SECURITY_INFORMATION.ASN", Integer.class),
|
||||
httpSchemaFromString(rs.getString("DOMAIN_SECURITY_INFORMATION.HTTP_SCHEMA")),
|
||||
rs.getString("DOMAIN_SECURITY_INFORMATION.HTTP_VERSION"),
|
||||
rs.getString("DOMAIN_SECURITY_INFORMATION.HTTP_COMPRESSION"),
|
||||
rs.getString("DOMAIN_SECURITY_INFORMATION.HTTP_CACHE_CONTROL"),
|
||||
rs.getObject("DOMAIN_SECURITY_INFORMATION.SSL_CERT_NOT_BEFORE", Instant.class),
|
||||
rs.getObject("DOMAIN_SECURITY_INFORMATION.SSL_CERT_NOT_AFTER", Instant.class),
|
||||
rs.getString("DOMAIN_SECURITY_INFORMATION.SSL_CERT_ISSUER"),
|
||||
rs.getString("DOMAIN_SECURITY_INFORMATION.SSL_CERT_SUBJECT"),
|
||||
rs.getBytes("DOMAIN_SECURITY_INFORMATION.SSL_CERT_PUBLIC_KEY_HASH"),
|
||||
rs.getString("DOMAIN_SECURITY_INFORMATION.SSL_CERT_SERIAL_NUMBER"),
|
||||
rs.getBytes("DOMAIN_SECURITY_INFORMATION.SSL_CERT_FINGERPRINT_SHA256"),
|
||||
rs.getString("DOMAIN_SECURITY_INFORMATION.SSL_CERT_SAN"),
|
||||
rs.getBoolean("DOMAIN_SECURITY_INFORMATION.SSL_CERT_WILDCARD"),
|
||||
rs.getString("DOMAIN_SECURITY_INFORMATION.SSL_PROTOCOL"),
|
||||
rs.getString("DOMAIN_SECURITY_INFORMATION.SSL_CIPHER_SUITE"),
|
||||
rs.getString("DOMAIN_SECURITY_INFORMATION.SSL_KEY_EXCHANGE"),
|
||||
rs.getObject("DOMAIN_SECURITY_INFORMATION.SSL_CERTIFICATE_CHAIN_LENGTH", Integer.class),
|
||||
rs.getBoolean("DOMAIN_SECURITY_INFORMATION.SSL_CERTIFICATE_VALID"),
|
||||
rs.getString("DOMAIN_SECURITY_INFORMATION.HEADER_CORS_ALLOW_ORIGIN"),
|
||||
rs.getBoolean("DOMAIN_SECURITY_INFORMATION.HEADER_CORS_ALLOW_CREDENTIALS"),
|
||||
rs.getInt("DOMAIN_SECURITY_INFORMATION.HEADER_CONTENT_SECURITY_POLICY_HASH"),
|
||||
rs.getString("DOMAIN_SECURITY_INFORMATION.HEADER_STRICT_TRANSPORT_SECURITY"),
|
||||
rs.getString("DOMAIN_SECURITY_INFORMATION.HEADER_REFERRER_POLICY"),
|
||||
rs.getString("DOMAIN_SECURITY_INFORMATION.HEADER_X_FRAME_OPTIONS"),
|
||||
rs.getString("DOMAIN_SECURITY_INFORMATION.HEADER_X_CONTENT_TYPE_OPTIONS"),
|
||||
rs.getString("DOMAIN_SECURITY_INFORMATION.HEADER_X_XSS_PROTECTION"),
|
||||
rs.getString("DOMAIN_SECURITY_INFORMATION.HEADER_SERVER"),
|
||||
rs.getString("DOMAIN_SECURITY_INFORMATION.HEADER_X_POWERED_BY"),
|
||||
rs.getObject("DOMAIN_SECURITY_INFORMATION.TS_LAST_UPDATE", Instant.class));
|
||||
}
|
||||
|
||||
private static HttpSchema httpSchemaFromString(@Nullable String schema) {
|
||||
return schema == null ? null : HttpSchema.valueOf(schema);
|
||||
}
|
||||
|
||||
private static SslCertRevocationStatus sslCertRevocationStatusFromString(@Nullable String status) {
|
||||
return status == null ? null : SslCertRevocationStatus.valueOf(status);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(Connection connection) throws SQLException {
|
||||
try (var ps = connection.prepareStatement(
|
||||
"""
|
||||
REPLACE INTO DOMAIN_SECURITY_INFORMATION (
|
||||
domain_id,
|
||||
node_id,
|
||||
http_schema,
|
||||
http_version,
|
||||
http_compression,
|
||||
http_cache_control,
|
||||
ssl_cert_not_before,
|
||||
ssl_cert_not_after,
|
||||
ssl_cert_issuer,
|
||||
ssl_cert_subject,
|
||||
ssl_cert_serial_number,
|
||||
ssl_cert_fingerprint_sha256,
|
||||
ssl_cert_san,
|
||||
ssl_cert_wildcard,
|
||||
ssl_protocol,
|
||||
ssl_cipher_suite,
|
||||
ssl_key_exchange,
|
||||
ssl_certificate_chain_length,
|
||||
ssl_certificate_valid,
|
||||
header_cors_allow_origin,
|
||||
header_cors_allow_credentials,
|
||||
header_content_security_policy_hash,
|
||||
header_strict_transport_security,
|
||||
header_referrer_policy,
|
||||
header_x_frame_options,
|
||||
header_x_content_type_options,
|
||||
header_x_xss_protection,
|
||||
header_server,
|
||||
header_x_powered_by,
|
||||
ssl_cert_public_key_hash,
|
||||
asn,
|
||||
ts_last_update)
|
||||
VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
|
||||
"""))
|
||||
{
|
||||
ps.setInt(1, domainId());
|
||||
ps.setInt(2, nodeId());
|
||||
if (httpSchema() == null) {
|
||||
ps.setNull(3, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(3, httpSchema().name());
|
||||
}
|
||||
if (httpVersion() == null) {
|
||||
ps.setNull(4, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(4, httpVersion());
|
||||
}
|
||||
if (httpCompression() == null) {
|
||||
ps.setNull(5, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(5, httpCompression());
|
||||
}
|
||||
if (httpCacheControl() == null) {
|
||||
ps.setNull(6, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(6, httpCacheControl());
|
||||
}
|
||||
if (sslCertNotBefore() == null) {
|
||||
ps.setNull(7, java.sql.Types.TIMESTAMP);
|
||||
} else {
|
||||
ps.setTimestamp(7, java.sql.Timestamp.from(sslCertNotBefore()));
|
||||
}
|
||||
if (sslCertNotAfter() == null) {
|
||||
ps.setNull(8, java.sql.Types.TIMESTAMP);
|
||||
} else {
|
||||
ps.setTimestamp(8, java.sql.Timestamp.from(sslCertNotAfter()));
|
||||
}
|
||||
if (sslCertIssuer() == null) {
|
||||
ps.setNull(9, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(9, sslCertIssuer());
|
||||
}
|
||||
if (sslCertSubject() == null) {
|
||||
ps.setNull(10, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(10, sslCertSubject());
|
||||
}
|
||||
if (sslCertSerialNumber() == null) {
|
||||
ps.setNull(11, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(11, sslCertSerialNumber());
|
||||
}
|
||||
if (sslCertFingerprintSha256() == null) {
    ps.setNull(12, java.sql.Types.BINARY);
} else {
    ps.setBytes(12, sslCertFingerprintSha256());
}
|
||||
if (sslCertSan() == null) {
|
||||
ps.setNull(13, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(13, sslCertSan());
|
||||
}
|
||||
ps.setBoolean(14, sslCertWildcard());
|
||||
if (sslProtocol() == null) {
|
||||
ps.setNull(15, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(15, sslProtocol());
|
||||
}
|
||||
if (sslCipherSuite() == null) {
|
||||
ps.setNull(16, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(16, sslCipherSuite());
|
||||
}
|
||||
if (sslKeyExchange() == null) {
|
||||
ps.setNull(17, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(17, sslKeyExchange());
|
||||
}
|
||||
if (sslCertificateChainLength() == null) {
|
||||
ps.setNull(18, java.sql.Types.INTEGER);
|
||||
} else {
|
||||
ps.setInt(18, sslCertificateChainLength());
|
||||
}
|
||||
ps.setBoolean(19, sslCertificateValid());
|
||||
if (headerCorsAllowOrigin() == null) {
|
||||
ps.setNull(20, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(20, headerCorsAllowOrigin());
|
||||
}
|
||||
ps.setBoolean(21, headerCorsAllowCredentials());
|
||||
if (headerContentSecurityPolicyHash() == null) {
|
||||
ps.setNull(22, Types.INTEGER);
|
||||
} else {
|
||||
ps.setInt(22, headerContentSecurityPolicyHash());
|
||||
}
|
||||
if (headerStrictTransportSecurity() == null) {
|
||||
ps.setNull(23, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(23, headerStrictTransportSecurity());
|
||||
}
|
||||
if (headerReferrerPolicy() == null) {
|
||||
ps.setNull(24, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(24, headerReferrerPolicy());
|
||||
}
|
||||
if (headerXFrameOptions() == null) {
|
||||
ps.setNull(25, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(25, headerXFrameOptions());
|
||||
}
|
||||
if (headerXContentTypeOptions() == null) {
|
||||
ps.setNull(26, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(26, headerXContentTypeOptions());
|
||||
}
|
||||
if (headerXXssProtection() == null) {
|
||||
ps.setNull(27, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(27, headerXXssProtection());
|
||||
}
|
||||
if (headerServer() == null) {
|
||||
ps.setNull(28, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(28, headerServer());
|
||||
}
|
||||
if (headerXPoweredBy() == null) {
|
||||
ps.setNull(29, java.sql.Types.VARCHAR);
|
||||
} else {
|
||||
ps.setString(29, headerXPoweredBy());
|
||||
}
|
||||
if (sslCertPublicKeyHash() == null) {
|
||||
ps.setNull(30, java.sql.Types.BINARY);
|
||||
} else {
|
||||
ps.setBytes(30, sslCertPublicKeyHash());
|
||||
}
|
||||
if (asn() == null) {
|
||||
ps.setNull(31, java.sql.Types.INTEGER);
|
||||
} else {
|
||||
ps.setInt(31, asn());
|
||||
}
|
||||
|
||||
if (tsLastUpdate() == null) {
|
||||
ps.setNull(32, java.sql.Types.TIMESTAMP);
|
||||
} else {
|
||||
ps.setTimestamp(32, java.sql.Timestamp.from(tsLastUpdate()));
|
||||
}
|
||||
ps.executeUpdate();
|
||||
}
|
||||
}
|
||||
|
||||
public static class Builder {
|
||||
private int domainId;
|
||||
private int nodeId;
|
||||
private Integer asn;
|
||||
private HttpSchema httpSchema;
|
||||
private String httpVersion;
|
||||
private String httpCompression;
|
||||
private String httpCacheControl;
|
||||
private Instant sslCertNotBefore;
|
||||
private Instant sslCertNotAfter;
|
||||
private String sslCertIssuer;
|
||||
private String sslCertSubject;
|
||||
private String sslCertSerialNumber;
|
||||
private byte[] sslCertPublicKeyHash;
|
||||
private byte[] sslCertFingerprintSha256;
|
||||
private String sslCertSan;
|
||||
private boolean sslCertWildcard;
|
||||
private String sslProtocol;
|
||||
private String sslCipherSuite;
|
||||
private String sslKeyExchange;
|
||||
private Integer sslCertificateChainLength;
|
||||
private boolean sslCertificateValid;
|
||||
private String headerCorsAllowOrigin;
|
||||
private boolean headerCorsAllowCredentials;
|
||||
private Integer headerContentSecurityPolicyHash;
|
||||
private String headerStrictTransportSecurity;
|
||||
private String headerReferrerPolicy;
|
||||
private String headerXFrameOptions;
|
||||
private String headerXContentTypeOptions;
|
||||
private String headerXXssProtection;
|
||||
private String headerServer;
|
||||
private String headerXPoweredBy;
|
||||
private Instant tsLastUpdate;
|
||||
|
||||
public Builder() {
|
||||
// Default values for boolean fields
|
||||
this.sslCertWildcard = false;
|
||||
this.sslCertificateValid = false;
|
||||
this.headerCorsAllowCredentials = false;
|
||||
}
|
||||
|
||||
public Builder domainId(int domainId) {
|
||||
this.domainId = domainId;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder nodeId(int nodeId) {
|
||||
this.nodeId = nodeId;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder asn(@Nullable Integer asn) {
|
||||
this.asn = asn;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder httpSchema(HttpSchema httpSchema) {
|
||||
this.httpSchema = httpSchema;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder httpVersion(String httpVersion) {
|
||||
this.httpVersion = httpVersion;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder httpCompression(String httpCompression) {
|
||||
this.httpCompression = httpCompression;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder httpCacheControl(String httpCacheControl) {
|
||||
this.httpCacheControl = httpCacheControl;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder sslCertNotBefore(Instant sslCertNotBefore) {
|
||||
this.sslCertNotBefore = sslCertNotBefore;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder sslCertNotAfter(Instant sslCertNotAfter) {
|
||||
this.sslCertNotAfter = sslCertNotAfter;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder sslCertIssuer(String sslCertIssuer) {
|
||||
this.sslCertIssuer = sslCertIssuer;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder sslCertSubject(String sslCertSubject) {
|
||||
this.sslCertSubject = sslCertSubject;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder sslCertSerialNumber(String sslCertSerialNumber) {
|
||||
this.sslCertSerialNumber = sslCertSerialNumber;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder sslCertPublicKeyHash(byte[] sslCertPublicKeyHash) {
|
||||
this.sslCertPublicKeyHash = sslCertPublicKeyHash;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder sslCertFingerprintSha256(byte[] sslCertFingerprintSha256) {
|
||||
this.sslCertFingerprintSha256 = sslCertFingerprintSha256;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder sslCertSan(String sslCertSan) {
|
||||
this.sslCertSan = sslCertSan;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder sslCertWildcard(boolean sslCertWildcard) {
|
||||
this.sslCertWildcard = sslCertWildcard;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder sslProtocol(String sslProtocol) {
|
||||
this.sslProtocol = sslProtocol;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder sslCipherSuite(String sslCipherSuite) {
|
||||
this.sslCipherSuite = sslCipherSuite;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder sslKeyExchange(String sslKeyExchange) {
|
||||
this.sslKeyExchange = sslKeyExchange;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder sslCertificateChainLength(Integer sslCertificateChainLength) {
|
||||
this.sslCertificateChainLength = sslCertificateChainLength;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder sslCertificateValid(boolean sslCertificateValid) {
|
||||
this.sslCertificateValid = sslCertificateValid;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder headerCorsAllowOrigin(String headerCorsAllowOrigin) {
|
||||
this.headerCorsAllowOrigin = headerCorsAllowOrigin;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder headerCorsAllowCredentials(boolean headerCorsAllowCredentials) {
|
||||
this.headerCorsAllowCredentials = headerCorsAllowCredentials;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder headerContentSecurityPolicyHash(Integer headerContentSecurityPolicyHash) {
|
||||
this.headerContentSecurityPolicyHash = headerContentSecurityPolicyHash;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder headerStrictTransportSecurity(String headerStrictTransportSecurity) {
|
||||
this.headerStrictTransportSecurity = headerStrictTransportSecurity;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder headerReferrerPolicy(String headerReferrerPolicy) {
|
||||
this.headerReferrerPolicy = StringUtils.truncate(headerReferrerPolicy, 50);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder headerXFrameOptions(String headerXFrameOptions) {
|
||||
this.headerXFrameOptions = StringUtils.truncate(headerXFrameOptions, 50);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder headerXContentTypeOptions(String headerXContentTypeOptions) {
|
||||
this.headerXContentTypeOptions = StringUtils.truncate(headerXContentTypeOptions, 50);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder headerXXssProtection(String headerXXssProtection) {
|
||||
this.headerXXssProtection = StringUtils.truncate(headerXXssProtection, 50);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder headerServer(String headerServer) {
|
||||
this.headerServer = headerServer;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder headerXPoweredBy(String headerXPoweredBy) {
|
||||
this.headerXPoweredBy = headerXPoweredBy;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder tsLastUpdate(Instant tsLastUpdate) {
|
||||
this.tsLastUpdate = tsLastUpdate;
|
||||
return this;
|
||||
}
|
||||
|
||||
public DomainSecurityRecord build() {
|
||||
return new DomainSecurityRecord(
|
||||
domainId,
|
||||
nodeId,
|
||||
asn,
|
||||
httpSchema,
|
||||
httpVersion,
|
||||
httpCompression,
|
||||
httpCacheControl,
|
||||
sslCertNotBefore,
|
||||
sslCertNotAfter,
|
||||
sslCertIssuer,
|
||||
sslCertSubject,
|
||||
sslCertPublicKeyHash,
|
||||
sslCertSerialNumber,
|
||||
sslCertFingerprintSha256,
|
||||
sslCertSan,
|
||||
sslCertWildcard,
|
||||
sslProtocol,
|
||||
sslCipherSuite,
|
||||
sslKeyExchange,
|
||||
sslCertificateChainLength,
|
||||
sslCertificateValid,
|
||||
headerCorsAllowOrigin,
|
||||
headerCorsAllowCredentials,
|
||||
headerContentSecurityPolicyHash,
|
||||
headerStrictTransportSecurity,
|
||||
headerReferrerPolicy,
|
||||
headerXFrameOptions,
|
||||
headerXContentTypeOptions,
|
||||
headerXXssProtection,
|
||||
headerServer,
|
||||
headerXPoweredBy,
|
||||
tsLastUpdate
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
public static Builder builder() {
|
||||
return new Builder();
|
||||
}
|
||||
}
|
@@ -0,0 +1,12 @@
package nu.marginalia.ping.model;

public enum ErrorClassification {
    NONE,
    TIMEOUT,
    SSL_ERROR,
    DNS_ERROR,
    CONNECTION_ERROR,
    HTTP_CLIENT_ERROR,
    HTTP_SERVER_ERROR,
    UNKNOWN
}
@@ -0,0 +1,6 @@
package nu.marginalia.ping.model;

public sealed interface HistoricalAvailabilityData {
    record JustAvailability(String domain, DomainAvailabilityRecord record) implements HistoricalAvailabilityData {}
    record AvailabilityAndSecurity(String domain, DomainAvailabilityRecord availabilityRecord, DomainSecurityRecord securityRecord) implements HistoricalAvailabilityData {}
}
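For orientation only, and not part of this changeset: a minimal sketch of how a consumer might branch over the sealed HistoricalAvailabilityData hierarchy, assuming Java 21 switch pattern matching. The class and method names below are hypothetical.

package nu.marginalia.ping.model;

class HistoricalAvailabilityDataExample {
    // Exhaustive switch over the sealed interface; the compiler flags any missing case.
    static String describe(HistoricalAvailabilityData data) {
        return switch (data) {
            case HistoricalAvailabilityData.JustAvailability d ->
                    d.domain() + ": availability only";
            case HistoricalAvailabilityData.AvailabilityAndSecurity d ->
                    d.domain() + ": availability and security";
        };
    }
}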
@@ -0,0 +1,6 @@
package nu.marginalia.ping.model;

public enum HttpSchema {
    HTTP,
    HTTPS;
}
@@ -0,0 +1,8 @@
package nu.marginalia.ping.model;

public record SingleDnsRecord(
    String recordType,
    String data
) {

}
@@ -0,0 +1,8 @@
package nu.marginalia.ping.model;

public enum SslCertRevocationStatus {
    NOT_CHECKED,
    VALID,
    REVOKED,
    UNKNOWN
}
@@ -0,0 +1,8 @@
package nu.marginalia.ping.model;

import java.sql.Connection;
import java.sql.SQLException;

public interface WritableModel {
    void write(Connection connection) throws SQLException;
}
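As an editorial aside, a minimal sketch of how any WritableModel might be persisted through a pooled connection; the DataSource wiring shown here is an assumption for illustration, not something introduced by this diff.

package nu.marginalia.ping.model;

import javax.sql.DataSource;
import java.sql.Connection;
import java.sql.SQLException;

class WritableModelExample {
    // Borrow a connection, let the model write itself, then return the connection to the pool.
    static void persist(DataSource dataSource, WritableModel model) throws SQLException {
        try (Connection conn = dataSource.getConnection()) {
            model.write(conn);
        }
    }
}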
@@ -0,0 +1,71 @@
|
||||
package nu.marginalia.ping.model.comparison;
|
||||
|
||||
import nu.marginalia.ping.model.DomainDnsRecord;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
public sealed interface DnsRecordChange {
|
||||
record None() implements DnsRecordChange { }
|
||||
record Changed(
|
||||
boolean aRecordsChanged,
|
||||
boolean aaaaRecordsChanged,
|
||||
boolean cnameRecordChanged,
|
||||
boolean mxRecordsChanged,
|
||||
boolean caaRecordsChanged,
|
||||
boolean txtRecordsChanged,
|
||||
boolean nsRecordsChanged,
|
||||
boolean soaRecordChanged
|
||||
) implements DnsRecordChange {}
|
||||
|
||||
static DnsRecordChange between(DomainDnsRecord before, DomainDnsRecord after) {
|
||||
|
||||
boolean aaaaRecordsChanged = !compareRecords(before.aaaaRecords(), after.aaaaRecords());
|
||||
boolean aRecordsChanged = !compareRecords(before.aRecords(), after.aRecords());
|
||||
boolean cnameRecordChanged = !Objects.equals(before.cnameRecord(), after.cnameRecord());
|
||||
boolean mxRecordsChanged = !compareRecords(before.mxRecords(), after.mxRecords());
|
||||
boolean caaRecordsChanged = !compareRecords(before.caaRecords(), after.caaRecords());
|
||||
boolean txtRecordsChanged = !compareRecords(before.txtRecords(), after.txtRecords());
|
||||
boolean nsRecordsChanged = !compareRecords(before.nsRecords(), after.nsRecords());
|
||||
boolean soaRecordChanged = !Objects.equals(before.soaRecord(), after.soaRecord());
|
||||
|
||||
boolean anyChanged = aaaaRecordsChanged ||
|
||||
aRecordsChanged ||
|
||||
cnameRecordChanged ||
|
||||
mxRecordsChanged ||
|
||||
caaRecordsChanged ||
|
||||
txtRecordsChanged ||
|
||||
nsRecordsChanged ||
|
||||
soaRecordChanged;
|
||||
if (!anyChanged) {
|
||||
return new DnsRecordChange.None();
|
||||
} else {
|
||||
return new DnsRecordChange.Changed(
|
||||
aRecordsChanged,
|
||||
aaaaRecordsChanged,
|
||||
cnameRecordChanged,
|
||||
mxRecordsChanged,
|
||||
caaRecordsChanged,
|
||||
txtRecordsChanged,
|
||||
nsRecordsChanged,
|
||||
soaRecordChanged
|
||||
);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static boolean compareRecords(List<String> beforeRecords, List<String> afterRecords) {
|
||||
if (null == beforeRecords && null == afterRecords) {
|
||||
return true; // Both are null, no change
|
||||
}
|
||||
|
||||
// empty and null are semantically equivalent
|
||||
if (null == beforeRecords)
|
||||
return afterRecords.isEmpty();
|
||||
if (null == afterRecords)
|
||||
return beforeRecords.isEmpty();
|
||||
|
||||
return Set.copyOf(beforeRecords).equals(Set.copyOf(afterRecords)); // Compare the sets for equality
|
||||
}
|
||||
}
|
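A hedged usage sketch (not taken from the changeset) of how DnsRecordChange.between could drive the decision to record a DNS event; the helper class and method names are hypothetical, and Java 21 switch pattern matching is assumed.

package nu.marginalia.ping.model.comparison;

import nu.marginalia.ping.model.DomainDnsRecord;

class DnsRecordChangeExample {
    // True when any record set differs between two DNS snapshots of the same root domain.
    static boolean shouldRecordEvent(DomainDnsRecord before, DomainDnsRecord after) {
        return switch (DnsRecordChange.between(before, after)) {
            case DnsRecordChange.None none -> false;
            case DnsRecordChange.Changed changed -> true;
        };
    }
}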
@@ -0,0 +1,45 @@
|
||||
package nu.marginalia.ping.model.comparison;
|
||||
|
||||
import nu.marginalia.ping.model.AvailabilityOutageType;
|
||||
import nu.marginalia.ping.model.DomainAvailabilityRecord;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
public sealed interface DomainAvailabilityChange {
|
||||
record None() implements DomainAvailabilityChange { }
|
||||
record UnavailableToAvailable() implements DomainAvailabilityChange { }
|
||||
record AvailableToUnavailable(AvailabilityOutageType outageType) implements DomainAvailabilityChange { }
|
||||
record OutageTypeChange(AvailabilityOutageType newOutageType) implements DomainAvailabilityChange { }
|
||||
|
||||
static DomainAvailabilityChange between(
|
||||
DomainAvailabilityRecord oldStatus,
|
||||
DomainAvailabilityRecord newStatus
|
||||
) {
|
||||
if (oldStatus.serverAvailable() && newStatus.serverAvailable()) {
|
||||
return new DomainAvailabilityChange.None();
|
||||
}
|
||||
|
||||
if (oldStatus.serverAvailable()) {
|
||||
return new DomainAvailabilityChange.AvailableToUnavailable(
|
||||
AvailabilityOutageType.fromErrorClassification(newStatus.errorClassification())
|
||||
);
|
||||
}
|
||||
|
||||
if (newStatus.serverAvailable()) {
|
||||
return new DomainAvailabilityChange.UnavailableToAvailable();
|
||||
}
|
||||
else {
|
||||
var classOld = oldStatus.errorClassification();
|
||||
var classNew = newStatus.errorClassification();
|
||||
|
||||
if (!Objects.equals(classOld, classNew)) {
|
||||
return new DomainAvailabilityChange.OutageTypeChange(
|
||||
AvailabilityOutageType.fromErrorClassification(newStatus.errorClassification())
|
||||
);
|
||||
}
|
||||
else {
|
||||
return new DomainAvailabilityChange.None();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@@ -0,0 +1,176 @@
|
||||
package nu.marginalia.ping.model.comparison;
|
||||
|
||||
import nu.marginalia.ping.model.DomainDnsRecord;
|
||||
import nu.marginalia.ping.model.WritableModel;
|
||||
import nu.marginalia.ping.util.JsonObject;
|
||||
|
||||
import java.sql.Connection;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.Types;
|
||||
import java.time.Instant;
|
||||
|
||||
public record DomainDnsEvent(
|
||||
int rootDomainId,
|
||||
int nodeId,
|
||||
|
||||
Instant tsChange,
|
||||
boolean changeARecords,
|
||||
boolean changeAaaaRecords,
|
||||
boolean changeCname,
|
||||
boolean changeMxRecords,
|
||||
boolean changeCaaRecords,
|
||||
boolean changeTxtRecords,
|
||||
boolean changeNsRecords,
|
||||
boolean changeSoaRecord,
|
||||
|
||||
JsonObject<DomainDnsRecord> dnsSignatureBefore,
|
||||
JsonObject<DomainDnsRecord> dnsSignatureAfter
|
||||
) implements WritableModel {
|
||||
|
||||
@Override
|
||||
public void write(Connection connection) throws SQLException {
|
||||
try (var ps = connection.prepareStatement("""
|
||||
INSERT INTO DOMAIN_DNS_EVENTS (
|
||||
DNS_ROOT_DOMAIN_ID,
|
||||
NODE_ID,
|
||||
TS_CHANGE,
|
||||
CHANGE_A_RECORDS,
|
||||
CHANGE_AAAA_RECORDS,
|
||||
CHANGE_CNAME,
|
||||
CHANGE_MX_RECORDS,
|
||||
CHANGE_CAA_RECORDS,
|
||||
CHANGE_TXT_RECORDS,
|
||||
CHANGE_NS_RECORDS,
|
||||
CHANGE_SOA_RECORD,
|
||||
DNS_SIGNATURE_BEFORE,
|
||||
DNS_SIGNATURE_AFTER
|
||||
) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)
|
||||
""")) {
|
||||
ps.setInt(1, rootDomainId());
|
||||
ps.setInt(2, nodeId());
|
||||
ps.setTimestamp(3, java.sql.Timestamp.from(tsChange()));
|
||||
ps.setBoolean(4, changeARecords());
|
||||
ps.setBoolean(5, changeAaaaRecords());
|
||||
ps.setBoolean(6, changeCname());
|
||||
ps.setBoolean(7, changeMxRecords());
|
||||
ps.setBoolean(8, changeCaaRecords());
|
||||
ps.setBoolean(9, changeTxtRecords());
|
||||
ps.setBoolean(10, changeNsRecords());
|
||||
ps.setBoolean(11, changeSoaRecord());
|
||||
if (dnsSignatureBefore() == null) {
|
||||
ps.setNull(12, Types.BLOB);
|
||||
} else {
|
||||
ps.setBytes(12, dnsSignatureBefore().compressed());
|
||||
}
|
||||
if (dnsSignatureAfter() == null) {
|
||||
ps.setNull(13, Types.BLOB);
|
||||
} else {
|
||||
ps.setBytes(13, dnsSignatureAfter().compressed());
|
||||
}
|
||||
ps.executeUpdate();
|
||||
}
|
||||
}
|
||||
|
||||
public static Builder builder() {
|
||||
return new Builder();
|
||||
}
|
||||
|
||||
public static class Builder {
|
||||
private int rootDomainId;
|
||||
private int nodeId;
|
||||
private Instant tsChange;
|
||||
private boolean changeARecords;
|
||||
private boolean changeAaaaRecords;
|
||||
private boolean changeCname;
|
||||
private boolean changeMxRecords;
|
||||
private boolean changeCaaRecords;
|
||||
private boolean changeTxtRecords;
|
||||
private boolean changeNsRecords;
|
||||
private boolean changeSoaRecord;
|
||||
private JsonObject<DomainDnsRecord> dnsSignatureBefore;
|
||||
private JsonObject<DomainDnsRecord> dnsSignatureAfter;
|
||||
|
||||
public Builder rootDomainId(int rootDomainId) {
|
||||
this.rootDomainId = rootDomainId;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder nodeId(int nodeId) {
|
||||
this.nodeId = nodeId;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder tsChange(Instant tsChange) {
|
||||
this.tsChange = tsChange;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder changeARecords(boolean changeARecords) {
|
||||
this.changeARecords = changeARecords;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder changeAaaaRecords(boolean changeAaaaRecords) {
|
||||
this.changeAaaaRecords = changeAaaaRecords;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder changeCname(boolean changeCname) {
|
||||
this.changeCname = changeCname;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder changeMxRecords(boolean changeMxRecords) {
|
||||
this.changeMxRecords = changeMxRecords;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder changeCaaRecords(boolean changeCaaRecords) {
|
||||
this.changeCaaRecords = changeCaaRecords;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder changeTxtRecords(boolean changeTxtRecords) {
|
||||
this.changeTxtRecords = changeTxtRecords;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder changeNsRecords(boolean changeNsRecords) {
|
||||
this.changeNsRecords = changeNsRecords;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder changeSoaRecord(boolean changeSoaRecord) {
|
||||
this.changeSoaRecord = changeSoaRecord;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder dnsSignatureBefore(JsonObject<DomainDnsRecord> dnsSignatureBefore) {
|
||||
this.dnsSignatureBefore = dnsSignatureBefore;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder dnsSignatureAfter(JsonObject<DomainDnsRecord> dnsSignatureAfter) {
|
||||
this.dnsSignatureAfter = dnsSignatureAfter;
|
||||
return this;
|
||||
}
|
||||
|
||||
public DomainDnsEvent build() {
|
||||
return new DomainDnsEvent(
|
||||
rootDomainId,
|
||||
nodeId,
|
||||
tsChange,
|
||||
changeARecords,
|
||||
changeAaaaRecords,
|
||||
changeCname,
|
||||
changeMxRecords,
|
||||
changeCaaRecords,
|
||||
changeTxtRecords,
|
||||
changeNsRecords,
|
||||
changeSoaRecord,
|
||||
dnsSignatureBefore,
|
||||
dnsSignatureAfter
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
@@ -0,0 +1,70 @@
|
||||
package nu.marginalia.ping.model.comparison;
|
||||
|
||||
import nu.marginalia.ping.model.DomainAvailabilityRecord;
|
||||
import nu.marginalia.ping.model.DomainSecurityRecord;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.time.Instant;
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
|
||||
public record SecurityInformationChange(
|
||||
boolean isChanged,
|
||||
boolean isAsnChanged,
|
||||
boolean isCertificateFingerprintChanged,
|
||||
boolean isCertificateProfileChanged,
|
||||
boolean isCertificateSanChanged,
|
||||
boolean isCertificatePublicKeyChanged,
|
||||
Duration oldCertificateTimeToExpiry,
|
||||
boolean isSecurityHeadersChanged,
|
||||
boolean isIpAddressChanged,
|
||||
boolean isSoftwareHeaderChanged
|
||||
) {
|
||||
public static SecurityInformationChange between(
|
||||
DomainSecurityRecord before, DomainAvailabilityRecord availabilityBefore,
|
||||
DomainSecurityRecord after, DomainAvailabilityRecord availabilityAfter
|
||||
) {
|
||||
boolean asnChanged = !Objects.equals(before.asn(), after.asn());
|
||||
|
||||
boolean ipChanged = 0 != Arrays.compare(availabilityBefore.serverIp(), availabilityAfter.serverIp());
|
||||
|
||||
boolean certificateFingerprintChanged = 0 != Arrays.compare(before.sslCertFingerprintSha256(), after.sslCertFingerprintSha256());
|
||||
boolean certificateProfileChanged = before.certificateProfileHash() != after.certificateProfileHash();
|
||||
boolean certificatePublicKeyChanged = 0 != Arrays.compare(before.sslCertPublicKeyHash(), after.sslCertPublicKeyHash());
|
||||
boolean certificateSanChanged = !Objects.equals(before.sslCertSan(), after.sslCertSan());
|
||||
|
||||
Duration oldCertificateTimeToExpiry = before.sslCertNotAfter() == null ? null : Duration.between(
|
||||
Instant.now(),
|
||||
before.sslCertNotAfter()
|
||||
);
|
||||
|
||||
boolean securityHeadersChanged = before.securityHeadersHash() != after.securityHeadersHash();
|
||||
|
||||
boolean softwareChanged = !Objects.equals(before.headerServer(), after.headerServer());
|
||||
|
||||
// Note we don't include IP address changes in the overall change status,
|
||||
// as this is not alone considered a change in security information; we may have
|
||||
// multiple IP addresses for a domain, and the IP address may change frequently
|
||||
// within the same ASN or certificate profile.
|
||||
|
||||
boolean isChanged = asnChanged
|
||||
|| certificateFingerprintChanged
|
||||
|| securityHeadersChanged
|
||||
|| softwareChanged;
|
||||
|
||||
return new SecurityInformationChange(
|
||||
isChanged,
|
||||
asnChanged,
|
||||
certificateFingerprintChanged,
|
||||
certificateProfileChanged,
|
||||
certificateSanChanged,
|
||||
certificatePublicKeyChanged,
|
||||
oldCertificateTimeToExpiry,
|
||||
securityHeadersChanged,
|
||||
ipChanged,
|
||||
softwareChanged
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
}
|
@@ -0,0 +1,491 @@
|
||||
package nu.marginalia.ping.ssl;
|
||||
|
||||
import javax.net.ssl.*;
|
||||
import java.io.FileInputStream;
|
||||
import java.security.InvalidAlgorithmParameterException;
|
||||
import java.security.KeyStore;
|
||||
import java.security.KeyStoreException;
import java.security.NoSuchAlgorithmException;
import java.security.cert.*;
import java.util.*;

/**
 * Custom PKIX validator for validating X.509 certificate chains with verbose output
 * for db export (i.e. not just SSLException).
 */
public class CustomPKIXValidator {

    private final Set<TrustAnchor> trustAnchors;
    private final boolean revocationEnabled;
    private final boolean anyPolicyInhibited;
    private final boolean explicitPolicyRequired;
    private final boolean policyMappingInhibited;
    private final Set<String> initialPolicies;

    private static final Set<String> EV_POLICY_OIDS = Set.of(
            "1.3.6.1.4.1.17326.10.14.2.1.2", // Entrust
            "1.3.6.1.4.1.17326.10.8.12.1.2", // Entrust
            "2.16.840.1.114028.10.1.2",      // Entrust/AffirmTrust
            "1.3.6.1.4.1.6449.1.2.1.5.1",    // Comodo
            "1.3.6.1.4.1.8024.0.2.100.1.2",  // QuoVadis
            "2.16.840.1.114404.1.1.2.4.1",   // GoDaddy
            "2.16.840.1.114413.1.7.23.3",    // DigiCert
            "2.16.840.1.114414.1.7.23.3",    // DigiCert
            "1.3.6.1.4.1.14370.1.6",         // GlobalSign
            "2.16.756.1.89.1.2.1.1",         // SwissSign
            "1.3.6.1.4.1.4146.1.1"           // GlobalSign
    );

    // Constructor with default settings
    public CustomPKIXValidator() throws Exception {
        this(true, false, false, false, null);
    }

    // Constructor with custom settings
    public CustomPKIXValidator(boolean revocationEnabled,
                               boolean anyPolicyInhibited,
                               boolean explicitPolicyRequired,
                               boolean policyMappingInhibited,
                               Set<String> initialPolicies) throws Exception {
        this.trustAnchors = loadDefaultTrustAnchors();
        this.revocationEnabled = revocationEnabled;
        this.anyPolicyInhibited = anyPolicyInhibited;
        this.explicitPolicyRequired = explicitPolicyRequired;
        this.policyMappingInhibited = policyMappingInhibited;
        this.initialPolicies = initialPolicies;
    }

    // Constructor with custom trust anchors
    public CustomPKIXValidator(Set<TrustAnchor> customTrustAnchors,
                               boolean revocationEnabled) {
        this.trustAnchors = new HashSet<>(customTrustAnchors);
        this.revocationEnabled = revocationEnabled;
        this.anyPolicyInhibited = false;
        this.explicitPolicyRequired = false;
        this.policyMappingInhibited = false;
        this.initialPolicies = null;
    }

    /**
     * Validates certificate chain using PKIX algorithm
     */
    public PKIXValidationResult validateCertificateChain(String hostname, X509Certificate[] certChain) {
        EnumSet<PkixValidationError> errors = EnumSet.noneOf(PkixValidationError.class);
        try {
            // 1. Basic input validation
            if (certChain == null || certChain.length == 0) {
                return new PKIXValidationResult(false, "Certificate chain is empty", errors,
                        null, null, null, false);
            }

            if (hostname == null || hostname.trim().isEmpty()) {
                return new PKIXValidationResult(false, "Hostname is null or empty", errors,
                        null, null, null, false);
            }

            // 2. Create certificate path
            CertPath certPath = createCertificatePath(certChain);
            if (certPath == null) {
                return new PKIXValidationResult(false, "Failed to create certificate path", errors,
                        null, null, null, false);
            }

            // 3. Build and validate certificate path using PKIX
            PKIXCertPathValidatorResult pkixResult = performPKIXValidation(certPath, errors);

            // 4. Validate hostname
            boolean hostnameValid = validateHostname(hostname, certChain[0], errors);

            // 5. Extract critical extensions information
            Set<String> criticalExtensions = extractCriticalExtensions(certChain);

            boolean overallValid = (pkixResult != null) && hostnameValid;
            String errorMessage = null;

            if (pkixResult == null) {
                errorMessage = "PKIX path validation failed";
            } else if (!hostnameValid) {
                errorMessage = "Hostname validation failed";
            }

            return new PKIXValidationResult(overallValid, errorMessage, errors,
                    pkixResult, certPath, criticalExtensions, hostnameValid);

        } catch (Exception e) {
            return new PKIXValidationResult(false, "Validation exception: " + e.getMessage(),
                    errors, null, null, null, false);
        }
    }

    /**
     * Creates a certificate path from the certificate chain
     */
    private CertPath createCertificatePath(X509Certificate[] certChain) throws CertificateException {
        CertificateFactory cf = CertificateFactory.getInstance("X.509");
        List<Certificate> certList = Arrays.asList(certChain);
        return cf.generateCertPath(certList);
    }

    /**
     * Performs PKIX validation
     */
    private PKIXCertPathValidatorResult performPKIXValidation(CertPath certPath, Set<PkixValidationError> warnings) {
        try {
            // Create PKIX parameters
            PKIXParameters params = new PKIXParameters(trustAnchors);

            // Configure PKIX parameters
            params.setRevocationEnabled(revocationEnabled);
            params.setAnyPolicyInhibited(anyPolicyInhibited);
            params.setExplicitPolicyRequired(explicitPolicyRequired);
            params.setPolicyMappingInhibited(policyMappingInhibited);

            if (initialPolicies != null && !initialPolicies.isEmpty()) {
                params.setInitialPolicies(initialPolicies);
            }

            // Set up certificate stores for intermediate certificates if needed
            // This helps with path building when intermediate certs are missing
            List<Certificate> intermediateCerts = extractIntermediateCertificates(certPath);
            if (!intermediateCerts.isEmpty()) {
                CertStore certStore = CertStore.getInstance("Collection",
                        new CollectionCertStoreParameters(intermediateCerts));
                params.addCertStore(certStore);
            }

            // Configure revocation checking if enabled
            if (revocationEnabled) {
                configureRevocationChecking(params);
            }

            // Create and run validator
            CertPathValidator validator = CertPathValidator.getInstance("PKIX");
            PKIXCertPathValidatorResult result = (PKIXCertPathValidatorResult)
                    validator.validate(certPath, params);

            return result;

        } catch (CertPathValidatorException e) {
            warnings.add(PkixValidationError.PATH_VALIDATION_FAILED);
            return null;
        } catch (InvalidAlgorithmParameterException e) {
            warnings.add(PkixValidationError.INVALID_PKIX_PARAMETERS);
            return null;
        } catch (Exception e) {
            warnings.add(PkixValidationError.UNKNOWN);
            return null;
        }
    }

    /**
     * Extracts intermediate certificates from the path
     */
    private List<Certificate> extractIntermediateCertificates(CertPath certPath) {
        List<Certificate> certs = (List<Certificate>) certPath.getCertificates();
        if (certs.size() <= 2) {
            return new ArrayList<>(); // Only leaf and root, no intermediates
        }
        // Return all but the first (leaf) and potentially last (root)
        return new ArrayList<>(certs.subList(1, certs.size()));
    }

    /**
     * Configures revocation checking (CRL/OCSP)
     */
    private void configureRevocationChecking(PKIXParameters params) throws NoSuchAlgorithmException {
        // Create PKIX revocation checker
        PKIXRevocationChecker revocationChecker = (PKIXRevocationChecker)
                CertPathValidator.getInstance("PKIX").getRevocationChecker();

        // Configure revocation checker options
        Set<PKIXRevocationChecker.Option> options = EnumSet.of(
                PKIXRevocationChecker.Option.PREFER_CRLS,
                PKIXRevocationChecker.Option.SOFT_FAIL // Don't fail if revocation info unavailable
        );
        revocationChecker.setOptions(options);

        params.addCertPathChecker(revocationChecker);
    }

    /**
     * Comprehensive hostname validation including SAN and CN
     */
    private boolean validateHostname(String hostname, X509Certificate cert, Set<PkixValidationError> warnings) {
        try {
            // Use Java's built-in hostname verifier as a starting point
            HostnameVerifier defaultVerifier = HttpsURLConnection.getDefaultHostnameVerifier();

            // Create a mock SSL session for the hostname verifier
            MockSSLSession mockSession = new MockSSLSession(cert);

            boolean defaultResult = defaultVerifier.verify(hostname, mockSession);

            if (defaultResult) {
                return true;
            }

            // If default fails, do manual validation
            return performManualHostnameValidation(hostname, cert, warnings);

        } catch (Exception e) {
            warnings.add(PkixValidationError.UNSPECIFIED_HOST_ERROR);
            return false;
        }
    }

    /**
     * Manual hostname validation implementation
     */
    private boolean performManualHostnameValidation(String hostname, X509Certificate cert, Set<PkixValidationError> warnings) {
        try {
            // 1. Check Subject Alternative Names (SAN) - preferred method
            Collection<List<?>> sanEntries = cert.getSubjectAlternativeNames();
            if (sanEntries != null) {
                for (List<?> sanEntry : sanEntries) {
                    if (sanEntry.size() >= 2) {
                        Integer type = (Integer) sanEntry.get(0);
                        if (type == 2) { // DNS name
                            String dnsName = (String) sanEntry.get(1);
                            if (matchesHostname(hostname, dnsName)) {
                                return true;
                            }
                        } else if (type == 7) { // IP address
                            String ipAddress = (String) sanEntry.get(1);
                            if (hostname.equals(ipAddress)) {
                                return true;
                            }
                        }
                    }
                }
                // If SAN is present but no match found, don't check CN (RFC 6125)
                warnings.add(PkixValidationError.SAN_MISMATCH);
                return false;
            }

            // 2. Fallback to Common Name (CN) in subject if no SAN present
            String subjectDN = cert.getSubjectDN().getName();
            String cn = extractCommonName(subjectDN);
            if (cn != null) {
                if (matchesHostname(hostname, cn)) {
                    return true;
                }
            }

            warnings.add(PkixValidationError.SAN_MISMATCH);
            return false;

        } catch (Exception e) {
            warnings.add(PkixValidationError.UNKNOWN);
            return false;
        }
    }

    /**
     * Checks if hostname matches certificate name (handles wildcards)
     */
    private boolean matchesHostname(String hostname, String certName) {
        if (hostname == null || certName == null) {
            return false;
        }

        hostname = hostname.toLowerCase();
        certName = certName.toLowerCase();

        // Exact match
        if (hostname.equals(certName)) {
            return true;
        }

        // Wildcard matching (*.example.com)
        if (certName.startsWith("*.")) {
            String domain = certName.substring(2);

            // Wildcard must match exactly one level
            if (hostname.endsWith("." + domain)) {
                String prefix = hostname.substring(0, hostname.length() - domain.length() - 1);
                // Ensure wildcard doesn't match multiple levels (no dots in prefix)
                return !prefix.contains(".");
            }
        }

        return false;
    }

    /**
     * Extracts Common Name from Subject DN
     */
    private String extractCommonName(String subjectDN) {
        if (subjectDN == null) {
            return null;
        }

        // Parse DN components
        String[] components = subjectDN.split(",");
        for (String component : components) {
            component = component.trim();
            if (component.startsWith("CN=")) {
                return component.substring(3).trim();
            }
        }
        return null;
    }

    /**
     * Extracts critical extensions from all certificates in the chain
     */
    private Set<String> extractCriticalExtensions(X509Certificate[] certChain) {
        Set<String> allCriticalExtensions = new HashSet<>();

        for (X509Certificate cert : certChain) {
            Set<String> criticalExtensions = cert.getCriticalExtensionOIDs();
            if (criticalExtensions != null) {
                allCriticalExtensions.addAll(criticalExtensions);
            }
        }

        return allCriticalExtensions;
    }

    /**
     * Gets the key length from a certificate
     */
    private int getKeyLength(X509Certificate cert) {
        try {
            java.security.PublicKey publicKey = cert.getPublicKey();
            if (publicKey instanceof java.security.interfaces.RSAPublicKey) {
                return ((java.security.interfaces.RSAPublicKey) publicKey).getModulus().bitLength();
            } else if (publicKey instanceof java.security.interfaces.DSAPublicKey) {
                return ((java.security.interfaces.DSAPublicKey) publicKey).getParams().getP().bitLength();
            } else if (publicKey instanceof java.security.interfaces.ECPublicKey) {
                return ((java.security.interfaces.ECPublicKey) publicKey).getParams().getOrder().bitLength();
            }
        } catch (Exception e) {
            // Ignore
        }
        return -1;
    }

    /**
     * Checks if signature algorithm is considered weak
     */
    private boolean isWeakSignatureAlgorithm(String sigAlg) {
        if (sigAlg == null) return false;

        sigAlg = sigAlg.toLowerCase();
        return sigAlg.contains("md5") ||
                sigAlg.contains("sha1") ||
                sigAlg.equals("md2withrsa") ||
                sigAlg.equals("md4withrsa");
    }

    /**
     * Checks for deprecated or problematic extensions
     */
    private void checkDeprecatedExtensions(X509Certificate cert, int index, List<String> warnings) {
        // Check for Netscape extensions (deprecated)
        if (cert.getNonCriticalExtensionOIDs() != null) {
            for (String oid : cert.getNonCriticalExtensionOIDs()) {
                if (oid.startsWith("2.16.840.1.113730")) { // Netscape OID space
                    warnings.add("Certificate " + index + " contains deprecated Netscape extension: " + oid);
                }
            }
        }

        // Additional extension checks can be added here
    }

    /**
     * Loads default trust anchors from Java's cacerts keystore
     */
    private Set<TrustAnchor> loadDefaultTrustAnchors() throws Exception {
        Set<TrustAnchor> trustAnchors = new HashSet<>();

        // Try to load from default locations
        String[] keystorePaths = {
                System.getProperty("javax.net.ssl.trustStore"),
                System.getProperty("java.home") + "/lib/security/cacerts",
                System.getProperty("java.home") + "/jre/lib/security/cacerts"
        };

        String[] keystorePasswords = {
                System.getProperty("javax.net.ssl.trustStorePassword"),
                "changeit",
                ""
        };

        for (String keystorePath : keystorePaths) {
            if (keystorePath != null) {
                for (String password : keystorePasswords) {
                    try {
                        KeyStore trustStore = loadKeyStore(keystorePath, password);
                        if (trustStore != null) {
                            trustAnchors.addAll(extractTrustAnchors(trustStore));
                            if (!trustAnchors.isEmpty()) {
                                return trustAnchors;
                            }
                        }
                    } catch (Exception e) {
                        // Try next combination
                    }
                }
            }
        }

        // Fallback: try to get from default trust manager
        try {
            TrustManagerFactory tmf = TrustManagerFactory.getInstance(
                    TrustManagerFactory.getDefaultAlgorithm());
            tmf.init((KeyStore) null);

            for (TrustManager tm : tmf.getTrustManagers()) {
                if (tm instanceof X509TrustManager) {
                    X509TrustManager x509tm = (X509TrustManager) tm;
                    for (X509Certificate cert : x509tm.getAcceptedIssuers()) {
                        trustAnchors.add(new TrustAnchor(cert, null));
                    }
                }
            }
        } catch (Exception e) {
            throw new Exception("Failed to load any trust anchors", e);
        }

        if (trustAnchors.isEmpty()) {
            throw new Exception("No trust anchors could be loaded");
        }

        return trustAnchors;
    }

    /**
     * Loads a keystore from file
     */
    private KeyStore loadKeyStore(String keystorePath, String password) throws Exception {
        KeyStore keystore = KeyStore.getInstance(KeyStore.getDefaultType());
        try (FileInputStream fis = new FileInputStream(keystorePath)) {
            keystore.load(fis, password != null ? password.toCharArray() : null);
            return keystore;
        }
    }

    /**
     * Extracts trust anchors from a keystore
     */
    private Set<TrustAnchor> extractTrustAnchors(KeyStore trustStore) throws KeyStoreException {
        Set<TrustAnchor> trustAnchors = new HashSet<>();

        Enumeration<String> aliases = trustStore.aliases();
        while (aliases.hasMoreElements()) {
            String alias = aliases.nextElement();
            if (trustStore.isCertificateEntry(alias)) {
                Certificate cert = trustStore.getCertificate(alias);
                if (cert instanceof X509Certificate) {
                    trustAnchors.add(new TrustAnchor((X509Certificate) cert, null));
                }
            }
        }

        return trustAnchors;
    }

}
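
Note (editor's addition, not part of the diff): a minimal sketch of how the validator above would be invoked, assuming the certificate chain has already been captured elsewhere, for example from an SSL session.

    // Sketch only: hostname and chain are assumed to be supplied by the caller.
    static void checkChain(String hostname, X509Certificate[] chain) throws Exception {
        CustomPKIXValidator validator = new CustomPKIXValidator();   // default settings: revocation enabled
        PKIXValidationResult result = validator.validateCertificateChain(hostname, chain);
        if (!result.isValid()) {
            System.err.println("Chain for " + hostname + " rejected: " + result.errorMessage() + " " + result.errors());
        }
    }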
@@ -0,0 +1,116 @@
package nu.marginalia.ping.ssl;

import javax.net.ssl.SSLPeerUnverifiedException;
import javax.net.ssl.SSLSession;
import javax.net.ssl.SSLSessionContext;
import java.security.cert.Certificate;
import java.security.cert.X509Certificate;

/**
 * Mock SSL session for hostname verification
 */
public class MockSSLSession implements SSLSession {
    private final X509Certificate[] peerCertificates;

    public MockSSLSession(X509Certificate cert) {
        this.peerCertificates = new X509Certificate[]{cert};
    }

    @Override
    public Certificate[] getPeerCertificates() throws SSLPeerUnverifiedException {
        return peerCertificates;
    }

    // All other methods return default/empty values as they're not used by hostname verification
    @Override
    public byte[] getId() {
        return new byte[0];
    }

    @Override
    public SSLSessionContext getSessionContext() {
        return null;
    }

    @Override
    public long getCreationTime() {
        return 0;
    }

    @Override
    public long getLastAccessedTime() {
        return 0;
    }

    @Override
    public void invalidate() {
    }

    @Override
    public boolean isValid() {
        return true;
    }

    @Override
    public void putValue(String name, Object value) {
    }

    @Override
    public Object getValue(String name) {
        return null;
    }

    @Override
    public void removeValue(String name) {
    }

    @Override
    public String[] getValueNames() {
        return new String[0];
    }

    @Override
    public java.security.Principal getPeerPrincipal() throws SSLPeerUnverifiedException {
        return null;
    }

    @Override
    public java.security.Principal getLocalPrincipal() {
        return null;
    }

    @Override
    public String getCipherSuite() {
        return "";
    }

    @Override
    public String getProtocol() {
        return "";
    }

    @Override
    public String getPeerHost() {
        return "";
    }

    @Override
    public int getPeerPort() {
        return 0;
    }

    @Override
    public int getPacketBufferSize() {
        return 0;
    }

    @Override
    public int getApplicationBufferSize() {
        return 0;
    }

    @Override
    public Certificate[] getLocalCertificates() {
        return new Certificate[0];
    }
}
@@ -0,0 +1,14 @@
package nu.marginalia.ping.ssl;

import java.security.cert.CertPath;
import java.security.cert.PKIXCertPathValidatorResult;
import java.util.Set;

public record PKIXValidationResult(boolean isValid, String errorMessage,
                                   Set<PkixValidationError> errors,
                                   PKIXCertPathValidatorResult pkixResult,
                                   CertPath validatedPath,
                                   Set<String> criticalExtensions,
                                   boolean hostnameValid)
{
}
@@ -0,0 +1,11 @@
package nu.marginalia.ping.ssl;

public enum PkixValidationError {
    SAN_MISMATCH,
    EXPIRED,
    NOT_YET_VALID,
    PATH_VALIDATION_FAILED,
    INVALID_PKIX_PARAMETERS,
    UNKNOWN,
    UNSPECIFIED_HOST_ERROR;
}
@@ -0,0 +1,74 @@
package nu.marginalia.ping.svc;

import com.google.inject.Inject;
import nu.marginalia.ping.fetcher.PingDnsFetcher;
import nu.marginalia.ping.model.DomainDnsRecord;
import nu.marginalia.ping.model.WritableModel;
import nu.marginalia.ping.model.comparison.DnsRecordChange;
import nu.marginalia.ping.model.comparison.DomainDnsEvent;
import nu.marginalia.ping.util.JsonObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

public class DnsPingService {
    private final PingDnsFetcher pingDnsFetcher;
    private final DomainDnsInformationFactory domainDnsInformationFactory;

    private static final Logger logger = LoggerFactory.getLogger(DnsPingService.class);

    @Inject
    public DnsPingService(PingDnsFetcher pingDnsFetcher,
                          DomainDnsInformationFactory domainDnsInformationFactory)
    {
        this.pingDnsFetcher = pingDnsFetcher;
        this.domainDnsInformationFactory = domainDnsInformationFactory;
    }

    public List<WritableModel> pingDomain(String rootDomain, @Nullable DomainDnsRecord oldRecord) throws SQLException {
        var digResult = pingDnsFetcher.dig(rootDomain);

        List<WritableModel> generatedRecords = new ArrayList<>();

        var newRecord = domainDnsInformationFactory.create(
                rootDomain,
                oldRecord != null ? oldRecord.dnsRootDomainId() : null,
                digResult
        );

        generatedRecords.add(newRecord);

        // If we have an old record, compare it with the new one and optionally generate a DomainDnsEvent
        if (oldRecord != null) {
            var changes = DnsRecordChange.between(newRecord, oldRecord);
            switch (changes) {
                case DnsRecordChange.None _ -> {}
                case DnsRecordChange.Changed changed -> {
                    logger.info("DNS record for {} changed: {}", newRecord.dnsRootDomainId(), changed);
                    generatedRecords.add(DomainDnsEvent.builder()
                            .rootDomainId(newRecord.dnsRootDomainId())
                            .nodeId(newRecord.nodeAffinity())
                            .tsChange(newRecord.tsLastUpdate())
                            .changeARecords(changed.aRecordsChanged())
                            .changeAaaaRecords(changed.aaaaRecordsChanged())
                            .changeCname(changed.cnameRecordChanged())
                            .changeMxRecords(changed.mxRecordsChanged())
                            .changeCaaRecords(changed.caaRecordsChanged())
                            .changeTxtRecords(changed.txtRecordsChanged())
                            .changeNsRecords(changed.nsRecordsChanged())
                            .changeSoaRecord(changed.soaRecordChanged())
                            .dnsSignatureBefore(new JsonObject<>(oldRecord))
                            .dnsSignatureAfter(new JsonObject<>(newRecord))
                            .build());
                }
            }
        }

        return generatedRecords;
    }

}
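
Note (editor's addition, not part of the diff): a hedged usage sketch of the service above; the previously stored record and the logger are assumptions, and persistence of the returned WritableModel instances is handled elsewhere in the process.

    // Sketch only: compare fresh DNS state against a previously stored record (may be null on first contact).
    List<WritableModel> models = dnsPingService.pingDomain("marginalia.nu", previousRecord);
    for (WritableModel model : models) {
        logger.info("Would persist: {}", model);
    }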
@@ -0,0 +1,185 @@
package nu.marginalia.ping.svc;

import com.google.inject.Inject;
import nu.marginalia.geoip.GeoIpDictionary;
import nu.marginalia.geoip.sources.AsnTable;
import nu.marginalia.ping.BackoffStrategy;
import nu.marginalia.ping.fetcher.response.HttpResponse;
import nu.marginalia.ping.fetcher.response.HttpsResponse;
import nu.marginalia.ping.model.DomainAvailabilityRecord;
import nu.marginalia.ping.model.ErrorClassification;
import nu.marginalia.ping.model.HttpSchema;
import nu.marginalia.ping.ssl.PKIXValidationResult;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;
import java.net.InetAddress;
import java.security.cert.X509Certificate;
import java.time.Duration;
import java.time.Instant;
import java.time.temporal.ChronoUnit;

public class DomainAvailabilityInformationFactory {
    private static final Logger logger = LoggerFactory.getLogger(DomainAvailabilityInformationFactory.class);

    private final GeoIpDictionary geoIpDictionary;
    private final BackoffStrategy backoffStrategy;

    @Inject
    public DomainAvailabilityInformationFactory(GeoIpDictionary geoIpDictionary,
                                                BackoffStrategy backoffStrategy) {
        this.geoIpDictionary = geoIpDictionary;
        this.backoffStrategy = backoffStrategy;
    }

    public DomainAvailabilityRecord createError(int domainId,
                                                int nodeId,
                                                @Nullable DomainAvailabilityRecord previousRecord,
                                                ErrorClassification errorClassification,
                                                @Nullable String errorMessage
    ) {
        Duration currentRefreshInterval = previousRecord != null ? previousRecord.backoffFetchInterval() : null;
        int errorCount = previousRecord != null ? previousRecord.backoffConsecutiveFailures() : 0;
        Instant lastAvailable = previousRecord != null ? previousRecord.tsLastAvailable() : null;

        Duration refreshInterval = backoffStrategy.getUpdateTime(
                currentRefreshInterval,
                errorClassification,
                errorCount);

        return DomainAvailabilityRecord.builder()
                .domainId(domainId)
                .nodeId(nodeId)
                .serverAvailable(false)
                .errorClassification(errorClassification)
                .errorMessage(errorMessage)
                .tsLastAvailable(lastAvailable)
                .tsLastPing(Instant.now())
                .tsLastError(Instant.now())
                .nextScheduledUpdate(Instant.now().plus(refreshInterval))
                .backoffFetchInterval(refreshInterval)
                .backoffConsecutiveFailures(errorCount+1)
                .build();
    }

    public DomainAvailabilityRecord createHttpResponse(int domainId,
                                                       int nodeId,
                                                       @Nullable InetAddress address,
                                                       @Nullable DomainAvailabilityRecord previousRecord,
                                                       HttpResponse rsp) {

        Instant lastError = previousRecord != null ? previousRecord.tsLastAvailable() : null;

        return DomainAvailabilityRecord.builder()
                .domainId(domainId)
                .nodeId(nodeId)
                .serverAvailable(true)
                .serverIp(address != null ? address.getAddress() : null)
                .serverIpAsn(getAsn(address))
                .httpSchema(HttpSchema.HTTP)
                .httpStatus(rsp.httpStatus())
                .httpResponseTime(rsp.httpResponseTime())
                .httpEtag(rsp.headers().getFirst("ETag"))
                .httpLastModified(rsp.headers().getFirst("Last-Modified"))
                .tsLastPing(Instant.now())
                .tsLastAvailable(Instant.now())
                .tsLastError(lastError)
                .nextScheduledUpdate(Instant.now().plus(backoffStrategy.getOkInterval()))
                .backoffFetchInterval(backoffStrategy.getOkInterval())
                .build();

    }

    private Integer getAsn(@Nullable InetAddress address) {
        if (address == null) {
            return null;
        }
        // Resolve the ASN for the address via the GeoIP dictionary
        return geoIpDictionary.getAsnInfo(address).map(AsnTable.AsnInfo::asn).orElse(null);
    }

    public DomainAvailabilityRecord createHttpsResponse(int domainId,
                                                        int nodeId,
                                                        @Nullable InetAddress address,
                                                        @Nullable DomainAvailabilityRecord previousRecord,
                                                        PKIXValidationResult validationResult,
                                                        HttpsResponse rsp) {
        Instant updateTime;

        if (validationResult.isValid()) {
            updateTime = sslCertInformedUpdateTime((X509Certificate[]) rsp.sslCertificates());
        }
        else {
            updateTime = Instant.now().plus(backoffStrategy.getOkInterval());
        }

        Instant lastError = previousRecord != null ? previousRecord.tsLastAvailable() : null;

        return DomainAvailabilityRecord.builder()
                .domainId(domainId)
                .nodeId(nodeId)
                .serverAvailable(validationResult.isValid())
                .serverIp(address != null ? address.getAddress() : null)
                .serverIpAsn(getAsn(address))
                .httpSchema(HttpSchema.HTTPS)
                .httpStatus(rsp.httpStatus())
                .errorClassification(!validationResult.isValid() ? ErrorClassification.SSL_ERROR : ErrorClassification.NONE)
                .httpResponseTime(rsp.httpResponseTime())
                .httpEtag(rsp.headers().getFirst("ETag"))
                .httpLastModified(rsp.headers().getFirst("Last-Modified"))
                .tsLastPing(Instant.now())
                .tsLastError(lastError)
                .tsLastAvailable(Instant.now())
                .nextScheduledUpdate(updateTime)
                .backoffFetchInterval(backoffStrategy.getOkInterval())
                .build();
    }

    /**
     * Calculates the next update time for a domain based on the SSL certificate's expiry date.
     * If the certificate is valid for more than 3 days, it will check 3 days before expiry.
     * If it expires in less than 3 days, it will check just after expiry.
     * In both cases the result is capped at the normal OK interval.
     *
     * @param certificates The SSL certificates associated with the domain.
     * @return The next update time as an Instant.
     */
    private Instant sslCertInformedUpdateTime(X509Certificate[] certificates) {
        Instant now = Instant.now();
        Instant normalUpdateTime = now.plus(backoffStrategy.getOkInterval());

        if (certificates == null || certificates.length == 0) {
            return normalUpdateTime;
        }

        try {
            X509Certificate cert = certificates[0];

            // Use the first certificate's notAfter date as the update time
            Instant certExpiry = certificates[0].getNotAfter().toInstant();

            // If the certificate expires in less than 3 days, we'll check just after expiry
            if (Duration.between(Instant.now(), cert.getNotAfter().toInstant()).toDays() < 3) {
                return minDate(normalUpdateTime, certExpiry.plus(3, ChronoUnit.MINUTES));
            }
            else {
                // If the certificate is valid for more than 3 days, we'll check 3 days before expiry for renewal
                return minDate(normalUpdateTime, certExpiry.minus(3, ChronoUnit.DAYS));
            }

        } catch (Exception e) {
            logger.warn("Failed to get certificate expiry date: {}", e.getMessage());
        }

        return normalUpdateTime;
    }

    private Instant minDate(Instant a, Instant b) {
        return a.isBefore(b) ? a : b;
    }

}
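
Note (editor's addition, not part of the diff): a small worked example of the certificate-informed scheduling rule above, assuming a 24-hour OK interval (the real value comes from BackoffStrategy).

    // Certificate expires in 10 days: the candidate re-check time is 3 days before expiry,
    // capped at the normal OK interval via minDate().
    Instant expiry = Instant.now().plus(10, ChronoUnit.DAYS);
    Instant threeDaysBefore = expiry.minus(3, ChronoUnit.DAYS);
    Instant normal = Instant.now().plus(Duration.ofHours(24));        // assumed OK interval
    Instant next = normal.isBefore(threeDaysBefore) ? normal : threeDaysBefore;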
@@ -0,0 +1,71 @@
package nu.marginalia.ping.svc;

import com.google.inject.Inject;
import nu.marginalia.ping.PingIntervalsConfiguration;
import nu.marginalia.ping.model.DomainDnsRecord;
import nu.marginalia.ping.model.SingleDnsRecord;
import nu.marginalia.process.ProcessConfiguration;

import javax.annotation.Nullable;
import java.time.Duration;
import java.time.Instant;
import java.util.List;

public class DomainDnsInformationFactory {

    private final Duration defaultDnsUpdateInterval;
    private final int nodeId;

    @Inject
    public DomainDnsInformationFactory(ProcessConfiguration processConfiguration,
                                       PingIntervalsConfiguration pingIntervalsConfiguration) {
        this.nodeId = processConfiguration.node();
        this.defaultDnsUpdateInterval = pingIntervalsConfiguration.dnsUpdateInterval();
    }

    public DomainDnsRecord create(String rootDomain,
                                  @Nullable Integer rootDomainId,
                                  List<SingleDnsRecord> records)
    {
        var builder = DomainDnsRecord.builder()
                .rootDomainName(rootDomain)
                .dnsRootDomainId(rootDomainId)
                .tsLastUpdate(Instant.now())
                .nodeAffinity(nodeId)
                .tsNextScheduledUpdate(Instant.now().plus(defaultDnsUpdateInterval));

        for (var record : records) {
            switch (record.recordType().toLowerCase()) {
                case "a":
                    builder.addARecord(record.data());
                    break;
                case "aaaa":
                    builder.addAaaaRecord(record.data());
                    break;
                case "cname":
                    builder.cnameRecord(record.data());
                    break;
                case "mx":
                    builder.addMxRecord(record.data());
                    break;
                case "caa":
                    builder.addCaaRecord(record.data());
                    break;
                case "txt":
                    builder.addTxtRecord(record.data());
                    break;
                case "ns":
                    builder.addNsRecord(record.data());
                    break;
                case "soa":
                    builder.soaRecord(record.data());
                    break;
                default:
                    // Ignore unknown record types
            }
        }

        return builder.build();
    }

}
@@ -0,0 +1,190 @@
package nu.marginalia.ping.svc;

import nu.marginalia.ping.fetcher.response.HttpResponse;
import nu.marginalia.ping.fetcher.response.HttpsResponse;
import nu.marginalia.ping.model.DomainSecurityRecord;
import nu.marginalia.ping.model.HttpSchema;
import nu.marginalia.ping.ssl.PKIXValidationResult;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.security.cert.CertificateEncodingException;
import java.security.cert.X509Certificate;
import java.time.Instant;
import java.util.HashSet;
import java.util.Set;
import java.util.StringJoiner;

public class DomainSecurityInformationFactory {
    private static final Logger logger = LoggerFactory.getLogger(DomainSecurityInformationFactory.class);

    // Vanilla HTTP (not HTTPS) response has no SSL session information, so the SSL-related fields are left unset
    public DomainSecurityRecord createHttpSecurityInformation(HttpResponse httpResponse, int domainId, int nodeId) {

        var headers = httpResponse.headers();

        return DomainSecurityRecord.builder()
                .domainId(domainId)
                .nodeId(nodeId)
                .httpSchema(HttpSchema.HTTP)
                .httpVersion(httpResponse.version())
                .headerServer(headers.getFirst("Server"))
                .headerCorsAllowOrigin(headers.getFirst("Access-Control-Allow-Origin"))
                .headerCorsAllowCredentials("true".equals(headers.getFirst("Access-Control-Allow-Credentials")))
                .headerXContentTypeOptions(headers.getFirst("X-Content-Type-Options"))
                .headerXFrameOptions(headers.getFirst("X-Frame-Options"))
                .headerXXssProtection(headers.getFirst("X-XSS-Protection"))
                .headerReferrerPolicy(headers.getFirst("Referrer-Policy"))
                .headerStrictTransportSecurity(headers.getFirst("Strict-Transport-Security"))
                // .headerContentSecurityPolicy(headers.getFirst("Content-Security-Policy").getBytes())
                .httpCompression(headers.getFirst("Content-Encoding"))
                .httpCacheControl(headers.getFirst("Cache-Control"))
                .headerXPoweredBy(headers.getFirst("X-Powered-By"))
                .tsLastUpdate(Instant.now())
                .build();
    }

    // HTTPS response
    public DomainSecurityRecord createHttpsSecurityInformation(HttpsResponse httpResponse, PKIXValidationResult validationResult, int domainId, int nodeId) {

        var headers = httpResponse.headers();
        var metadata = httpResponse.sslMetadata();
        var sslCertificates = (X509Certificate[]) httpResponse.sslCertificates();

        StringJoiner san = new StringJoiner(", ");
        boolean isWildcard = false;
        try {
            if (sslCertificates != null && sslCertificates.length > 0) {
                for (var sanEntry : sslCertificates[0].getSubjectAlternativeNames()) {

                    if (sanEntry != null && sanEntry.size() >= 2) {
                        // Check if the SAN entry is a DNS or IP address
                        int type = (Integer) sanEntry.get(0);
                        String value = (String) sanEntry.get(1);
                        if (type == 2 || type == 7) { // DNS or IP SAN
                            san.add(value);
                        }
                        if (type == 2 && value.startsWith("*.")) {
                            isWildcard = true;
                        }
                    }

                }
            }
        }
        catch (Exception e) {
            logger.warn("Failed to get SAN from certificate: {}", e.getMessage());
        }

        String keyExchange = getKeyExchange(sslCertificates[0]);

        return DomainSecurityRecord.builder()
                .domainId(domainId)
                .nodeId(nodeId)
                .httpSchema(HttpSchema.HTTPS)
                .headerServer(headers.getFirst("Server"))
                .headerCorsAllowOrigin(headers.getFirst("Access-Control-Allow-Origin"))
                .headerCorsAllowCredentials("true".equals(headers.getFirst("Access-Control-Allow-Credentials")))
                .headerXContentTypeOptions(headers.getFirst("X-Content-Type-Options"))
                .headerXFrameOptions(headers.getFirst("X-Frame-Options"))
                .headerXXssProtection(headers.getFirst("X-XSS-Protection"))
                .headerReferrerPolicy(headers.getFirst("Referrer-Policy"))
                .headerStrictTransportSecurity(headers.getFirst("Strict-Transport-Security"))
                // .headerContentSecurityPolicy(headers.getFirst("Content-Security-Policy").getBytes())
                .httpCompression(headers.getFirst("Content-Encoding"))
                .httpCacheControl(headers.getFirst("Cache-Control"))
                .headerXPoweredBy(headers.getFirst("X-Powered-By"))
                .sslProtocol(metadata.protocol())
                .sslCipherSuite(metadata.cipherSuite())
                .sslKeyExchange(keyExchange)
                .sslCertNotBefore(sslCertificates[0].getNotBefore().toInstant())
                .sslCertNotAfter(sslCertificates[0].getNotAfter().toInstant())
                .sslCertIssuer(sslCertificates[0].getIssuerX500Principal().getName())
                .sslCertSubject(sslCertificates[0].getSubjectX500Principal().getName())
                .sslCertSerialNumber(sslCertificates[0].getSerialNumber().toString())
                .sslCertFingerprintSha256(getFingerprint(sslCertificates[0]))
                .sslCertPublicKeyHash(getPublicKeyHash(sslCertificates[0]))
                .sslCertSan(san.length() > 0 ? san.toString() : null)
                .sslCertWildcard(isWildcard)
                .sslCertificateChainLength(sslCertificates.length)
                .sslCertificateValid(validationResult.isValid())
                .httpVersion(httpResponse.version())
                .tsLastUpdate(Instant.now())
                .build();
    }

    private byte[] getPublicKeyHash(X509Certificate sslCertificate) {
        try {
            MessageDigest md = MessageDigest.getInstance("SHA-256");
            return md.digest(sslCertificate.getPublicKey().getEncoded());
        }
        catch (NoSuchAlgorithmException e) {
            logger.warn("Failed to calculate public key hash: {}", e.getMessage());
            return new byte[0]; // Fall back to an empty hash rather than failing the record
        }
    }

    private byte[] getFingerprint(X509Certificate sslCertificate) {
        try {
            MessageDigest md = MessageDigest.getInstance("SHA-256");
            return md.digest(sslCertificate.getEncoded());
        }
        catch (NoSuchAlgorithmException | CertificateEncodingException e) {
            logger.warn("Failed to calculate certificate fingerprint: {}", e.getMessage());
            return new byte[0]; // Fall back to an empty fingerprint rather than failing the record
        }
    }

    private String getKeyExchange(X509Certificate cert) {
        StringJoiner keyExchanges = new StringJoiner(", ");
        Set<String> keyUsages = getKeyUsage(cert);
        String algorithm = cert.getPublicKey().getAlgorithm();

        boolean supportsPFS = false; // Perfect Forward Secrecy
        if ("RSA".equals(algorithm)) {
            if (keyUsages.contains("keyEncipherment")) {
                keyExchanges.add("RSA");
            }
            if (keyUsages.contains("digitalSignature")) {
                keyExchanges.add("DHE_RSA");
                keyExchanges.add("ECDHE_RSA");
            }
        } else if ("EC".equals(algorithm)) {
            if (keyUsages.contains("digitalSignature")) {
                keyExchanges.add("ECDHE_ECDSA");
            }
            if (keyUsages.contains("keyAgreement")) {
                keyExchanges.add("ECDH_ECDSA");
            }
        }

        return keyExchanges.toString();
    }

    public static Set<String> getKeyUsage(X509Certificate cert) {
        boolean[] keyUsage = cert.getKeyUsage();
        Set<String> usages = new HashSet<>();

        if (keyUsage != null) {
            String[] names = {
                    "digitalSignature", "nonRepudiation", "keyEncipherment",
                    "dataEncipherment", "keyAgreement", "keyCertSign",
                    "cRLSign", "encipherOnly", "decipherOnly"
            };

            for (int i = 0; i < keyUsage.length && i < names.length; i++) {
                if (keyUsage[i]) {
                    usages.add(names[i]);
                }
            }
        }

        return usages;
    }

}
@@ -0,0 +1,280 @@
package nu.marginalia.ping.svc;

import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.ping.fetcher.PingHttpFetcher;
import nu.marginalia.ping.fetcher.response.*;
import nu.marginalia.ping.model.*;
import nu.marginalia.ping.model.comparison.DomainAvailabilityChange;
import nu.marginalia.ping.model.comparison.SecurityInformationChange;
import nu.marginalia.ping.ssl.CustomPKIXValidator;
import nu.marginalia.ping.ssl.PKIXValidationResult;
import nu.marginalia.ping.util.JsonObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.security.cert.X509Certificate;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

@Singleton
public class HttpPingService {

    private final PingHttpFetcher pingHttpFetcher;

    private final DomainAvailabilityInformationFactory domainAvailabilityInformationFactory;
    private final DomainSecurityInformationFactory domainSecurityInformationFactory;

    private static final Logger logger = LoggerFactory.getLogger(HttpPingService.class);
    CustomPKIXValidator validator;

    @Inject
    public HttpPingService(
            PingHttpFetcher pingHttpFetcher,
            DomainAvailabilityInformationFactory domainAvailabilityInformationFactory,
            DomainSecurityInformationFactory domainSecurityInformationFactory) throws Exception {
        this.pingHttpFetcher = pingHttpFetcher;
        this.domainAvailabilityInformationFactory = domainAvailabilityInformationFactory;
        this.domainSecurityInformationFactory = domainSecurityInformationFactory;
        this.validator = new CustomPKIXValidator();
    }

    private int compareInetAddresses(InetAddress a, InetAddress b) {
        byte[] aBytes = a.getAddress();
        byte[] bBytes = b.getAddress();

        int diff1 = Integer.compare(aBytes.length, bBytes.length);
        if (diff1 != 0) {
            return diff1;
        }

        return Arrays.compare(aBytes, bBytes);
    }

    public List<WritableModel> pingDomain(DomainReference domainReference,
                                          @Nullable DomainAvailabilityRecord oldPingStatus,
                                          @Nullable DomainSecurityRecord oldSecurityInformation) throws SQLException {
        // First we figure out if the domain maps to an IP address

        List<WritableModel> generatedRecords = new ArrayList<>();

        List<InetAddress> ipAddress = getIpAddress(domainReference.domainName());
        PingRequestResponse result;

        if (ipAddress.isEmpty()) {
            result = new UnknownHostError();
        }
        else {
            String url = "https://" + domainReference.domainName() + "/";
            String alternateUrl = "http://" + domainReference.domainName() + "/";

            result = pingHttpFetcher.fetchUrl(url, Method.HEAD, null, null);

            if (result instanceof HttpsResponse response && response.httpStatus() == 405) {
                // If we get a 405, we try the GET method instead as not all servers support HEAD requests
                result = pingHttpFetcher.fetchUrl(url, Method.GET, null, null);
            }
            else if (result instanceof ConnectionError) {
                var result2 = pingHttpFetcher.fetchUrl(alternateUrl, Method.HEAD, null, null);
                if (!(result2 instanceof ConnectionError)) {
                    result = result2;
                }
                if (result instanceof HttpResponse response && response.httpStatus() == 405) {
                    // If we get a 405, we try the GET method instead as not all servers support HEAD requests
                    result = pingHttpFetcher.fetchUrl(alternateUrl, Method.GET, null, null);
                }
            }
        }

        // For a consistent picture, we always use the "binary-smallest" IP address returned by InetAddress.getAllByName
        // for resolving ASN and similar information.
        final InetAddress lowestIpAddress = ipAddress.stream().min(this::compareInetAddresses).orElse(null);

        final DomainAvailabilityRecord newPingStatus;
        final DomainSecurityRecord newSecurityInformation;

        switch (result) {
            case UnknownHostError rsp -> {
                newPingStatus = domainAvailabilityInformationFactory.createError(
                        domainReference.domainId(),
                        domainReference.nodeId(),
                        oldPingStatus,
                        ErrorClassification.DNS_ERROR,
                        null);
                newSecurityInformation = null;
            }
            case ConnectionError rsp -> {
                newPingStatus = domainAvailabilityInformationFactory.createError(
                        domainReference.domainId(),
                        domainReference.nodeId(),
                        oldPingStatus,
                        ErrorClassification.CONNECTION_ERROR,
                        null);
                newSecurityInformation = null;
            }
            case TimeoutResponse rsp -> {
                newPingStatus = domainAvailabilityInformationFactory.createError(
                        domainReference.domainId(),
                        domainReference.nodeId(),
                        oldPingStatus,
                        ErrorClassification.TIMEOUT,
                        null);
                newSecurityInformation = null;
            }
            case ProtocolError rsp -> {
                newPingStatus = domainAvailabilityInformationFactory.createError(
                        domainReference.domainId(),
                        domainReference.nodeId(),
                        oldPingStatus,
                        ErrorClassification.HTTP_CLIENT_ERROR,
                        null);
                newSecurityInformation = null;
            }
            case HttpResponse httpResponse -> {
                newPingStatus = domainAvailabilityInformationFactory.createHttpResponse(
                        domainReference.domainId(),
                        domainReference.nodeId(),
                        lowestIpAddress,
                        oldPingStatus,
                        httpResponse);

                newSecurityInformation = domainSecurityInformationFactory.createHttpSecurityInformation(
                        httpResponse,
                        domainReference.domainId(),
                        domainReference.nodeId()
                );
            }
            case HttpsResponse httpsResponse -> {
                PKIXValidationResult validationResult = validator.validateCertificateChain(domainReference.domainName(), (X509Certificate[]) httpsResponse.sslCertificates());

                newPingStatus = domainAvailabilityInformationFactory.createHttpsResponse(
                        domainReference.domainId(),
                        domainReference.nodeId(),
                        lowestIpAddress,
                        oldPingStatus,
                        validationResult,
                        httpsResponse);

                newSecurityInformation = domainSecurityInformationFactory.createHttpsSecurityInformation(
                        httpsResponse,
                        validationResult,
                        domainReference.domainId(),
                        domainReference.nodeId()
                );
            }
        }

        // We always write the new ping status, even if it is the same as the old one.
        generatedRecords.add(newPingStatus);

        if (newSecurityInformation != null) {
            generatedRecords.add(newSecurityInformation);
        }

        if (oldPingStatus != null && newPingStatus != null) {
            comparePingStatuses(generatedRecords, oldPingStatus, newPingStatus);
        }
        if (oldSecurityInformation != null && newSecurityInformation != null) {
            compareSecurityInformation(generatedRecords,
                    oldSecurityInformation, oldPingStatus,
                    newSecurityInformation, newPingStatus);
        }

        return generatedRecords;
    }

    private void comparePingStatuses(List<WritableModel> generatedRecords,
                                     DomainAvailabilityRecord oldPingStatus,
                                     DomainAvailabilityRecord newPingStatus) {

        var change = DomainAvailabilityChange.between(oldPingStatus, newPingStatus);
        switch (change) {
            case DomainAvailabilityChange.None none -> {}
            case DomainAvailabilityChange.AvailableToUnavailable(AvailabilityOutageType outageType) -> {
                generatedRecords.add(new DomainAvailabilityEvent(
                        oldPingStatus.domainId(),
                        oldPingStatus.nodeId(),
                        false,
                        outageType,
                        newPingStatus.httpStatus(),
                        newPingStatus.errorMessage(),
                        newPingStatus.tsLastPing()
                ));
            }
            case DomainAvailabilityChange.UnavailableToAvailable _ -> {
                generatedRecords.add(
                        new DomainAvailabilityEvent(
                                oldPingStatus.domainId(),
                                oldPingStatus.nodeId(),
                                true,
                                AvailabilityOutageType.NONE,
                                newPingStatus.httpStatus(),
                                newPingStatus.errorMessage(),
                                newPingStatus.tsLastPing()
                        )
                );
            }
            case DomainAvailabilityChange.OutageTypeChange(AvailabilityOutageType newOutageType) -> {
                generatedRecords.add(
                        new DomainAvailabilityEvent(
                                oldPingStatus.domainId(),
                                oldPingStatus.nodeId(),
                                false,
                                newOutageType,
                                newPingStatus.httpStatus(),
                                newPingStatus.errorMessage(),
                                newPingStatus.tsLastPing()
                        )
                );
            }
        }
    }

    private void compareSecurityInformation(List<WritableModel> generatedRecords,
                                            DomainSecurityRecord oldSecurityInformation,
                                            DomainAvailabilityRecord oldPingStatus,
                                            DomainSecurityRecord newSecurityInformation,
                                            DomainAvailabilityRecord newPingStatus
    ) {
        var change = SecurityInformationChange.between(oldSecurityInformation, oldPingStatus, newSecurityInformation, newPingStatus);

        if (!change.isChanged())
            return;

        generatedRecords.add(new DomainSecurityEvent(
                newSecurityInformation.domainId(),
                newSecurityInformation.nodeId(),
                newSecurityInformation.tsLastUpdate(),
                change.isAsnChanged(),
                change.isCertificateFingerprintChanged(),
                change.isCertificateProfileChanged(),
                change.isCertificateSanChanged(),
                change.isCertificatePublicKeyChanged(),
                change.oldCertificateTimeToExpiry(),
                change.isSecurityHeadersChanged(),
                change.isIpAddressChanged(),
                change.isSoftwareHeaderChanged(),
                new JsonObject<>(oldSecurityInformation),
                new JsonObject<>(newSecurityInformation)
        ));

    }

    List<InetAddress> getIpAddress(String domainName) {
        try {
            return Arrays.asList(InetAddress.getAllByName(domainName));
        } catch (UnknownHostException e) {
            // The domain does not resolve; return an empty list
            return List.of();
        }
    }
}
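
Note (editor's addition, not part of the diff): the "binary-smallest" ordering above sorts by raw address length first, so IPv4 addresses (4 bytes) always come before IPv6 addresses (16 bytes), with ties broken by comparing the raw bytes. A sketch using documentation addresses:

    InetAddress v4 = InetAddress.getByName("192.0.2.10");     // 4-byte IPv4 address
    InetAddress v6 = InetAddress.getByName("2001:db8::1");    // 16-byte IPv6 address
    // Under compareInetAddresses(), v4 sorts before v6, so it is the one used for ASN lookup.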
Some files were not shown because too many files have changed in this diff.