Mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git
Synced 2025-10-06 17:32:39 +02:00

Compare commits: deploy-020...deploy-025 (73 commits)
Commits (SHA1 only; the author, message and date columns were not preserved in this view):

e7fa558954
720685bf3f
cbec63c7da
b03ca75785
184aedc071
0275bad281
fd83a9d0b8
d556f8ae3a
e37559837b
3564c4aaee
92c54563ab
d7a5d90b07
0a0e88fd6e
b4fc0c4368
87ee8765b8
1adf4835fa
b7b5d0bf46
416059adde
db7930016a
82456ad673
0882a6d9cd
5020029c2d
ac44d0b093
4b32b9b10e
9f041d6631
13fb1efce4
c1225165b7
67ad7a3bbc
ed62ec8a35
42b24cfa34
1ffaab2da6
5f93c7f767
4001c68c82
6b811489c5
e9d317c65d
16b05a4737
021cd73cbb
4253bd53b5
14c87461a5
9afed0a18e
afad4deb94
f071c947e4
79996c9348
db907ab06a
c49cd9dd95
eec9df3b0a
e5f3288de6
d587544d3a
1a9ae1bc40
e0c81e956a
542fb12b38
65ec734566
10b6a25c63
6260f6bec7
d6d5467696
034560ca75
e994fddae4
345f01f306
5a8e286689
39a055aa94
37aaa90dc9
24022c5adc
1de9ecc0b6
9b80245ea0
4e1595c1a6
0be8585fa5
a0fe070fe7
abe9da0fc6
56d0128b0a
840b68ac55
c34ff6d6c3
32780967d8
7330bc489d
ROADMAP.md (16 changed lines)

@@ -38,14 +38,6 @@ associated with each language added, at least a models file or two, as well as s
 It would be very helpful to find a speaker of a large language other than English to help in the fine tuning.
 
-## Support for binary formats like PDF
-
-The crawler needs to be modified to retain them, and the conversion logic needs to parse them.
-The documents database probably should have some sort of flag indicating it's a PDF as well.
-
-PDF parsing is known to be a bit of a security liability so some thought needs to be put in
-that direction as well.
-
 ## Custom ranking logic
 
 Stract does an interesting thing where they have configurable search filters.

@@ -66,6 +58,14 @@ One of the search engine's biggest limitations right now is that it does not ind
 # Completed
 
+## Support for binary formats like PDF (COMPLETED 2025-05)
+
+The crawler needs to be modified to retain them, and the conversion logic needs to parse them.
+The documents database probably should have some sort of flag indicating it's a PDF as well.
+
+PDF parsing is known to be a bit of a security liability so some thought needs to be put in
+that direction as well.
+
 ## Web Design Overhaul (COMPLETED 2025-01)
 
 The design is kinda clunky and hard to maintain, and needlessly outdated-looking.
@@ -1,3 +1,8 @@
 package nu.marginalia;
 
+/**
+ * A record representing a User Agent.
+ * @param uaString - the header value of the User Agent
+ * @param uaIdentifier - what we look for in robots.txt
+ */
 public record UserAgent(String uaString, String uaIdentifier) {}
@@ -0,0 +1,5 @@ (new migration file)
CREATE TABLE IF NOT EXISTS WMSA_prod.NSFW_DOMAINS (
    ID INT NOT NULL AUTO_INCREMENT,
    TIER INT NOT NULL,
    PRIMARY KEY (ID)
);
@@ -0,0 +1,213 @@ (new migration file)

-- Create metadata tables for domain ping status and security information

-- These are not ICMP pings, but rather HTTP(S) pings to check the availability and security
-- of web servers associated with domains, to assess uptime and changes in security configurations
-- indicating ownership changes or security issues.

-- Note: DOMAIN_ID and NODE_ID are used to identify the domain and the node that performed the ping.
-- These are strictly speaking foreign keys to the EC_DOMAIN table, but as it
-- is strictly append-only, we do not need to enforce foreign key constraints.

CREATE TABLE IF NOT EXISTS DOMAIN_AVAILABILITY_INFORMATION (
    DOMAIN_ID INT NOT NULL PRIMARY KEY,
    NODE_ID INT NOT NULL,

    SERVER_AVAILABLE BOOLEAN NOT NULL,       -- Indicates if the server is available (true) or not (false)
    SERVER_IP VARBINARY(16),                 -- IP address of the server (IPv4 or IPv6)
    SERVER_IP_ASN INTEGER,                   -- Autonomous System number

    DATA_HASH BIGINT,                        -- Hash of the data for integrity checks
    SECURITY_CONFIG_HASH BIGINT,             -- Hash of the security configuration for integrity checks

    HTTP_SCHEMA ENUM('HTTP', 'HTTPS'),       -- HTTP or HTTPS protocol used
    HTTP_ETAG VARCHAR(255),                  -- ETag of the resource as per HTTP headers
    HTTP_LAST_MODIFIED VARCHAR(255),         -- Last modified date of the resource as per HTTP headers
    HTTP_STATUS INT,                         -- HTTP status code (e.g., 200, 404, etc.)
    HTTP_LOCATION VARCHAR(255),              -- If the server redirects, this is the location of the redirect
    HTTP_RESPONSE_TIME_MS SMALLINT UNSIGNED, -- Response time in milliseconds

    ERROR_CLASSIFICATION ENUM('NONE', 'TIMEOUT', 'SSL_ERROR', 'DNS_ERROR', 'CONNECTION_ERROR', 'HTTP_CLIENT_ERROR', 'HTTP_SERVER_ERROR', 'UNKNOWN'), -- Classification of the error if the server is not available
    ERROR_MESSAGE VARCHAR(255),              -- Error message if the server is not available

    TS_LAST_PING TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, -- Timestamp of the last ping
    TS_LAST_AVAILABLE TIMESTAMP,             -- Timestamp of the last time the server was available
    TS_LAST_ERROR TIMESTAMP,                 -- Timestamp of the last error encountered

    NEXT_SCHEDULED_UPDATE TIMESTAMP NOT NULL,
    BACKOFF_CONSECUTIVE_FAILURES INT NOT NULL DEFAULT 0, -- Number of consecutive failures to ping the server
    BACKOFF_FETCH_INTERVAL INT NOT NULL DEFAULT 60       -- Interval in seconds for the next scheduled ping
) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;

CREATE INDEX IF NOT EXISTS DOMAIN_AVAILABILITY_INFORMATION__NODE_ID__DOMAIN_ID_IDX ON DOMAIN_AVAILABILITY_INFORMATION (NODE_ID, DOMAIN_ID);
CREATE INDEX IF NOT EXISTS DOMAIN_AVAILABILITY_INFORMATION__NEXT_SCHEDULED_UPDATE_IDX ON DOMAIN_AVAILABILITY_INFORMATION (NODE_ID, NEXT_SCHEDULED_UPDATE);


CREATE TABLE IF NOT EXISTS DOMAIN_SECURITY_INFORMATION (
    DOMAIN_ID INT NOT NULL PRIMARY KEY,
    NODE_ID INT NOT NULL,

    ASN INTEGER,                             -- Autonomous System Number (ASN) of the server
    HTTP_SCHEMA ENUM('HTTP', 'HTTPS'),       -- HTTP or HTTPS protocol used
    HTTP_VERSION VARCHAR(10),                -- HTTP version used (e.g., HTTP/1.1, HTTP/2)
    HTTP_COMPRESSION VARCHAR(50),            -- Compression method used (e.g., gzip, deflate, br)
    HTTP_CACHE_CONTROL TEXT,                 -- Cache control directives from HTTP headers

    SSL_CERT_NOT_BEFORE TIMESTAMP,           -- Valid from date (usually same as issued)
    SSL_CERT_NOT_AFTER TIMESTAMP,            -- Valid until date (usually same as expires)

    SSL_CERT_ISSUER VARCHAR(255),            -- CA that issued the cert
    SSL_CERT_SUBJECT VARCHAR(255),           -- Certificate subject/CN

    SSL_CERT_PUBLIC_KEY_HASH BINARY(32),     -- SHA-256 hash of the public key
    SSL_CERT_SERIAL_NUMBER VARCHAR(100),     -- Unique cert serial number
    SSL_CERT_FINGERPRINT_SHA256 BINARY(32),  -- SHA-256 fingerprint for exact identification
    SSL_CERT_SAN TEXT,                       -- Subject Alternative Names (JSON array)
    SSL_CERT_WILDCARD BOOLEAN,               -- Wildcard certificate (*.example.com)

    SSL_PROTOCOL VARCHAR(20),                -- TLS 1.2, TLS 1.3, etc.
    SSL_CIPHER_SUITE VARCHAR(100),           -- e.g., TLS_AES_256_GCM_SHA384
    SSL_KEY_EXCHANGE VARCHAR(50),            -- ECDHE, RSA, etc.
    SSL_CERTIFICATE_CHAIN_LENGTH TINYINT,    -- Number of certs in chain

    SSL_CERTIFICATE_VALID BOOLEAN,           -- Valid cert chain

    HEADER_CORS_ALLOW_ORIGIN TEXT,           -- Could be *, specific domains, or null
    HEADER_CORS_ALLOW_CREDENTIALS BOOLEAN,   -- Credential handling
    HEADER_CONTENT_SECURITY_POLICY_HASH INT, -- CSP header, hash of the policy
    HEADER_STRICT_TRANSPORT_SECURITY VARCHAR(255), -- HSTS header
    HEADER_REFERRER_POLICY VARCHAR(50),      -- Referrer handling
    HEADER_X_FRAME_OPTIONS VARCHAR(50),      -- Clickjacking protection
    HEADER_X_CONTENT_TYPE_OPTIONS VARCHAR(50), -- MIME sniffing protection
    HEADER_X_XSS_PROTECTION VARCHAR(50),     -- XSS protection header

    HEADER_SERVER VARCHAR(255),              -- Server header (e.g., Apache, Nginx, etc.)
    HEADER_X_POWERED_BY VARCHAR(255),        -- X-Powered-By header (if present)

    TS_LAST_UPDATE TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP -- Timestamp of the last SSL check
) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;

CREATE INDEX IF NOT EXISTS DOMAIN_SECURITY_INFORMATION__NODE_ID__DOMAIN_ID_IDX ON DOMAIN_SECURITY_INFORMATION (NODE_ID, DOMAIN_ID);

CREATE TABLE IF NOT EXISTS DOMAIN_SECURITY_EVENTS (
    CHANGE_ID BIGINT AUTO_INCREMENT PRIMARY KEY, -- Unique identifier for the change
    DOMAIN_ID INT NOT NULL,                      -- Domain ID, used as a foreign key to EC_DOMAIN
    NODE_ID INT NOT NULL,

    TS_CHANGE TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, -- Timestamp of the change

    CHANGE_ASN BOOLEAN NOT NULL DEFAULT FALSE,                     -- Indicates if the change is related to ASN (Autonomous System Number)
    CHANGE_CERTIFICATE_FINGERPRINT BOOLEAN NOT NULL DEFAULT FALSE, -- Indicates if the change is related to SSL certificate fingerprint
    CHANGE_CERTIFICATE_PROFILE BOOLEAN NOT NULL DEFAULT FALSE,     -- Indicates if the change is related to SSL certificate profile (e.g., algorithm, exchange)
    CHANGE_CERTIFICATE_SAN BOOLEAN NOT NULL DEFAULT FALSE,         -- Indicates if the change is related to SSL certificate SAN (Subject Alternative Name)
    CHANGE_CERTIFICATE_PUBLIC_KEY BOOLEAN NOT NULL DEFAULT FALSE,  -- Indicates if the change is related to SSL certificate public key
    CHANGE_SECURITY_HEADERS BOOLEAN NOT NULL DEFAULT FALSE,        -- Indicates if the change is related to security headers
    CHANGE_IP_ADDRESS BOOLEAN NOT NULL DEFAULT FALSE,              -- Indicates if the change is related to IP address
    CHANGE_SOFTWARE BOOLEAN NOT NULL DEFAULT FALSE,                -- Indicates if the change is related to the generator (e.g., web server software)
    OLD_CERT_TIME_TO_EXPIRY INT,                                   -- Time to expiry of the old certificate in hours, if applicable

    SECURITY_SIGNATURE_BEFORE BLOB NOT NULL, -- Security signature before the change, gzipped json record
    SECURITY_SIGNATURE_AFTER BLOB NOT NULL   -- Security signature after the change, gzipped json record
) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;

CREATE INDEX IF NOT EXISTS DOMAIN_SECURITY_EVENTS__NODE_ID__DOMAIN_ID_IDX ON DOMAIN_SECURITY_EVENTS (NODE_ID, DOMAIN_ID);
CREATE INDEX IF NOT EXISTS DOMAIN_SECURITY_EVENTS__TS_CHANGE_IDX ON DOMAIN_SECURITY_EVENTS (TS_CHANGE);

CREATE TABLE IF NOT EXISTS DOMAIN_AVAILABILITY_EVENTS (
    DOMAIN_ID INT NOT NULL,
    NODE_ID INT NOT NULL,

    AVAILABLE BOOLEAN NOT NULL, -- True if the service is available, false if it is not
    OUTAGE_TYPE ENUM('NONE', 'TIMEOUT', 'SSL_ERROR', 'DNS_ERROR', 'CONNECTION_ERROR', 'HTTP_CLIENT_ERROR', 'HTTP_SERVER_ERROR', 'UNKNOWN') NOT NULL,
    HTTP_STATUS_CODE INT,       -- HTTP status code if available (e.g., 200, 404, etc.)
    ERROR_MESSAGE VARCHAR(255), -- Specific error details

    TS_CHANGE TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, -- Timestamp of the last update

    AVAILABILITY_RECORD_ID BIGINT AUTO_INCREMENT,
    P_KEY_MONTH TINYINT NOT NULL DEFAULT MONTH(TS_CHANGE), -- Month of the change for partitioning
    PRIMARY KEY (AVAILABILITY_RECORD_ID, P_KEY_MONTH)
)
CHARACTER SET utf8mb4 COLLATE utf8mb4_bin
PARTITION BY RANGE (P_KEY_MONTH) (
    PARTITION p0 VALUES LESS THAN (1),   -- January
    PARTITION p1 VALUES LESS THAN (2),   -- February
    PARTITION p2 VALUES LESS THAN (3),   -- March
    PARTITION p3 VALUES LESS THAN (4),   -- April
    PARTITION p4 VALUES LESS THAN (5),   -- May
    PARTITION p5 VALUES LESS THAN (6),   -- June
    PARTITION p6 VALUES LESS THAN (7),   -- July
    PARTITION p7 VALUES LESS THAN (8),   -- August
    PARTITION p8 VALUES LESS THAN (9),   -- September
    PARTITION p9 VALUES LESS THAN (10),  -- October
    PARTITION p10 VALUES LESS THAN (11), -- November
    PARTITION p11 VALUES LESS THAN (12)  -- December
);

CREATE INDEX DOMAIN_AVAILABILITY_EVENTS__DOMAIN_ID_TS_IDX ON DOMAIN_AVAILABILITY_EVENTS (DOMAIN_ID, TS_CHANGE);
CREATE INDEX DOMAIN_AVAILABILITY_EVENTS__TS_CHANGE_IDX ON DOMAIN_AVAILABILITY_EVENTS (TS_CHANGE);

CREATE TABLE IF NOT EXISTS DOMAIN_DNS_INFORMATION (
    DNS_ROOT_DOMAIN_ID INT AUTO_INCREMENT PRIMARY KEY,
    ROOT_DOMAIN_NAME VARCHAR(255) NOT NULL UNIQUE,
    NODE_AFFINITY INT NOT NULL,    -- Node ID that performs the DNS check, assign randomly across nodes

    DNS_A_RECORDS TEXT,            -- JSON array of IPv4 addresses
    DNS_AAAA_RECORDS TEXT,         -- JSON array of IPv6 addresses
    DNS_CNAME_RECORD VARCHAR(255), -- Canonical name (if applicable)
    DNS_MX_RECORDS TEXT,           -- JSON array of mail exchange records
    DNS_CAA_RECORDS TEXT,          -- Certificate Authority Authorization
    DNS_TXT_RECORDS TEXT,          -- TXT records (SPF, DKIM, verification, etc.)
    DNS_NS_RECORDS TEXT,           -- Name servers (JSON array)
    DNS_SOA_RECORD TEXT,           -- Start of Authority (JSON object)

    TS_LAST_DNS_CHECK TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    TS_NEXT_DNS_CHECK TIMESTAMP NOT NULL,
    DNS_CHECK_PRIORITY TINYINT DEFAULT 0 -- Priority of the DNS check, in case we want to schedule a refresh sooner
) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;

CREATE INDEX DOMAIN_DNS_INFORMATION__PRIORITY_NEXT_CHECK_IDX ON DOMAIN_DNS_INFORMATION (NODE_AFFINITY, DNS_CHECK_PRIORITY DESC, TS_NEXT_DNS_CHECK);

CREATE TABLE IF NOT EXISTS DOMAIN_DNS_EVENTS (
    DNS_ROOT_DOMAIN_ID INT NOT NULL,
    NODE_ID INT NOT NULL,

    TS_CHANGE TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,

    -- DNS change type flags
    CHANGE_A_RECORDS BOOLEAN NOT NULL DEFAULT FALSE,    -- IPv4 address changes
    CHANGE_AAAA_RECORDS BOOLEAN NOT NULL DEFAULT FALSE, -- IPv6 address changes
    CHANGE_CNAME BOOLEAN NOT NULL DEFAULT FALSE,        -- CNAME changes
    CHANGE_MX_RECORDS BOOLEAN NOT NULL DEFAULT FALSE,   -- Mail server changes
    CHANGE_CAA_RECORDS BOOLEAN NOT NULL DEFAULT FALSE,  -- Certificate authority changes
    CHANGE_TXT_RECORDS BOOLEAN NOT NULL DEFAULT FALSE,  -- TXT record changes (SPF, DKIM, etc.)
    CHANGE_NS_RECORDS BOOLEAN NOT NULL DEFAULT FALSE,   -- Name server changes (big red flag!)
    CHANGE_SOA_RECORD BOOLEAN NOT NULL DEFAULT FALSE,   -- Start of Authority changes

    DNS_SIGNATURE_BEFORE BLOB NOT NULL, -- Compressed JSON snapshot of DNS records before change
    DNS_SIGNATURE_AFTER BLOB NOT NULL,  -- Compressed JSON snapshot of DNS records after change

    DNS_EVENT_ID BIGINT AUTO_INCREMENT,
    P_KEY_MONTH TINYINT NOT NULL DEFAULT MONTH(TS_CHANGE), -- Month of the change for partitioning
    PRIMARY KEY (DNS_EVENT_ID, P_KEY_MONTH)
)
CHARACTER SET utf8mb4 COLLATE utf8mb4_bin
PARTITION BY RANGE (P_KEY_MONTH) (
    PARTITION p0 VALUES LESS THAN (1),   -- January
    PARTITION p1 VALUES LESS THAN (2),   -- February
    PARTITION p2 VALUES LESS THAN (3),   -- March
    PARTITION p3 VALUES LESS THAN (4),   -- April
    PARTITION p4 VALUES LESS THAN (5),   -- May
    PARTITION p5 VALUES LESS THAN (6),   -- June
    PARTITION p6 VALUES LESS THAN (7),   -- July
    PARTITION p7 VALUES LESS THAN (8),   -- August
    PARTITION p8 VALUES LESS THAN (9),   -- September
    PARTITION p9 VALUES LESS THAN (10),  -- October
    PARTITION p10 VALUES LESS THAN (11), -- November
    PARTITION p11 VALUES LESS THAN (12)  -- December
);

CREATE INDEX DOMAIN_DNS_EVENTS__DNS_ROOT_DOMAIN_ID_TS_IDX ON DOMAIN_DNS_EVENTS (DNS_ROOT_DOMAIN_ID, TS_CHANGE);
CREATE INDEX DOMAIN_DNS_EVENTS__TS_CHANGE_IDX ON DOMAIN_DNS_EVENTS (TS_CHANGE);
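The migration above only defines the schema; how the new ping process consumes it is not part of this diff. As a rough, hypothetical sketch of how the (NODE_ID, NEXT_SCHEDULED_UPDATE) index might be used to find domains that are due for another HTTP(S) ping (class, method and variable names below are illustrative and not taken from the repository):

import com.zaxxer.hikari.HikariDataSource;

import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

class PingScheduleQuery {
    /** Returns the IDs of domains whose next scheduled ping on the given node is overdue,
     *  leaning on DOMAIN_AVAILABILITY_INFORMATION__NEXT_SCHEDULED_UPDATE_IDX defined above. */
    static List<Integer> domainsDueForPing(HikariDataSource dataSource, int nodeId) throws SQLException {
        List<Integer> due = new ArrayList<>();

        try (var conn = dataSource.getConnection();
             var stmt = conn.prepareStatement("""
                     SELECT DOMAIN_ID
                     FROM DOMAIN_AVAILABILITY_INFORMATION
                     WHERE NODE_ID = ?
                       AND NEXT_SCHEDULED_UPDATE <= NOW()
                     """)) {
            stmt.setInt(1, nodeId);

            try (var rs = stmt.executeQuery()) {
                while (rs.next()) {
                    due.add(rs.getInt("DOMAIN_ID"));
                }
            }
        }

        return due;
    }
}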
@@ -0,0 +1,6 @@ (new migration file)
-- Add additional summary columns to DOMAIN_SECURITY_EVENTS table
-- to make it easier to make sense of certificate changes

ALTER TABLE DOMAIN_SECURITY_EVENTS ADD COLUMN CHANGE_CERTIFICATE_SERIAL_NUMBER BOOLEAN NOT NULL DEFAULT FALSE;
ALTER TABLE DOMAIN_SECURITY_EVENTS ADD COLUMN CHANGE_CERTIFICATE_ISSUER BOOLEAN NOT NULL DEFAULT FALSE;
OPTIMIZE TABLE DOMAIN_SECURITY_EVENTS;
@@ -0,0 +1,7 @@ (new migration file)
-- Add additional summary columns to DOMAIN_SECURITY_INFORMATION table
-- to make it easier to get more information about the SSL certificate's validity

ALTER TABLE DOMAIN_SECURITY_INFORMATION ADD COLUMN SSL_CHAIN_VALID BOOLEAN DEFAULT NULL;
ALTER TABLE DOMAIN_SECURITY_INFORMATION ADD COLUMN SSL_HOST_VALID BOOLEAN DEFAULT NULL;
ALTER TABLE DOMAIN_SECURITY_INFORMATION ADD COLUMN SSL_DATE_VALID BOOLEAN DEFAULT NULL;
OPTIMIZE TABLE DOMAIN_SECURITY_INFORMATION;
@@ -0,0 +1,5 @@ (new migration file)
-- Add additional summary columns to DOMAIN_SECURITY_EVENTS table
-- to make it easier to make sense of certificate changes

ALTER TABLE DOMAIN_SECURITY_EVENTS ADD COLUMN CHANGE_SCHEMA ENUM('NONE', 'HTTP_TO_HTTPS', 'HTTPS_TO_HTTP', 'UNKNOWN') NOT NULL DEFAULT 'UNKNOWN';
OPTIMIZE TABLE DOMAIN_SECURITY_EVENTS;
@@ -6,11 +6,20 @@ import nu.marginalia.model.EdgeDomain;
 import nu.marginalia.model.EdgeUrl;
 
 import java.net.URISyntaxException;
+import java.time.Instant;
 
 public class GsonFactory {
     public static Gson get() {
         return new GsonBuilder()
                 .registerTypeAdapterFactory(RecordTypeAdapterFactory.builder().allowMissingComponentValues().create())
+                .registerTypeAdapter(Instant.class, (JsonSerializer<Instant>) (src, typeOfSrc, context) -> new JsonPrimitive(src.toEpochMilli()))
+                .registerTypeAdapter(Instant.class, (JsonDeserializer<Instant>) (json, typeOfT, context) -> {
+                    if (json.isJsonPrimitive() && json.getAsJsonPrimitive().isNumber()) {
+                        return Instant.ofEpochMilli(json.getAsLong());
+                    } else {
+                        throw new JsonParseException("Expected a number for Instant");
+                    }
+                })
                 .registerTypeAdapter(EdgeUrl.class, (JsonSerializer<EdgeUrl>) (src, typeOfSrc, context) -> new JsonPrimitive(src.toString()))
                 .registerTypeAdapter(EdgeDomain.class, (JsonSerializer<EdgeDomain>) (src, typeOfSrc, context) -> new JsonPrimitive(src.toString()))
                 .registerTypeAdapter(EdgeUrl.class, (JsonDeserializer<EdgeUrl>) (json, typeOfT, context) -> {
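The new adapters serialize java.time.Instant as epoch milliseconds and deserialize it back from a numeric JSON primitive. A minimal round-trip sketch, assuming GsonFactory is imported from its actual package (the package declaration is not visible in this hunk):

import com.google.gson.Gson;
import java.time.Instant;

class InstantRoundTripExample {
    public static void main(String[] args) {
        Gson gson = GsonFactory.get();   // the factory configured above
        Instant now = Instant.now();

        String json = gson.toJson(now);  // a bare number, e.g. 1736160000000
        Instant back = gson.fromJson(json, Instant.class);

        // Millisecond precision survives the round trip; anything finer is truncated.
        System.out.println(now.toEpochMilli() == back.toEpochMilli()); // true
    }
}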
@@ -0,0 +1,59 @@ (new file)
package nu.marginalia.process.control;

import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.process.ProcessConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.sql.SQLException;
import java.util.Objects;
import java.util.UUID;

@Singleton
public class ProcessEventLog {
    private final HikariDataSource dataSource;

    private final Logger logger = LoggerFactory.getLogger(ProcessEventLog.class);

    private final String serviceName;
    private final UUID instanceUuid;
    private final String serviceBase;

    @Inject
    public ProcessEventLog(HikariDataSource dataSource, ProcessConfiguration configuration) {
        this.dataSource = dataSource;

        this.serviceName = configuration.processName() + ":" + configuration.node();
        this.instanceUuid = configuration.instanceUuid();
        this.serviceBase = configuration.processName();

        logger.info("Starting service {} instance {}", serviceName, instanceUuid);

        logEvent("PCS-START", serviceName);
    }

    public void logEvent(Class<?> type, String message) {
        logEvent(type.getSimpleName(), message);
    }

    public void logEvent(String type, String message) {
        try (var conn = dataSource.getConnection();
             var stmt = conn.prepareStatement("""
                     INSERT INTO SERVICE_EVENTLOG(SERVICE_NAME, SERVICE_BASE, INSTANCE, EVENT_TYPE, EVENT_MESSAGE)
                     VALUES (?, ?, ?, ?, ?)
                     """)) {
            stmt.setString(1, serviceName);
            stmt.setString(2, serviceBase);
            stmt.setString(3, instanceUuid.toString());
            stmt.setString(4, type);
            stmt.setString(5, Objects.requireNonNull(message, ""));

            stmt.executeUpdate();
        }
        catch (SQLException ex) {
            logger.error("Failed to log event {}:{}", type, message);
        }
    }
}
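A hypothetical consumer of this class, assuming it is injected by Guice the same way the constructor above is (the example class itself is not part of the repository):

import com.google.inject.Inject;
import nu.marginalia.process.control.ProcessEventLog;

class ExampleProcessTask {
    private final ProcessEventLog eventLog;

    @Inject
    ExampleProcessTask(ProcessEventLog eventLog) {
        this.eventLog = eventLog;
    }

    void run() {
        // Each call inserts one row into SERVICE_EVENTLOG, tagged with this
        // process's name, base name and instance UUID.
        eventLog.logEvent("EXAMPLE-TASK-START", "starting example task");
        // ... do the actual work ...
        eventLog.logEvent(ExampleProcessTask.class, "finished example task");
    }
}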
@@ -1,17 +1,21 @@
 package nu.marginalia.service.discovery;
 
-import nu.marginalia.service.discovery.monitor.*;
+import com.google.inject.ImplementedBy;
+import nu.marginalia.service.discovery.monitor.ServiceChangeMonitor;
+import nu.marginalia.service.discovery.monitor.ServiceMonitorIf;
 import nu.marginalia.service.discovery.property.ServiceEndpoint;
-import static nu.marginalia.service.discovery.property.ServiceEndpoint.*;
-
 import nu.marginalia.service.discovery.property.ServiceKey;
+import org.apache.curator.framework.recipes.locks.InterProcessSemaphoreV2;
 
 import java.util.List;
 import java.util.UUID;
 
+import static nu.marginalia.service.discovery.property.ServiceEndpoint.InstanceAddress;
+
 /** A service registry that allows services to register themselves and
  * be discovered by other services on the network.
  */
+@ImplementedBy(ZkServiceRegistry.class)
 public interface ServiceRegistryIf {
     /**
      * Register a service with the registry.

@@ -57,4 +61,9 @@ public interface ServiceRegistryIf {
      * </ul>
      * */
     void registerMonitor(ServiceMonitorIf monitor) throws Exception;
+
+    void registerProcess(String processName, int nodeId);
+    void deregisterProcess(String processName, int nodeId);
+
+    InterProcessSemaphoreV2 getSemaphore(String name, int permits) throws Exception;
 }
@@ -6,6 +6,7 @@ import nu.marginalia.service.discovery.monitor.ServiceMonitorIf;
 import nu.marginalia.service.discovery.property.ServiceEndpoint;
 import nu.marginalia.service.discovery.property.ServiceKey;
 import org.apache.curator.framework.CuratorFramework;
+import org.apache.curator.framework.recipes.locks.InterProcessSemaphoreV2;
 import org.apache.curator.utils.ZKPaths;
 import org.apache.zookeeper.CreateMode;
 import org.apache.zookeeper.Watcher;

@@ -256,6 +257,42 @@ public class ZkServiceRegistry implements ServiceRegistryIf {
                 .forPath("/running-instances");
     }
 
+    @Override
+    public void registerProcess(String processName, int nodeId) {
+        String path = "/process-locks/" + processName + "/" + nodeId;
+        try {
+            curatorFramework.create()
+                    .creatingParentsIfNeeded()
+                    .withMode(CreateMode.EPHEMERAL)
+                    .forPath(path);
+            livenessPaths.add(path);
+        }
+        catch (Exception ex) {
+            logger.error("Failed to register process {} on node {}", processName, nodeId, ex);
+        }
+    }
+
+    @Override
+    public void deregisterProcess(String processName, int nodeId) {
+        String path = "/process-locks/" + processName + "/" + nodeId;
+        try {
+            curatorFramework.delete().forPath(path);
+            livenessPaths.remove(path);
+        }
+        catch (Exception ex) {
+            logger.error("Failed to deregister process {} on node {}", processName, nodeId, ex);
+        }
+    }
+
+    @Override
+    public InterProcessSemaphoreV2 getSemaphore(String name, int permits) {
+        if (stopped)
+            throw new IllegalStateException("Service registry is stopped, cannot get semaphore " + name);
+
+        String path = "/semaphores/" + name;
+        return new InterProcessSemaphoreV2(curatorFramework, path, permits);
+    }
+
     /* Exposed for tests */
     public synchronized void shutDown() {
         if (stopped)
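The returned semaphore is a plain Curator recipe, so acquiring and releasing permits is left entirely to the caller. A hedged sketch of how a caller might use it, under the assumption that a lease is acquired with a timeout and returned in a finally block (the helper class and the 30-second timeout are illustrative, not from the patch):

import nu.marginalia.service.discovery.ServiceRegistryIf;
import org.apache.curator.framework.recipes.locks.InterProcessSemaphoreV2;
import org.apache.curator.framework.recipes.locks.Lease;

import java.util.concurrent.TimeUnit;

class SemaphoreUsageExample {
    /** Acquires one permit from the named ZooKeeper-backed semaphore, runs the task, then returns the lease. */
    static void withPermit(ServiceRegistryIf registry, String name, int permits, Runnable task) throws Exception {
        InterProcessSemaphoreV2 semaphore = registry.getSemaphore(name, permits);

        Lease lease = semaphore.acquire(30, TimeUnit.SECONDS); // null if the timeout elapses
        if (lease == null) {
            throw new IllegalStateException("Timed out waiting for semaphore " + name);
        }

        try {
            task.run();
        }
        finally {
            semaphore.returnLease(lease);
        }
    }
}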
@@ -19,6 +19,7 @@ dependencies {
     implementation project(':code:processes:crawling-process')
     implementation project(':code:processes:live-crawling-process')
     implementation project(':code:processes:loading-process')
+    implementation project(':code:processes:ping-process')
     implementation project(':code:processes:converting-process')
     implementation project(':code:processes:index-constructor-process')
 

@@ -37,6 +38,7 @@ dependencies {
     implementation project(':code:functions:link-graph:api')
     implementation project(':code:functions:live-capture:api')
     implementation project(':code:functions:search-query')
+    implementation project(':code:functions:nsfw-domain-filter')
     implementation project(':code:execution:api')
 
     implementation project(':code:processes:crawling-process:model')
@@ -6,11 +6,13 @@ import java.util.Set;
 
 public enum ExecutorActor {
     PREC_EXPORT_ALL(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
+    SYNC_NSFW_LISTS(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
+
     CRAWL(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
     RECRAWL(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
     RECRAWL_SINGLE_DOMAIN(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
     PROC_CRAWLER_SPAWNER(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
+    PROC_PING_SPAWNER(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED, NodeProfile.REALTIME),
     PROC_EXPORT_TASKS_SPAWNER(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
     ADJACENCY_CALCULATION(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
     EXPORT_DATA(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),

@@ -35,7 +37,8 @@ public enum ExecutorActor {
     LIVE_CRAWL(NodeProfile.REALTIME),
     PROC_LIVE_CRAWL_SPAWNER(NodeProfile.REALTIME),
     SCRAPE_FEEDS(NodeProfile.REALTIME),
-    UPDATE_RSS(NodeProfile.REALTIME);
+    UPDATE_RSS(NodeProfile.REALTIME)
+    ;
 
     public String id() {
         return "fsm:" + name().toLowerCase();
@@ -49,6 +49,7 @@ public class ExecutorActorControlService {
                                     RecrawlSingleDomainActor recrawlSingleDomainActor,
                                     RestoreBackupActor restoreBackupActor,
                                     ConverterMonitorActor converterMonitorFSM,
+                                    PingMonitorActor pingMonitorActor,
                                     CrawlerMonitorActor crawlerMonitorActor,
                                     LiveCrawlerMonitorActor liveCrawlerMonitorActor,
                                     LoaderMonitorActor loaderMonitor,

@@ -68,6 +69,7 @@ public class ExecutorActorControlService {
                                     ExecutorActorStateMachines stateMachines,
                                     MigrateCrawlDataActor migrateCrawlDataActor,
                                     ExportAllPrecessionActor exportAllPrecessionActor,
+                                    UpdateNsfwFiltersActor updateNsfwFiltersActor,
                                     UpdateRssActor updateRssActor) throws SQLException {
        this.messageQueueFactory = messageQueueFactory;
        this.eventLog = baseServiceParams.eventLog;

@@ -88,6 +90,7 @@ public class ExecutorActorControlService {
         register(ExecutorActor.PROC_CONVERTER_SPAWNER, converterMonitorFSM);
         register(ExecutorActor.PROC_LOADER_SPAWNER, loaderMonitor);
         register(ExecutorActor.PROC_CRAWLER_SPAWNER, crawlerMonitorActor);
+        register(ExecutorActor.PROC_PING_SPAWNER, pingMonitorActor);
         register(ExecutorActor.PROC_LIVE_CRAWL_SPAWNER, liveCrawlerMonitorActor);
         register(ExecutorActor.PROC_EXPORT_TASKS_SPAWNER, exportTasksMonitorActor);
 

@@ -109,6 +112,7 @@ public class ExecutorActorControlService {
         register(ExecutorActor.UPDATE_RSS, updateRssActor);
 
         register(ExecutorActor.MIGRATE_CRAWL_DATA, migrateCrawlDataActor);
+        register(ExecutorActor.SYNC_NSFW_LISTS, updateNsfwFiltersActor);
 
         if (serviceConfiguration.node() == 1) {
             register(ExecutorActor.PREC_EXPORT_ALL, exportAllPrecessionActor);
@@ -0,0 +1,178 @@ (new file)
package nu.marginalia.actor.proc;

import com.google.gson.Gson;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.actor.prototype.RecordActorPrototype;
import nu.marginalia.actor.state.ActorResumeBehavior;
import nu.marginalia.actor.state.ActorStep;
import nu.marginalia.actor.state.Resume;
import nu.marginalia.actor.state.Terminal;
import nu.marginalia.mq.MqMessageState;
import nu.marginalia.mq.persistence.MqMessageHandlerRegistry;
import nu.marginalia.mq.persistence.MqPersistence;
import nu.marginalia.mqapi.ProcessInboxNames;
import nu.marginalia.mqapi.ping.PingRequest;
import nu.marginalia.process.ProcessService;
import nu.marginalia.service.module.ServiceConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.sql.SQLException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

@Singleton
public class PingMonitorActor extends RecordActorPrototype {

    private final MqPersistence persistence;
    private final ProcessService processService;

    private final Logger logger = LoggerFactory.getLogger(getClass());

    public static final int MAX_ATTEMPTS = 3;
    private final String inboxName;
    private final ProcessService.ProcessId processId;
    private final ExecutorService executorService = Executors.newSingleThreadExecutor();
    private final int node;
    private final Gson gson;

    public record Initial() implements ActorStep {}
    @Resume(behavior = ActorResumeBehavior.RETRY)
    public record Monitor(int errorAttempts) implements ActorStep {}
    @Resume(behavior = ActorResumeBehavior.RESTART)
    public record Run(int attempts) implements ActorStep {}
    @Terminal
    public record Aborted() implements ActorStep {}

    @Override
    public ActorStep transition(ActorStep self) throws Exception {
        return switch (self) {
            case Initial i -> {
                PingRequest request = new PingRequest();

                persistence.sendNewMessage(inboxName, null, null,
                        "PingRequest",
                        gson.toJson(request),
                        null);

                yield new Monitor(0);
            }
            case Monitor(int errorAttempts) -> {
                for (;;) {
                    var messages = persistence.eavesdrop(inboxName, 1);

                    if (messages.isEmpty() && !processService.isRunning(processId)) {
                        synchronized (processId) {
                            processId.wait(5000);
                        }

                        if (errorAttempts > 0) { // Reset the error counter if there is silence in the inbox
                            yield new Monitor(0);
                        }
                        // else continue
                    } else {
                        // Special: Associate this thread with the message so that we can get tracking
                        MqMessageHandlerRegistry.register(messages.getFirst().msgId());

                        yield new Run(0);
                    }
                }
            }
            case Run(int attempts) -> {
                try {
                    long startTime = System.currentTimeMillis();
                    var exec = new TaskExecution();
                    long endTime = System.currentTimeMillis();

                    if (exec.isError()) {
                        if (attempts < MAX_ATTEMPTS)
                            yield new Run(attempts + 1);
                        else
                            yield new Error();
                    }
                    else if (endTime - startTime < TimeUnit.SECONDS.toMillis(1)) {
                        // To avoid boot loops, we transition to error if the process
                        // didn't run for longer than 1 seconds. This might happen if
                        // the process crashes before it can reach the heartbeat and inbox
                        // stages of execution. In this case it would not report having acted
                        // on its message, and the process would be restarted forever without
                        // the attempts counter incrementing.
                        yield new Error("Process terminated within 1 seconds of starting");
                    }
                }
                catch (InterruptedException ex) {
                    // We get this exception when the process is cancelled by the user

                    processService.kill(processId);
                    setCurrentMessageToDead();

                    yield new Aborted();
                }

                yield new Monitor(attempts);
            }
            default -> new Error();
        };
    }

    public String describe() {
        return "Spawns a(n) " + processId + " process and monitors its inbox for messages";
    }

    @Inject
    public PingMonitorActor(Gson gson,
                            ServiceConfiguration configuration,
                            MqPersistence persistence,
                            ProcessService processService) throws SQLException {
        super(gson);
        this.gson = gson;
        this.node = configuration.node();
        this.persistence = persistence;
        this.processService = processService;
        this.inboxName = ProcessInboxNames.PING_INBOX + ":" + node;
        this.processId = ProcessService.ProcessId.PING;
    }

    /** Sets the message to dead in the database to avoid
     * the service respawning on the same task when we
     * re-enable this actor */
    private void setCurrentMessageToDead() {
        try {
            var messages = persistence.eavesdrop(inboxName, 1);

            if (messages.isEmpty()) // Possibly a race condition where the task is already finished
                return;

            var theMessage = messages.iterator().next();
            persistence.updateMessageState(theMessage.msgId(), MqMessageState.DEAD);
        }
        catch (SQLException ex) {
            logger.error("Tried but failed to set the message for " + processId + " to dead", ex);
        }
    }

    /** Encapsulates the execution of the process in a separate thread so that
     * we can interrupt the thread if the process is cancelled */
    private class TaskExecution {
        private final AtomicBoolean error = new AtomicBoolean(false);

        public TaskExecution() throws ExecutionException, InterruptedException {
            // Run this call in a separate thread so that this thread can be interrupted waiting for it
            executorService.submit(() -> {
                try {
                    processService.trigger(processId);
                } catch (Exception e) {
                    logger.warn("Error in triggering process", e);
                    error.set(true);
                }
            }).get(); // Wait for the process to start
        }

        public boolean isError() {
            return error.get();
        }
    }
}
@@ -0,0 +1,53 @@ (new file)
package nu.marginalia.actor.task;

import com.google.gson.Gson;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.actor.prototype.RecordActorPrototype;
import nu.marginalia.actor.state.ActorStep;
import nu.marginalia.nsfw.NsfwDomainFilter;
import nu.marginalia.service.module.ServiceConfiguration;

@Singleton
public class UpdateNsfwFiltersActor extends RecordActorPrototype {
    private final ServiceConfiguration serviceConfiguration;
    private final NsfwDomainFilter nsfwDomainFilter;

    public record Initial() implements ActorStep {}
    public record Run() implements ActorStep {}

    @Override
    public ActorStep transition(ActorStep self) throws Exception {
        return switch(self) {
            case Initial() -> {
                if (serviceConfiguration.node() != 1) {
                    yield new Error("This actor can only run on node 1");
                }
                else {
                    yield new Run();
                }
            }
            case Run() -> {
                nsfwDomainFilter.fetchLists();
                yield new End();
            }
            default -> new Error();
        };
    }

    @Override
    public String describe() {
        return "Sync NSFW filters";
    }

    @Inject
    public UpdateNsfwFiltersActor(Gson gson,
                                  ServiceConfiguration serviceConfiguration,
                                  NsfwDomainFilter nsfwDomainFilter)
    {
        super(gson);
        this.serviceConfiguration = serviceConfiguration;
        this.nsfwDomainFilter = nsfwDomainFilter;
    }
}
@@ -8,6 +8,7 @@ import nu.marginalia.crawl.CrawlerMain;
 import nu.marginalia.index.IndexConstructorMain;
 import nu.marginalia.livecrawler.LiveCrawlerMain;
 import nu.marginalia.loading.LoaderMain;
+import nu.marginalia.ping.PingMain;
 import nu.marginalia.service.control.ServiceEventLog;
 import nu.marginalia.service.server.BaseServiceParams;
 import nu.marginalia.task.ExportTasksMain;

@@ -41,6 +42,7 @@ public class ProcessService {
         return switch (id) {
             case "converter" -> ProcessId.CONVERTER;
             case "crawler" -> ProcessId.CRAWLER;
+            case "ping" -> ProcessId.PING;
             case "loader" -> ProcessId.LOADER;
             case "export-tasks" -> ProcessId.EXPORT_TASKS;
             case "index-constructor" -> ProcessId.INDEX_CONSTRUCTOR;

@@ -50,6 +52,7 @@ public class ProcessService {
 
     public enum ProcessId {
         CRAWLER(CrawlerMain.class),
+        PING(PingMain.class),
         LIVE_CRAWLER(LiveCrawlerMain.class),
         CONVERTER(ConverterMain.class),
         LOADER(LoaderMain.class),

@@ -68,6 +71,7 @@ public class ProcessService {
             case LIVE_CRAWLER -> "LIVE_CRAWLER_PROCESS_OPTS";
             case CONVERTER -> "CONVERTER_PROCESS_OPTS";
             case LOADER -> "LOADER_PROCESS_OPTS";
+            case PING -> "PING_PROCESS_OPTS";
             case INDEX_CONSTRUCTOR -> "INDEX_CONSTRUCTION_PROCESS_OPTS";
             case EXPORT_TASKS -> "EXPORT_TASKS_PROCESS_OPTS";
         };
@@ -27,10 +27,12 @@ public class DbBrowseDomainsRandom {
     public List<BrowseResult> getRandomDomains(int count, DomainBlacklist blacklist, int set) {
 
         final String q = """
-                SELECT DOMAIN_ID, DOMAIN_NAME, INDEXED
+                SELECT EC_RANDOM_DOMAINS.DOMAIN_ID, DOMAIN_NAME, INDEXED
                 FROM EC_RANDOM_DOMAINS
                 INNER JOIN EC_DOMAIN ON EC_DOMAIN.ID=DOMAIN_ID
+                LEFT JOIN DOMAIN_AVAILABILITY_INFORMATION DAI ON DAI.DOMAIN_ID=EC_RANDOM_DOMAINS.DOMAIN_ID
                 WHERE STATE<2
+                AND SERVER_AVAILABLE
                 AND DOMAIN_SET=?
                 AND DOMAIN_ALIAS IS NULL
                 ORDER BY RAND()
@@ -22,6 +22,7 @@ dependencies {
     implementation project(':code:common:db')
     implementation project(':code:libraries:blocking-thread-pool')
    implementation project(':code:libraries:message-queue')
+    implementation project(':code:libraries:domain-lock')
 
     implementation project(':code:execution:api')
     implementation project(':code:processes:crawling-process:ft-content-type')
@@ -12,6 +12,7 @@ import org.slf4j.LoggerFactory;
 
 import java.net.URI;
 import java.net.URISyntaxException;
+import java.time.Duration;
 import java.util.HashSet;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;

@@ -35,15 +36,22 @@ public class DomSampleService {
 
         if (StringUtils.isEmpty(browserlessAddress) || serviceConfiguration.node() > 1) {
             logger.warn("Live capture service will not run");
-            browserlessURI = null; // satisfy final
+            browserlessURI = null;
         }
         else {
             browserlessURI = new URI(browserlessAddress);
-
-            Thread.ofPlatform().daemon().start(this::run);
         }
     }
 
+    public void start() {
+        if (browserlessURI == null) {
+            logger.warn("DomSampleService is not enabled due to missing browserless URI or multi-node configuration");
+            return;
+        }
+
+        Thread.ofPlatform().daemon().start(this::run);
+    }
+
     public void syncDomains() {
         Set<String> dbDomains = new HashSet<>();
 

@@ -102,8 +110,7 @@ public class DomSampleService {
     private void updateDomain(BrowserlessClient client, String domain) {
         var rootUrl = "https://" + domain + "/";
         try {
-            var content = client.annotatedContent(rootUrl,
-                    BrowserlessClient.GotoOptions.defaultValues());
+            var content = client.annotatedContent(rootUrl, new BrowserlessClient.GotoOptions("load", Duration.ofSeconds(10).toMillis()));
 
             if (content.isPresent()) {
                 db.saveSample(domain, rootUrl, content.get());
@@ -26,7 +26,9 @@ public class DomSampleDb implements AutoCloseable {
             stmt.executeUpdate("CREATE TABLE IF NOT EXISTS samples (url TEXT PRIMARY KEY, domain TEXT, sample BLOB, requests BLOB, accepted_popover BOOLEAN DEFAULT FALSE)");
             stmt.executeUpdate("CREATE INDEX IF NOT EXISTS domain_index ON samples (domain)");
             stmt.executeUpdate("CREATE TABLE IF NOT EXISTS schedule (domain TEXT PRIMARY KEY, last_fetch TIMESTAMP DEFAULT NULL)");
+            stmt.execute("PRAGMA journal_mode=WAL");
         }
+
     }
 
     public void syncDomains(Set<String> domains) {

@@ -151,8 +153,6 @@ public class DomSampleDb implements AutoCloseable {
 
     }
 
-    record Request(String url, String method, String timestamp, boolean acceptedPopover) {}
-
     public void saveSampleRaw(String domain, String url, String sample, String requests, boolean acceptedPopover) throws SQLException {
         try (var stmt = connection.prepareStatement("""
                 INSERT OR REPLACE
@@ -141,7 +141,7 @@ public class BrowserlessClient implements AutoCloseable {
 
     public record GotoOptions(String waitUntil, long timeout) {
         public static GotoOptions defaultValues() {
-            return new GotoOptions("load", Duration.ofSeconds(10).toMillis());
+            return new GotoOptions("networkidle2", Duration.ofSeconds(10).toMillis());
         }
     }
 
@@ -126,7 +126,6 @@ public class LiveCaptureGrpcService
         }
         else {
             EdgeDomain domain = domainNameOpt.get();
-            String domainNameStr = domain.toString();
 
             if (!isValidDomainForCapture(domain)) {
                 ScreenshotDbOperations.flagDomainAsFetched(conn, domain);
@@ -1,66 +0,0 @@ (file deleted)
package nu.marginalia.rss.svc;

import nu.marginalia.model.EdgeDomain;

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Semaphore;

/** Holds lock objects for each domain, to prevent multiple threads from
 * crawling the same domain at the same time.
 */
public class DomainLocks {
    // The locks are stored in a map, with the domain name as the key. This map will grow
    // relatively big, but should be manageable since the number of domains is limited to
    // a few hundred thousand typically.
    private final Map<String, Semaphore> locks = new ConcurrentHashMap<>();

    /** Returns a lock object corresponding to the given domain. The object is returned as-is,
     * and may be held by another thread. The caller is responsible for locking and releasing the lock.
     */
    public DomainLock lockDomain(EdgeDomain domain) throws InterruptedException {
        return new DomainLock(domain.toString(),
                locks.computeIfAbsent(domain.topDomain.toLowerCase(), this::defaultPermits));
    }

    private Semaphore defaultPermits(String topDomain) {
        if (topDomain.equals("wordpress.com"))
            return new Semaphore(16);
        if (topDomain.equals("blogspot.com"))
            return new Semaphore(8);

        if (topDomain.equals("neocities.org"))
            return new Semaphore(4);
        if (topDomain.equals("github.io"))
            return new Semaphore(4);

        if (topDomain.equals("substack.com")) {
            return new Semaphore(1);
        }
        if (topDomain.endsWith(".edu")) {
            return new Semaphore(1);
        }

        return new Semaphore(2);
    }

    public static class DomainLock implements AutoCloseable {
        private final String domainName;
        private final Semaphore semaphore;

        DomainLock(String domainName, Semaphore semaphore) throws InterruptedException {
            this.domainName = domainName;
            this.semaphore = semaphore;

            Thread.currentThread().setName("fetching:" + domainName + " [await domain lock]");
            semaphore.acquire();
            Thread.currentThread().setName("fetching:" + domainName);
        }

        @Override
        public void close() {
            semaphore.release();
            Thread.currentThread().setName("fetching:" + domainName + " [wrapping up]");
        }
    }
}
@@ -5,6 +5,8 @@ import com.opencsv.CSVReader;
 import nu.marginalia.WmsaHome;
 import nu.marginalia.contenttype.ContentType;
 import nu.marginalia.contenttype.DocumentBodyToString;
+import nu.marginalia.coordination.DomainCoordinator;
+import nu.marginalia.coordination.DomainLock;
 import nu.marginalia.executor.client.ExecutorClient;
 import nu.marginalia.model.EdgeDomain;
 import nu.marginalia.nodecfg.NodeConfigurationService;

@@ -51,12 +53,13 @@ public class FeedFetcherService {
     private final ServiceHeartbeat serviceHeartbeat;
     private final ExecutorClient executorClient;
 
-    private final DomainLocks domainLocks = new DomainLocks();
+    private final DomainCoordinator domainCoordinator;
 
     private volatile boolean updating;
 
     @Inject
     public FeedFetcherService(FeedDb feedDb,
+                              DomainCoordinator domainCoordinator,
                               FileStorageService fileStorageService,
                               NodeConfigurationService nodeConfigurationService,
                               ServiceHeartbeat serviceHeartbeat,

@@ -67,6 +70,7 @@ public class FeedFetcherService {
         this.nodeConfigurationService = nodeConfigurationService;
         this.serviceHeartbeat = serviceHeartbeat;
         this.executorClient = executorClient;
+        this.domainCoordinator = domainCoordinator;
     }
 
     public enum UpdateMode {

@@ -132,7 +136,7 @@ public class FeedFetcherService {
                 };
 
                 FetchResult feedData;
-                try (DomainLocks.DomainLock domainLock = domainLocks.lockDomain(new EdgeDomain(feed.domain()))) {
+                try (DomainLock domainLock = domainCoordinator.lockDomain(new EdgeDomain(feed.domain()))) {
                     feedData = fetchFeedData(feed, client, fetchExecutor, ifModifiedSinceDate, ifNoneMatchTag);
                 } catch (Exception ex) {
                     feedData = new FetchResult.TransientError();
@@ -108,7 +108,7 @@ public class BrowserlessClientTest {
             DomSampleDb dbop = new DomSampleDb(Path.of("/tmp/dom-sample.db"))
         ) {
             var content = client.annotatedContent("https://marginalia.nu/", BrowserlessClient.GotoOptions.defaultValues()).orElseThrow();
-            dbop.saveSample("marginalia.nu", "https://www.thesodacanstove.com/alcohol-stove/how-to-build/", content);
+            dbop.saveSample("marginalia.nu", "https://marginalia.nu/", content);
             System.out.println(content);
             Assertions.assertFalse(content.isBlank(), "Content should not be empty");

43 code/functions/nsfw-domain-filter/build.gradle (new file)
@@ -0,0 +1,43 @@
+plugins {
+    id 'java'
+    id 'jvm-test-suite'
+}
+
+java {
+    toolchain {
+        languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
+    }
+}
+
+apply from: "$rootProject.projectDir/srcsets.gradle"
+
+dependencies {
+
+    implementation project(':code:common:config')
+    implementation project(':code:common:model')
+    implementation project(':code:common:db')
+
+
+    implementation libs.bundles.slf4j
+    implementation libs.prometheus
+    implementation libs.guava
+    implementation libs.commons.lang3
+    implementation dependencies.create(libs.guice.get()) {
+        exclude group: 'com.google.guava'
+    }
+    implementation libs.notnull
+    implementation libs.fastutil
+    implementation libs.bundles.mariadb
+
+
+    testImplementation libs.bundles.slf4j.test
+    testImplementation libs.bundles.junit
+    testImplementation libs.mockito
+
+    testImplementation platform('org.testcontainers:testcontainers-bom:1.17.4')
+    testImplementation libs.commons.codec
+    testImplementation project(':code:common:service')
+    testImplementation 'org.testcontainers:mariadb:1.17.4'
+    testImplementation 'org.testcontainers:junit-jupiter:1.17.4'
+    testImplementation project(':code:libraries:test-helpers')
+}
@@ -0,0 +1,192 @@
+package nu.marginalia.nsfw;
+
+import com.google.inject.Inject;
+import com.google.inject.Singleton;
+import com.google.inject.name.Named;
+import com.zaxxer.hikari.HikariDataSource;
+import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
+import org.apache.commons.lang3.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.InputStreamReader;
+import java.net.http.HttpClient;
+import java.net.http.HttpRequest;
+import java.net.http.HttpResponse;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+import java.util.zip.GZIPInputStream;
+
+@Singleton
+public class NsfwDomainFilter {
+    private final HikariDataSource dataSource;
+
+    private final List<String> dangerLists;
+    private final List<String> smutLists;
+
+    private volatile IntOpenHashSet blockedDomainIdsTier1 = new IntOpenHashSet();
+    private volatile IntOpenHashSet blockedDomainIdsTier2 = new IntOpenHashSet();
+
+    private static final Logger logger = LoggerFactory.getLogger(NsfwDomainFilter.class);
+
+    public static final int NSFW_DISABLE = 0;
+    public static final int NSFW_BLOCK_DANGER = 1;
+    public static final int NSFW_BLOCK_SMUT = 2;
+
+    @Inject
+    public NsfwDomainFilter(HikariDataSource dataSource,
+                            @Named("nsfw.dangerLists") List<String> dangerLists,
+                            @Named("nsfw.smutLists") List<String> smutLists
+                            ) {
+        this.dataSource = dataSource;
+
+        this.dangerLists = dangerLists;
+        this.smutLists = smutLists;
+
+        Thread.ofPlatform().daemon().name("NsfwDomainFilterSync").start(() -> {
+            while (true) {
+                sync();
+                try {
+                    TimeUnit.HOURS.sleep(1);
+                } catch (InterruptedException e) {
+                    Thread.currentThread().interrupt();
+                    break; // Exit the loop if interrupted
+                }
+            }
+        });
+    }
+
+    public boolean isBlocked(int domainId, int tier) {
+        if (tier == 0)
+            return false;
+
+        if (tier >= 1 && blockedDomainIdsTier1.contains(domainId))
+            return true;
+        if (tier >= 2 && blockedDomainIdsTier2.contains(domainId))
+            return true;
+
+        return false;
+    }
+
+    private synchronized void sync() {
+        try (var conn = dataSource.getConnection();
+             var stmt = conn.prepareStatement("SELECT ID, TIER FROM NSFW_DOMAINS")
+        ) {
+            var rs = stmt.executeQuery();
+            IntOpenHashSet tier1 = new IntOpenHashSet();
+            IntOpenHashSet tier2 = new IntOpenHashSet();
+
+            while (rs.next()) {
+                int domainId = rs.getInt("ID");
+                int tier = rs.getInt("TIER");
+
+                switch (tier) {
+                    case 1 -> tier1.add(domainId);
+                    case 2 -> tier2.add(domainId);
+                }
+            }
+
+            this.blockedDomainIdsTier1 = tier1;
+            this.blockedDomainIdsTier2 = tier2;
+
+            logger.info("NSFW domain filter synced: {} tier 1, {} tier 2", tier1.size(), tier2.size());
+
+        }
+        catch (SQLException ex) {
+            logger.error("Failed to sync NSFW domain filter", ex);
+        }
+    }
+
+    public synchronized void fetchLists() {
+        try (var conn = dataSource.getConnection();
+             HttpClient client = HttpClient.newBuilder()
+                     .followRedirects(HttpClient.Redirect.ALWAYS)
+                     .build();
+             var stmt = conn.createStatement();
+             var insertStmt = conn.prepareStatement("INSERT IGNORE INTO NSFW_DOMAINS_TMP (ID, TIER) SELECT ID, ? FROM EC_DOMAIN WHERE DOMAIN_NAME = ?")) {
+
+            stmt.execute("DROP TABLE IF EXISTS NSFW_DOMAINS_TMP");
+            stmt.execute("CREATE TABLE NSFW_DOMAINS_TMP LIKE NSFW_DOMAINS");
+
+            List<String> combinedDangerList = new ArrayList<>(10_000);
+            for (var dangerListUrl : dangerLists) {
+                combinedDangerList.addAll(fetchList(client, dangerListUrl));
+            }
+
+            for (String domain : combinedDangerList) {
+                insertStmt.setInt(1, NSFW_BLOCK_DANGER);
+                insertStmt.setString(2, domain);
+                insertStmt.execute();
+            }
+
+            List<String> combinedSmutList = new ArrayList<>(10_000);
+            for (var smutListUrl : smutLists) {
+                combinedSmutList.addAll(fetchList(client, smutListUrl));
+            }
+
+            for (String domain : combinedSmutList) {
+                insertStmt.setInt(1, NSFW_BLOCK_SMUT);
+                insertStmt.setString(2, domain);
+                insertStmt.addBatch();
+                insertStmt.execute();
+            }
+
+            stmt.execute("""
+                    DROP TABLE IF EXISTS NSFW_DOMAINS
+                    """);
+            stmt.execute("""
+                    RENAME TABLE NSFW_DOMAINS_TMP TO NSFW_DOMAINS
+                    """);
+            sync();
+        }
+        catch (SQLException ex) {
+            logger.error("Failed to fetch NSFW domain lists", ex);
+        }
+    }
+
+    public List<String> fetchList(HttpClient client, String url) {
+
+        logger.info("Fetching NSFW domain list from {}", url);
+
+        var request = HttpRequest.newBuilder()
+                .uri(java.net.URI.create(url))
+                .build();
+
+        try {
+            if (url.endsWith(".gz")) {
+                var response = client.send(request, HttpResponse.BodyHandlers.ofByteArray());
+
+                byte[] body = response.body();
+
+                try (var reader = new BufferedReader(new InputStreamReader(new GZIPInputStream(new ByteArrayInputStream(body))))) {
+                    return reader.lines()
+                            .filter(StringUtils::isNotEmpty)
+                            .toList();
+                } catch (Exception e) {
+                    logger.error("Error reading GZIP response from {}", url, e);
+                }
+            } else {
+                var response = client.send(request, HttpResponse.BodyHandlers.ofString());
+                if (response.statusCode() == 200) {

+                    return Arrays.stream(StringUtils.split(response.body(), "\n"))
+                            .filter(StringUtils::isNotEmpty)
+                            .toList();
+                } else {
+                    logger.warn("Failed to fetch list from {}: HTTP {}", url, response.statusCode());
+                }
+            }
+        }
+        catch (Exception e) {
+            logger.error("Error fetching NSFW domain list from {}", url, e);
+        }
+
+
+        return List.of();
+    }
+}
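As a rough usage sketch (not part of the commit): the filter is consulted per result with the caller's configured tier, where tier 1 only consults the danger lists (cryptojacking, malware, phishing) and tier 2 additionally consults the adult and gambling lists. The helper class and its names below are hypothetical.

```java
import nu.marginalia.nsfw.NsfwDomainFilter;

class NsfwFilterUsageSketch {
    // Hypothetical helper: decide whether a result should be hidden for a given user setting.
    // Tier 0 disables filtering, tier 1 hides the "danger" lists, tier 2 also hides adult/gambling domains.
    static boolean shouldHide(NsfwDomainFilter filter, int domainId, int userTier) {
        return filter.isBlocked(domainId, userTier);
    }
}
```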
@@ -0,0 +1,30 @@
+package nu.marginalia.nsfw;
+
+import com.google.inject.AbstractModule;
+import com.google.inject.Provides;
+import jakarta.inject.Named;
+
+import java.util.List;
+
+public class NsfwFilterModule extends AbstractModule {
+
+    @Provides
+    @Named("nsfw.dangerLists")
+    public List<String> nsfwDomainLists1() {
+        return List.of(
+                "https://raw.githubusercontent.com/olbat/ut1-blacklists/refs/heads/master/blacklists/cryptojacking/domains",
+                "https://raw.githubusercontent.com/olbat/ut1-blacklists/refs/heads/master/blacklists/malware/domains",
+                "https://raw.githubusercontent.com/olbat/ut1-blacklists/refs/heads/master/blacklists/phishing/domains"
+        );
+    }
+    @Provides
+    @Named("nsfw.smutLists")
+    public List<String> nsfwDomainLists2() {
+        return List.of(
+                "https://github.com/olbat/ut1-blacklists/raw/refs/heads/master/blacklists/adult/domains.gz",
+                "https://raw.githubusercontent.com/olbat/ut1-blacklists/refs/heads/master/blacklists/gambling/domains"
+        );
+    }
+
+    public void configure() {}
+}
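A hedged sketch of how the module above could be installed alongside a data source binding; the injector setup here is illustrative and not taken from the commit.

```java
import com.google.inject.Guice;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.nsfw.NsfwDomainFilter;
import nu.marginalia.nsfw.NsfwFilterModule;

class NsfwWiringSketch {
    static NsfwDomainFilter createFilter(HikariDataSource dataSource) {
        // NsfwFilterModule supplies the @Named("nsfw.dangerLists") and @Named("nsfw.smutLists") URLs;
        // the inline module standing in for the application's own database wiring is an assumption.
        var injector = Guice.createInjector(
                new NsfwFilterModule(),
                binder -> binder.bind(HikariDataSource.class).toInstance(dataSource));
        return injector.getInstance(NsfwDomainFilter.class);
    }
}
```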
@@ -0,0 +1,108 @@
+package nu.marginalia.nsfw;
+
+
+import com.google.inject.AbstractModule;
+import com.google.inject.Guice;
+import com.google.inject.Provides;
+import com.zaxxer.hikari.HikariConfig;
+import com.zaxxer.hikari.HikariDataSource;
+import jakarta.inject.Named;
+import nu.marginalia.test.TestMigrationLoader;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.testcontainers.containers.MariaDBContainer;
+import org.testcontainers.junit.jupiter.Container;
+import org.testcontainers.junit.jupiter.Testcontainers;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+
+@Tag("slow")
+@Testcontainers
+class NsfwDomainFilterTest extends AbstractModule {
+
+    @Container
+    static MariaDBContainer<?> mariaDBContainer = new MariaDBContainer<>("mariadb")
+            .withDatabaseName("WMSA_prod")
+            .withUsername("wmsa")
+            .withPassword("wmsa")
+            .withNetworkAliases("mariadb");
+
+    static HikariDataSource dataSource;
+    static Path tempDir;
+
+    @BeforeAll
+    public static void setUpDb() throws IOException {
+        tempDir = Files.createTempDirectory(NsfwDomainFilterTest.class.getSimpleName());
+
+        System.setProperty("system.homePath", tempDir.toString());
+
+        HikariConfig config = new HikariConfig();
+        config.setJdbcUrl(mariaDBContainer.getJdbcUrl());
+        config.setUsername("wmsa");
+        config.setPassword("wmsa");
+
+        dataSource = new HikariDataSource(config);
+
+        TestMigrationLoader.flywayMigration(dataSource);
+
+        try (var conn = dataSource.getConnection();
+             var stmt = conn.prepareStatement("INSERT INTO EC_DOMAIN (DOMAIN_NAME, DOMAIN_TOP, NODE_AFFINITY) VALUES (?, ?, 1)")
+        ) {
+
+            // Ensure the database is ready
+            conn.createStatement().execute("SELECT 1");
+
+            stmt.setString(1, "www.google.com");
+            stmt.setString(2, "google.com");
+            stmt.executeUpdate();
+            stmt.setString(1, "www.bing.com");
+            stmt.setString(2, "bing.com");
+            stmt.executeUpdate();
+        } catch (Exception e) {
+            throw new RuntimeException("Failed to connect to the database", e);
+        }
+    }
+
+    @Provides
+    @Named("nsfw.dangerLists")
+    public List<String> nsfwDomainLists1() {
+        return List.of(
+                "https://downloads.marginalia.nu/test/list1"
+        );
+    }
+
+    @Provides
+    @Named("nsfw.smutLists")
+    public List<String> nsfwDomainLists2() {
+        return List.of(
+                "https://downloads.marginalia.nu/test/list2.gz"
+        );
+    }
+
+    public void configure() {
+        bind(HikariDataSource.class).toInstance(dataSource);
+    }
+
+    @Test
+    public void test() {
+        var filter = Guice
+                .createInjector(this)
+                .getInstance(NsfwDomainFilter.class);
+
+        filter.fetchLists();
+
+        assertTrue(filter.isBlocked(1, NsfwDomainFilter.NSFW_BLOCK_DANGER));
+        assertTrue(filter.isBlocked(1, NsfwDomainFilter.NSFW_BLOCK_SMUT));
+        assertFalse(filter.isBlocked(2, NsfwDomainFilter.NSFW_BLOCK_DANGER));
+        assertTrue(filter.isBlocked(2, NsfwDomainFilter.NSFW_BLOCK_SMUT));
+    }
+
+}
@@ -1,9 +1,6 @@
 package nu.marginalia.api.searchquery;

-import nu.marginalia.api.searchquery.model.query.ProcessedQuery;
-import nu.marginalia.api.searchquery.model.query.QueryParams;
-import nu.marginalia.api.searchquery.model.query.QueryResponse;
-import nu.marginalia.api.searchquery.model.query.SearchSpecification;
+import nu.marginalia.api.searchquery.model.query.*;
 import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
 import nu.marginalia.api.searchquery.model.results.PrototypeRankingParameters;
 import nu.marginalia.api.searchquery.model.results.SearchResultItem;
@@ -32,6 +29,8 @@ public class QueryProtobufCodec {
         builder.setSearchSetIdentifier(query.specs.searchSetIdentifier);
         builder.setHumanQuery(request.getHumanQuery());

+        builder.setNsfwFilterTierValue(request.getNsfwFilterTierValue());
+
         builder.setQuality(IndexProtobufCodec.convertSpecLimit(query.specs.quality));
         builder.setYear(IndexProtobufCodec.convertSpecLimit(query.specs.year));
         builder.setSize(IndexProtobufCodec.convertSpecLimit(query.specs.size));
@@ -78,6 +77,8 @@ public class QueryProtobufCodec {
         builder.setSearchSetIdentifier(query.specs.searchSetIdentifier);
         builder.setHumanQuery(humanQuery);

+        builder.setNsfwFilterTier(RpcIndexQuery.NSFW_FILTER_TIER.DANGER);
+
         builder.setQuality(IndexProtobufCodec.convertSpecLimit(query.specs.quality));
         builder.setYear(IndexProtobufCodec.convertSpecLimit(query.specs.year));
         builder.setSize(IndexProtobufCodec.convertSpecLimit(query.specs.size));
@@ -112,6 +113,7 @@ public class QueryProtobufCodec {
                 request.getSearchSetIdentifier(),
                 QueryStrategy.valueOf(request.getQueryStrategy()),
                 RpcTemporalBias.Bias.valueOf(request.getTemporalBias().getBias().name()),
+                NsfwFilterTier.fromCodedValue(request.getNsfwFilterTierValue()),
                 request.getPagination().getPage()
         );
     }
@@ -327,6 +329,7 @@ public class QueryProtobufCodec {
                 .setRank(IndexProtobufCodec.convertSpecLimit(params.rank()))
                 .setSearchSetIdentifier(params.identifier())
                 .setQueryStrategy(params.queryStrategy().name())
+                .setNsfwFilterTierValue(params.filterTier().getCodedValue())
                 .setTemporalBias(RpcTemporalBias.newBuilder()
                         .setBias(RpcTemporalBias.Bias.valueOf(params.temporalBias().name()))
                         .build())
@@ -0,0 +1,26 @@
+package nu.marginalia.api.searchquery.model.query;
+
+public enum NsfwFilterTier {
+    OFF(0),
+    DANGER(1),
+    PORN_AND_GAMBLING(2);
+
+    private final int codedValue; // same as ordinal() for now, but can be changed later if needed
+
+    NsfwFilterTier(int codedValue) {
+        this.codedValue = codedValue;
+    }
+
+    public static NsfwFilterTier fromCodedValue(int codedValue) {
+        for (NsfwFilterTier tier : NsfwFilterTier.values()) {
+            if (tier.codedValue == codedValue) {
+                return tier;
+            }
+        }
+        throw new IllegalArgumentException("Invalid coded value for NsfwFilterTirer: " + codedValue);
+    }
+
+    public int getCodedValue() {
+        return codedValue;
+    }
+}
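The coded value is what travels over the wire, so a round trip through the protobuf representation reduces to the following (illustrative sketch, not code from the commit):

```java
import nu.marginalia.api.searchquery.model.query.NsfwFilterTier;

class FilterTierRoundTripSketch {
    public static void main(String[] args) {
        NsfwFilterTier tier = NsfwFilterTier.PORN_AND_GAMBLING;

        int wireValue = tier.getCodedValue();                          // 2
        NsfwFilterTier decoded = NsfwFilterTier.fromCodedValue(wireValue);

        // fromCodedValue throws IllegalArgumentException for values it does not recognize
        System.out.println(decoded == tier);                           // true
    }
}
```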
@@ -25,10 +25,11 @@ public record QueryParams(
         String identifier,
         QueryStrategy queryStrategy,
         RpcTemporalBias.Bias temporalBias,
+        NsfwFilterTier filterTier,
         int page
         )
 {
-    public QueryParams(String query, RpcQueryLimits limits, String identifier) {
+    public QueryParams(String query, RpcQueryLimits limits, String identifier, NsfwFilterTier filterTier) {
         this(query, null,
                 List.of(),
                 List.of(),
@@ -43,6 +44,7 @@ public record QueryParams(
                 identifier,
                 QueryStrategy.AUTO,
                 RpcTemporalBias.Bias.NONE,
+                filterTier,
                 1 // page
         );
     }
@@ -32,6 +32,14 @@ message RpcQsQuery {
     RpcTemporalBias temporalBias = 16;

     RpcQsQueryPagination pagination = 17;
+
+    NSFW_FILTER_TIER nsfwFilterTier = 18;
+
+    enum NSFW_FILTER_TIER {
+        NONE = 0;
+        DANGER = 1;
+        PORN_AND_GAMBLING = 2;
+    };
 }

 /* Query service query response */
@@ -78,8 +86,17 @@ message RpcIndexQuery {
     RpcQueryLimits queryLimits = 10;
     string queryStrategy = 11; // Named query configuration
     RpcResultRankingParameters parameters = 12;
+
+    NSFW_FILTER_TIER nsfwFilterTier = 13;
+
+    enum NSFW_FILTER_TIER {
+        NONE = 0;
+        DANGER = 1;
+        PORN_AND_GAMBLING = 2;
+    };
 }


 /* A tagged union encoding some limit on a field */
 message RpcSpecLimit {
     int32 value = 1;
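Assuming the usual protobuf Java codegen for the new field (the analogous setters for RpcIndexQuery appear in the codec changes above), a client could request filtering like this; the snippet is a sketch, not code from the commit.

```java
import nu.marginalia.api.searchquery.RpcQsQuery;

class NsfwTierRequestSketch {
    // Hypothetical: take an existing query request and ask for tier-1 ("DANGER") filtering.
    static RpcQsQuery withDangerFiltering(RpcQsQuery base) {
        return base.toBuilder()
                .setNsfwFilterTier(RpcQsQuery.NSFW_FILTER_TIER.DANGER)
                .build();
    }
}
```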
@@ -19,6 +19,7 @@ dependencies {
     implementation project(':code:common:model')
     implementation project(':code:common:service')

+    implementation project(':code:functions:nsfw-domain-filter')
     implementation project(':code:functions:search-query:api')

     implementation project(':code:index:query')
@@ -11,6 +11,7 @@ import nu.marginalia.api.searchquery.model.query.QueryParams;
 import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
 import nu.marginalia.api.searchquery.model.results.PrototypeRankingParameters;
 import nu.marginalia.index.api.IndexClient;
+import nu.marginalia.nsfw.NsfwDomainFilter;
 import nu.marginalia.service.server.DiscoverableService;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -34,13 +35,16 @@ public class QueryGRPCService


     private final QueryFactory queryFactory;
+    private final NsfwDomainFilter nsfwDomainFilter;
     private final IndexClient indexClient;

     @Inject
     public QueryGRPCService(QueryFactory queryFactory,
+                            NsfwDomainFilter nsfwDomainFilter,
                             IndexClient indexClient)
     {
         this.queryFactory = queryFactory;
+        this.nsfwDomainFilter = nsfwDomainFilter;
         this.indexClient = indexClient;
     }

@@ -3,6 +3,7 @@ package nu.marginalia.query.svc;
 import nu.marginalia.WmsaHome;
 import nu.marginalia.api.searchquery.RpcQueryLimits;
 import nu.marginalia.api.searchquery.RpcTemporalBias;
+import nu.marginalia.api.searchquery.model.query.NsfwFilterTier;
 import nu.marginalia.api.searchquery.model.query.QueryParams;
 import nu.marginalia.api.searchquery.model.query.SearchSpecification;
 import nu.marginalia.functions.searchquery.QueryFactory;
@@ -58,6 +59,7 @@ public class QueryFactoryTest {
                 "NONE",
                 QueryStrategy.AUTO,
                 RpcTemporalBias.Bias.NONE,
+                NsfwFilterTier.OFF,
                 0), null).specs;
     }

@@ -17,6 +17,7 @@ dependencies {
     implementation project(':code:common:service')
     implementation project(':code:common:db')
     implementation project(':code:libraries:message-queue')
+    implementation project(':code:functions:nsfw-domain-filter')
     implementation project(':code:functions:search-query:api')

     implementation libs.bundles.slf4j
@@ -2,11 +2,13 @@ package nu.marginalia.index.api;

 import com.google.inject.Inject;
 import com.google.inject.Singleton;
+import io.prometheus.client.Counter;
 import nu.marginalia.api.searchquery.IndexApiGrpc;
 import nu.marginalia.api.searchquery.RpcDecoratedResultItem;
 import nu.marginalia.api.searchquery.RpcIndexQuery;
 import nu.marginalia.db.DomainBlacklistImpl;
 import nu.marginalia.model.id.UrlIdCodec;
+import nu.marginalia.nsfw.NsfwDomainFilter;
 import nu.marginalia.service.client.GrpcChannelPoolFactory;
 import nu.marginalia.service.client.GrpcMultiNodeChannelPool;
 import nu.marginalia.service.discovery.property.ServiceKey;
@@ -28,14 +30,26 @@ public class IndexClient {
     private static final Logger logger = LoggerFactory.getLogger(IndexClient.class);
     private final GrpcMultiNodeChannelPool<IndexApiGrpc.IndexApiBlockingStub> channelPool;
     private final DomainBlacklistImpl blacklist;
+    private final NsfwDomainFilter nsfwDomainFilter;
+
+    Counter wmsa_index_query_count = Counter.build()
+            .name("wmsa_nsfw_filter_result_count")
+            .labelNames("tier")
+            .help("Count of results filtered by NSFW tier")
+            .register();
+
     private static final ExecutorService executor = Executors.newCachedThreadPool();

     @Inject
-    public IndexClient(GrpcChannelPoolFactory channelPoolFactory, DomainBlacklistImpl blacklist) {
+    public IndexClient(GrpcChannelPoolFactory channelPoolFactory,
+                       DomainBlacklistImpl blacklist,
+                       NsfwDomainFilter nsfwDomainFilter
+                       ) {
         this.channelPool = channelPoolFactory.createMulti(
                 ServiceKey.forGrpcApi(IndexApiGrpc.class, ServicePartition.multi()),
                 IndexApiGrpc::newBlockingStub);
         this.blacklist = blacklist;
+        this.nsfwDomainFilter = nsfwDomainFilter;
     }

     private static final Comparator<RpcDecoratedResultItem> comparator =
@@ -52,7 +66,7 @@ public class IndexClient {
     public AggregateQueryResponse executeQueries(RpcIndexQuery indexRequest, Pagination pagination) {

         final int requestedMaxResults = indexRequest.getQueryLimits().getResultsTotal();
+        int filterTier = indexRequest.getNsfwFilterTierValue();
         AtomicInteger totalNumResults = new AtomicInteger(0);

         List<RpcDecoratedResultItem> results =
@@ -74,7 +88,7 @@ public class IndexClient {
                     }
                 })
                 .flatMap(List::stream)
-                .filter(item -> !isBlacklisted(item))
+                .filter(item -> !isBlacklisted(item, filterTier))
                 .sorted(comparator)
                 .skip(Math.max(0, (pagination.page - 1) * pagination.pageSize))
                 .limit(pagination.pageSize)
@@ -83,8 +97,23 @@ public class IndexClient {
         return new AggregateQueryResponse(results, pagination.page(), totalNumResults.get());
     }

-    private boolean isBlacklisted(RpcDecoratedResultItem item) {
-        return blacklist.isBlacklisted(UrlIdCodec.getDomainId(item.getRawItem().getCombinedId()));
+    static String[] tierNames = {
+            "OFF",
+            "DANGER",
+            "NSFW"
+    };
+
+    private boolean isBlacklisted(RpcDecoratedResultItem item, int filterTier) {
+        int domainId = UrlIdCodec.getDomainId(item.getRawItem().getCombinedId());
+
+        if (blacklist.isBlacklisted(domainId)) {
+            return true;
+        }
+        if (nsfwDomainFilter.isBlocked(domainId, filterTier)) {
+            wmsa_index_query_count.labels(tierNames[filterTier]).inc();
+            return true;
+        }
+        return false;
     }

 }
32 code/libraries/domain-lock/build.gradle (new file)
@@ -0,0 +1,32 @@
+plugins {
+    id 'java'
+}
+
+java {
+    toolchain {
+        languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
+    }
+}
+
+apply from: "$rootProject.projectDir/srcsets.gradle"
+
+dependencies {
+    implementation libs.bundles.slf4j
+    implementation project(':code:common:model')
+    implementation project(':code:common:config')
+    implementation project(':code:common:service')
+
+    implementation libs.bundles.curator
+
+    implementation libs.guava
+    implementation dependencies.create(libs.guice.get()) {
+        exclude group: 'com.google.guava'
+    }
+    testImplementation libs.bundles.slf4j.test
+    testImplementation libs.bundles.junit
+    testImplementation libs.mockito
+}
+
+test {
+    useJUnitPlatform()
+}
@@ -0,0 +1,32 @@
+package nu.marginalia.coordination;
+
+import nu.marginalia.model.EdgeDomain;
+
+public class DefaultDomainPermits {
+
+    public static int defaultPermits(EdgeDomain domain) {
+        return defaultPermits(domain.topDomain.toLowerCase());
+    }
+
+    public static int defaultPermits(String topDomain) {
+
+        if (topDomain.equals("wordpress.com"))
+            return 16;
+        if (topDomain.equals("blogspot.com"))
+            return 8;
+        if (topDomain.equals("tumblr.com"))
+            return 8;
+        if (topDomain.equals("neocities.org"))
+            return 8;
+        if (topDomain.equals("github.io"))
+            return 8;
+        // Substack really dislikes broad-scale crawlers, so we need to be careful
+        // to not get blocked.
+        if (topDomain.equals("substack.com")) {
+            return 1;
+        }
+
+        return 2;
+    }
+
+}
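Illustrative sketch of the permit lookup, using the values from the table above; the example domains are arbitrary.

```java
import nu.marginalia.coordination.DefaultDomainPermits;
import nu.marginalia.model.EdgeDomain;

class DomainPermitsSketch {
    public static void main(String[] args) {
        // Large shared hosts allow more concurrent crawl permits; crawler-hostile hosts get fewer.
        System.out.println(DefaultDomainPermits.defaultPermits("wordpress.com"));               // 16
        System.out.println(DefaultDomainPermits.defaultPermits("substack.com"));                // 1
        System.out.println(DefaultDomainPermits.defaultPermits(new EdgeDomain("example.com"))); // 2 (fallback)
    }
}
```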
@@ -0,0 +1,17 @@
+package nu.marginalia.coordination;
+
+import com.google.inject.AbstractModule;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class DomainCoordinationModule extends AbstractModule {
+
+    private static final Logger logger = LoggerFactory.getLogger(DomainCoordinationModule.class);
+
+    public DomainCoordinationModule() {
+    }
+
+    public void configure() {
+        bind(DomainCoordinator.class).to(ZookeeperDomainCoordinator.class);
+    }
+}
@@ -0,0 +1,13 @@
+package nu.marginalia.coordination;
+
+import nu.marginalia.model.EdgeDomain;
+
+import java.time.Duration;
+import java.util.Optional;
+
+public interface DomainCoordinator {
+    DomainLock lockDomain(EdgeDomain domain) throws InterruptedException;
+    Optional<DomainLock> tryLockDomain(EdgeDomain domain, Duration timeout) throws InterruptedException;
+    Optional<DomainLock> tryLockDomain(EdgeDomain domain) throws InterruptedException;
+    boolean isLockableHint(EdgeDomain domain);
+}
@@ -0,0 +1,5 @@
+package nu.marginalia.coordination;
+
+public interface DomainLock extends AutoCloseable {
+    void close();
+}
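Both implementations of the interface pair above are intended to be used with try-with-resources, mirroring the FeedFetcherService change earlier in this diff; a hedged sketch with a hypothetical fetch step:

```java
import nu.marginalia.coordination.DomainCoordinator;
import nu.marginalia.coordination.DomainLock;
import nu.marginalia.model.EdgeDomain;

import java.time.Duration;

class DomainLockUsageSketch {
    // Hypothetical helper: only a bounded number of workers touch the same top domain at once,
    // locally or across nodes depending on which DomainCoordinator implementation is bound.
    static void fetchPolitely(DomainCoordinator coordinator, EdgeDomain domain) throws InterruptedException {
        try (DomainLock lock = coordinator.lockDomain(domain)) {
            // ... fetch documents for this domain while holding the lock ...
        }

        // Non-blocking variant: skip the domain if no permit frees up within a second.
        coordinator.tryLockDomain(domain, Duration.ofSeconds(1)).ifPresent(lock -> {
            try (lock) {
                // ... fetch ...
            }
        });
    }
}
```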
@@ -1,16 +1,17 @@
-package nu.marginalia.crawl.logic;
+package nu.marginalia.coordination;

+import com.google.inject.Singleton;
 import nu.marginalia.model.EdgeDomain;

+import java.time.Duration;
 import java.util.Map;
 import java.util.Optional;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.Semaphore;
+import java.util.concurrent.TimeUnit;

-/** Holds lock objects for each domain, to prevent multiple threads from
- * crawling the same domain at the same time.
- */
-public class DomainLocks {
+@Singleton
+public class LocalDomainCoordinator implements DomainCoordinator {
     // The locks are stored in a map, with the domain name as the key. This map will grow
     // relatively big, but should be manageable since the number of domains is limited to
     // a few hundred thousand typically.
@@ -24,13 +25,25 @@ public class DomainLocks {

         sem.acquire();

-        return new DomainLock(sem);
+        return new LocalDomainLock(sem);
     }

     public Optional<DomainLock> tryLockDomain(EdgeDomain domain) {
         var sem = locks.computeIfAbsent(domain.topDomain.toLowerCase(), this::defaultPermits);
         if (sem.tryAcquire(1)) {
-            return Optional.of(new DomainLock(sem));
+            return Optional.of(new LocalDomainLock(sem));
+        }
+        else {
+            // We don't have a lock, so we return an empty optional
+            return Optional.empty();
+        }
+    }
+
+    public Optional<DomainLock> tryLockDomain(EdgeDomain domain, Duration timeout) throws InterruptedException {
+        var sem = locks.computeIfAbsent(domain.topDomain.toLowerCase(), this::defaultPermits);
+        if (sem.tryAcquire(1, timeout.toMillis(), TimeUnit.MILLISECONDS)) {
+            return Optional.of(new LocalDomainLock(sem));
         }
         else {
             // We don't have a lock, so we return an empty optional
@@ -39,24 +52,7 @@ public class DomainLocks {
     }

     private Semaphore defaultPermits(String topDomain) {
-        if (topDomain.equals("wordpress.com"))
-            return new Semaphore(16);
-        if (topDomain.equals("blogspot.com"))
-            return new Semaphore(8);
-        if (topDomain.equals("tumblr.com"))
-            return new Semaphore(8);
-        if (topDomain.equals("neocities.org"))
-            return new Semaphore(8);
-        if (topDomain.equals("github.io"))
-            return new Semaphore(8);
-
-        // Substack really dislikes broad-scale crawlers, so we need to be careful
-        // to not get blocked.
-        if (topDomain.equals("substack.com")) {
-            return new Semaphore(1);
-        }
-
-        return new Semaphore(2);
+        return new Semaphore(DefaultDomainPermits.defaultPermits(topDomain));
     }

     /** Returns true if the domain is lockable, i.e. if it is not already locked by another thread.
@@ -71,15 +67,15 @@ public class DomainLocks {
         return sem.availablePermits() > 0;
     }

-    public static class DomainLock implements AutoCloseable {
+    public static class LocalDomainLock implements DomainLock {
         private final Semaphore semaphore;

-        DomainLock(Semaphore semaphore) {
+        LocalDomainLock(Semaphore semaphore) {
             this.semaphore = semaphore;
         }

         @Override
-        public void close() throws Exception {
+        public void close() {
             semaphore.release();
             Thread.currentThread().setName("[idle]");
         }
@@ -0,0 +1,116 @@
+package nu.marginalia.coordination;
+
+import com.google.inject.Inject;
+import com.google.inject.Singleton;
+import com.google.inject.name.Named;
+import nu.marginalia.model.EdgeDomain;
+import nu.marginalia.service.discovery.ServiceRegistryIf;
+import org.apache.curator.framework.recipes.locks.InterProcessSemaphoreV2;
+import org.apache.curator.framework.recipes.locks.Lease;
+
+import java.time.Duration;
+import java.util.Map;
+import java.util.Optional;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.TimeUnit;
+
+@Singleton
+public class ZookeeperDomainCoordinator implements DomainCoordinator {
+    // The locks are stored in a map, with the domain name as the key. This map will grow
+    // relatively big, but should be manageable since the number of domains is limited to
+    // a few hundred thousand typically.
+    private final Map<String, InterProcessSemaphoreV2> locks = new ConcurrentHashMap<>();
+    private final Map<String, Integer> waitCounts = new ConcurrentHashMap<>();
+
+    private final ServiceRegistryIf serviceRegistry;
+    private final int nodeId;
+
+    @Inject
+    public ZookeeperDomainCoordinator(ServiceRegistryIf serviceRegistry, @Named("wmsa-system-node") int nodeId) {
+        // Zookeeper-specific initialization can be done here if needed
+        this.serviceRegistry = serviceRegistry;
+        this.nodeId = nodeId;
+    }
+
+    /** Returns a lock object corresponding to the given domain. The object is returned as-is,
+     * and may be held by another thread. The caller is responsible for locking and releasing the lock.
+     */
+    public DomainLock lockDomain(EdgeDomain domain) throws InterruptedException {
+        final String key = domain.topDomain.toLowerCase();
+        var sem = locks.computeIfAbsent(key, this::createSemapore);
+
+        // Increment or add a wait count for the domain
+        waitCounts.compute(key, (k,value) -> (value == null ? 1 : value + 1));
+        try {
+            return new ZkDomainLock(sem, sem.acquire());
+        }
+        catch (Exception e) {
+            throw new RuntimeException("Failed to acquire lock for domain: " + domain.topDomain, e);
+        }
+        finally {
+            // Decrement or remove the wait count for the domain
+            waitCounts.compute(key, (k,value) -> (value == null || value <= 1) ? null : value - 1);
+        }
+    }
+
+    public Optional<DomainLock> tryLockDomain(EdgeDomain domain) throws InterruptedException {
+        return tryLockDomain(domain, Duration.ofSeconds(1)); // Underlying semaphore doesn't have a tryLock method, so we use a short timeout
+    }
+
+    public Optional<DomainLock> tryLockDomain(EdgeDomain domain, Duration timeout) throws InterruptedException {
+        final String key = domain.topDomain.toLowerCase();
+        var sem = locks.computeIfAbsent(key, this::createSemapore);
+
+        // Increment or add a wait count for the domain
+        waitCounts.compute(key, (k,value) -> (value == null ? 1 : value + 1));
+        try {
+            var lease = sem.acquire(timeout.toMillis(), TimeUnit.MILLISECONDS); // Acquire with timeout
+            if (lease != null) {
+                return Optional.of(new ZkDomainLock(sem, lease));
+            }
+            else {
+                return Optional.empty(); // If we fail to acquire the lease, we return an empty optional
+            }
+        }
+        catch (Exception e) {
+            return Optional.empty(); // If we fail to acquire the lock, we return an empty optional
+        }
+        finally {
+            waitCounts.compute(key, (k,value) -> (value == null || value <= 1) ? null : value - 1);
+        }
+    }
+
+    private InterProcessSemaphoreV2 createSemapore(String topDomain){
+        try {
+            return serviceRegistry.getSemaphore(topDomain + ":" + nodeId, DefaultDomainPermits.defaultPermits(topDomain));
+        }
+        catch (Exception e) {
+            throw new RuntimeException("Failed to get semaphore for domain: " + topDomain, e);
+        }
+    }
+
+    /** Returns true if the domain is lockable, i.e. if it is not already locked by another thread.
+     * (this is just a hint, and does not guarantee that the domain is actually lockable any time
+     * after this method returns true)
+     */
+    public boolean isLockableHint(EdgeDomain domain) {
+        return !waitCounts.containsKey(domain.topDomain.toLowerCase());
+    }
+
+    public static class ZkDomainLock implements DomainLock {
+        private final InterProcessSemaphoreV2 semaphore;
+        private final Lease lease;
+
+        ZkDomainLock(InterProcessSemaphoreV2 semaphore, Lease lease) {
+            this.semaphore = semaphore;
+            this.lease = lease;
+        }
+
+        @Override
+        public void close() {
+            semaphore.returnLease(lease);
+        }
+    }
+}
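DomainCoordinationModule above binds the interface to the Zookeeper-backed coordinator, which is what the crawler installs further down in this diff; for single-node setups or tests, the in-process LocalDomainCoordinator can be bound instead. A hedged sketch of that alternative binding (not part of the commit):

```java
import com.google.inject.AbstractModule;
import nu.marginalia.coordination.DomainCoordinator;
import nu.marginalia.coordination.LocalDomainCoordinator;

// Illustrative module: use the purely in-memory coordinator when no service registry /
// Zookeeper is available, e.g. in tests or a single-node deployment.
class LocalCoordinationModule extends AbstractModule {
    @Override
    protected void configure() {
        bind(DomainCoordinator.class).to(LocalDomainCoordinator.class);
    }
}
```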
@@ -15,6 +15,10 @@ dependencies {

     implementation libs.bundles.slf4j
     implementation libs.opencsv
+    implementation libs.guava
+    implementation dependencies.create(libs.guice.get()) {
+        exclude group: 'com.google.guava'
+    }

     testImplementation libs.bundles.slf4j.test
     testImplementation libs.bundles.junit
@@ -1,5 +1,6 @@
 package nu.marginalia.geoip;

+import com.google.inject.Singleton;
 import nu.marginalia.WmsaHome;
 import nu.marginalia.geoip.sources.AsnMapping;
 import nu.marginalia.geoip.sources.AsnTable;
@@ -10,6 +11,7 @@ import org.slf4j.LoggerFactory;
 import java.net.InetAddress;
 import java.util.Optional;

+@Singleton
 public class GeoIpDictionary {
     private volatile IP2LocationMapping ip2locMapping = null;
     private volatile AsnTable asnTable = null;
@@ -76,7 +78,7 @@ public class GeoIpDictionary {
     }

     public Optional<AsnTable.AsnInfo> getAsnInfo(int ipAddress) {
-        if (null == asnTable) { // not loaded yet or failed to load
+        if (null == asnMapping || null == asnTable) { // not loaded yet or failed to load
             return Optional.empty();
         }

@@ -32,6 +32,7 @@ dependencies {
     implementation project(':code:libraries:message-queue')
     implementation project(':code:libraries:language-processing')
    implementation project(':code:libraries:easy-lsh')
+    implementation project(':code:libraries:domain-lock')
     implementation project(':code:processes:crawling-process:model')
     implementation project(':code:processes:crawling-process:model')

@@ -58,6 +59,7 @@ dependencies {
     implementation libs.jsoup
     implementation libs.opencsv
     implementation libs.fastutil
+    implementation libs.bundles.curator

     implementation libs.bundles.mariadb
     implementation libs.bundles.httpcomponents
@@ -10,9 +10,11 @@ import nu.marginalia.WmsaHome;
 import nu.marginalia.atags.model.DomainLinks;
 import nu.marginalia.atags.source.AnchorTagsSource;
 import nu.marginalia.atags.source.AnchorTagsSourceFactory;
+import nu.marginalia.coordination.DomainCoordinationModule;
+import nu.marginalia.coordination.DomainCoordinator;
+import nu.marginalia.coordination.DomainLock;
 import nu.marginalia.crawl.fetcher.HttpFetcherImpl;
 import nu.marginalia.crawl.fetcher.warc.WarcRecorder;
-import nu.marginalia.crawl.logic.DomainLocks;
 import nu.marginalia.crawl.retreival.CrawlDataReference;
 import nu.marginalia.crawl.retreival.CrawlerRetreiver;
 import nu.marginalia.crawl.retreival.DomainProber;
@@ -25,9 +27,12 @@ import nu.marginalia.mq.MessageQueueFactory;
 import nu.marginalia.process.ProcessConfiguration;
 import nu.marginalia.process.ProcessConfigurationModule;
 import nu.marginalia.process.ProcessMainClass;
+import nu.marginalia.process.control.ProcessEventLog;
 import nu.marginalia.process.control.ProcessHeartbeatImpl;
 import nu.marginalia.process.log.WorkLog;
+import nu.marginalia.service.discovery.ServiceRegistryIf;
 import nu.marginalia.service.module.DatabaseModule;
+import nu.marginalia.service.module.ServiceDiscoveryModule;
 import nu.marginalia.slop.SlopCrawlDataRecord;
 import nu.marginalia.storage.FileStorageService;
 import nu.marginalia.storage.model.FileStorageId;
@@ -54,6 +59,7 @@ public class CrawlerMain extends ProcessMainClass {

     private final UserAgent userAgent;
     private final ProcessHeartbeatImpl heartbeat;
+    private final ProcessEventLog eventLog;
     private final DomainProber domainProber;
     private final FileStorageService fileStorageService;
     private final AnchorTagsSourceFactory anchorTagsSourceFactory;
@@ -61,9 +67,10 @@ public class CrawlerMain extends ProcessMainClass {
     private final HikariDataSource dataSource;
     private final DomainBlacklist blacklist;
     private final int node;
+    private final ServiceRegistryIf serviceRegistry;
     private final SimpleBlockingThreadPool pool;

-    private final DomainLocks domainLocks = new DomainLocks();
+    private final DomainCoordinator domainCoordinator;

     private final Map<String, CrawlTask> pendingCrawlTasks = new ConcurrentHashMap<>();

@@ -84,6 +91,7 @@ public class CrawlerMain extends ProcessMainClass {
     public CrawlerMain(UserAgent userAgent,
                        HttpFetcherImpl httpFetcher,
                        ProcessHeartbeatImpl heartbeat,
+                       ProcessEventLog eventLog,
                        MessageQueueFactory messageQueueFactory, DomainProber domainProber,
                        FileStorageService fileStorageService,
                        ProcessConfiguration processConfiguration,
@@ -91,6 +99,8 @@ public class CrawlerMain extends ProcessMainClass {
                        WarcArchiverFactory warcArchiverFactory,
                        HikariDataSource dataSource,
                        DomainBlacklist blacklist,
+                       DomainCoordinator domainCoordinator,
+                       ServiceRegistryIf serviceRegistry,
                        Gson gson) throws InterruptedException {

         super(messageQueueFactory, processConfiguration, gson, CRAWLER_INBOX);
@@ -98,6 +108,7 @@ public class CrawlerMain extends ProcessMainClass {
         this.userAgent = userAgent;
         this.fetcher = httpFetcher;
         this.heartbeat = heartbeat;
+        this.eventLog = eventLog;
         this.domainProber = domainProber;
         this.fileStorageService = fileStorageService;
         this.anchorTagsSourceFactory = anchorTagsSourceFactory;
@@ -105,6 +116,8 @@ public class CrawlerMain extends ProcessMainClass {
         this.dataSource = dataSource;
         this.blacklist = blacklist;
         this.node = processConfiguration.node();
+        this.serviceRegistry = serviceRegistry;
+        this.domainCoordinator = domainCoordinator;

         SimpleBlockingThreadPool.ThreadType threadType;
         if (Boolean.getBoolean("crawler.useVirtualThreads")) {
@@ -147,12 +160,18 @@ public class CrawlerMain extends ProcessMainClass {
         Injector injector = Guice.createInjector(
                 new CrawlerModule(),
                 new ProcessConfigurationModule("crawler"),
+                new ServiceDiscoveryModule(),
+                new DomainCoordinationModule(),
                 new DatabaseModule(false)
         );
         var crawler = injector.getInstance(CrawlerMain.class);

         var instructions = crawler.fetchInstructions(nu.marginalia.mqapi.crawling.CrawlRequest.class);

+        crawler.serviceRegistry.registerProcess("crawler", crawler.node);
+
         try {
+            crawler.eventLog.logEvent("CRAWLER-INFO", "Crawling started");
             var req = instructions.value();
             if (req.targetDomainName != null) {
                 crawler.runForSingleDomain(req.targetDomainName, req.crawlStorage);
@@ -160,11 +179,15 @@ public class CrawlerMain extends ProcessMainClass {
             else {
                 crawler.runForDatabaseDomains(req.crawlStorage);
             }
+            crawler.eventLog.logEvent("CRAWLER-INFO", "Crawl completed successfully");
             instructions.ok();
         } catch (Exception ex) {
             logger.error("Crawler failed", ex);
             instructions.err();
         }
+        finally {
+            crawler.serviceRegistry.deregisterProcess("crawler", crawler.node);
+        }

         TimeUnit.SECONDS.sleep(5);
     }
@@ -433,7 +456,7 @@ public class CrawlerMain extends ProcessMainClass {
     /** Best effort indicator whether we could start this now without getting stuck in
|
/** Best effort indicator whether we could start this now without getting stuck in
|
||||||
* DomainLocks purgatory */
|
* DomainLocks purgatory */
|
||||||
public boolean canRun() {
|
public boolean canRun() {
|
||||||
return domainLocks.isLockableHint(new EdgeDomain(domain));
|
return domainCoordinator.isLockableHint(new EdgeDomain(domain));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -444,7 +467,7 @@ public class CrawlerMain extends ProcessMainClass {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
Optional<DomainLocks.DomainLock> lock = domainLocks.tryLockDomain(new EdgeDomain(domain));
|
Optional<DomainLock> lock = domainCoordinator.tryLockDomain(new EdgeDomain(domain));
|
||||||
// We don't have a lock, so we can't run this task
|
// We don't have a lock, so we can't run this task
|
||||||
// we return to avoid blocking the pool for too long
|
// we return to avoid blocking the pool for too long
|
||||||
if (lock.isEmpty()) {
|
if (lock.isEmpty()) {
|
||||||
@@ -452,7 +475,7 @@ public class CrawlerMain extends ProcessMainClass {
|
|||||||
retryQueue.put(this);
|
retryQueue.put(this);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
DomainLocks.DomainLock domainLock = lock.get();
|
DomainLock domainLock = lock.get();
|
||||||
|
|
||||||
try (domainLock) {
|
try (domainLock) {
|
||||||
Thread.currentThread().setName("crawling:" + domain);
|
Thread.currentThread().setName("crawling:" + domain);
|
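
The lock handling in the hunks above follows an acquire-or-requeue pattern: tryLockDomain returns an Optional<DomainLock>, a missing lock sends the task back to the retry queue, and a held lock is released by try-with-resources. A minimal sketch of that pattern, for orientation only (retryQueue, task and crawlSingleDomain are placeholders, not the project's actual task code):

    // Sketch: acquire a per-domain lock without blocking the worker pool.
    Optional<DomainLock> maybeLock = domainCoordinator.tryLockDomain(new EdgeDomain("example.com"));
    if (maybeLock.isEmpty()) {
        retryQueue.put(task);   // lock held elsewhere: requeue instead of blocking
        return;
    }
    try (DomainLock lock = maybeLock.get()) {
        crawlSingleDomain();    // lock is held for this block and released on close()
    }
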
@@ -52,6 +52,7 @@ import java.io.IOException;
import java.net.SocketTimeoutException;
import java.net.URISyntaxException;
import java.net.UnknownHostException;
+import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.time.Duration;
import java.time.Instant;
@@ -87,13 +88,14 @@ public class HttpFetcherImpl implements HttpFetcher, HttpRequestRetryStrategy {
        return connectionManager.getTotalStats();
    }

-   private CloseableHttpClient createClient() throws NoSuchAlgorithmException {
+   private CloseableHttpClient createClient() throws NoSuchAlgorithmException, KeyManagementException {
        final ConnectionConfig connectionConfig = ConnectionConfig.custom()
                .setSocketTimeout(10, TimeUnit.SECONDS)
                .setConnectTimeout(30, TimeUnit.SECONDS)
                .setValidateAfterInactivity(TimeValue.ofSeconds(5))
                .build();


        connectionManager = PoolingHttpClientConnectionManagerBuilder.create()
                .setMaxConnPerRoute(2)
                .setMaxConnTotal(5000)
@@ -183,6 +185,8 @@ public class HttpFetcherImpl implements HttpFetcher, HttpRequestRetryStrategy {
            this.client = createClient();
        } catch (NoSuchAlgorithmException e) {
            throw new RuntimeException(e);
+       } catch (KeyManagementException e) {
+           throw new RuntimeException(e);
        }
        this.userAgentString = userAgent.uaString();
        this.userAgentIdentifier = userAgent.uaIdentifier();
@@ -193,6 +197,8 @@ public class HttpFetcherImpl implements HttpFetcher, HttpRequestRetryStrategy {
            this.client = createClient();
        } catch (NoSuchAlgorithmException e) {
            throw new RuntimeException(e);
+       } catch (KeyManagementException e) {
+           throw new RuntimeException(e);
        }
        this.userAgentString = userAgent;
        this.userAgentIdentifier = userAgent;
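
The newly propagated KeyManagementException is the checked exception the JDK throws when an SSLContext is initialized by hand, which is presumably what the reworked createClient() now does somewhere outside this hunk. A hypothetical sketch of that standard JDK pattern, not the project's actual client setup:

    import javax.net.ssl.SSLContext;
    import java.security.KeyManagementException;
    import java.security.NoSuchAlgorithmException;

    static SSLContext makeContext() throws NoSuchAlgorithmException, KeyManagementException {
        SSLContext ctx = SSLContext.getInstance("TLS"); // throws NoSuchAlgorithmException
        ctx.init(null, null, null);                     // default managers; throws KeyManagementException
        return ctx;
    }
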
@@ -32,6 +32,7 @@ dependencies {
    implementation project(':code:index:api')
    implementation project(':code:processes:process-mq-api')
    implementation project(':code:libraries:message-queue')
+   implementation project(':code:libraries:domain-lock')
    implementation project(':code:libraries:language-processing')
    implementation project(':code:libraries:easy-lsh')
    implementation project(':code:processes:crawling-process')
@@ -10,6 +10,8 @@ import nu.marginalia.api.feeds.FeedsClient;
import nu.marginalia.converting.ConverterModule;
import nu.marginalia.converting.processor.DomainProcessor;
import nu.marginalia.converting.writer.ConverterBatchWriter;
+import nu.marginalia.coordination.DomainCoordinationModule;
+import nu.marginalia.coordination.DomainCoordinator;
import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.db.DomainBlacklist;
import nu.marginalia.io.SerializableCrawlDataStream;
@@ -58,6 +60,7 @@ public class LiveCrawlerMain extends ProcessMainClass {
    private final FileStorageService fileStorageService;
    private final KeywordLoaderService keywordLoaderService;
    private final DocumentLoaderService documentLoaderService;
+   private final DomainCoordinator domainCoordinator;
    private final HikariDataSource dataSource;

    @Inject
@@ -71,7 +74,7 @@ public class LiveCrawlerMain extends ProcessMainClass {
                           DomainProcessor domainProcessor,
                           FileStorageService fileStorageService,
                           KeywordLoaderService keywordLoaderService,
-                          DocumentLoaderService documentLoaderService, HikariDataSource dataSource)
+                          DocumentLoaderService documentLoaderService, DomainCoordinator domainCoordinator, HikariDataSource dataSource)
            throws Exception
    {
        super(messageQueueFactory, config, gson, LIVE_CRAWLER_INBOX);
@@ -84,6 +87,7 @@ public class LiveCrawlerMain extends ProcessMainClass {
        this.fileStorageService = fileStorageService;
        this.keywordLoaderService = keywordLoaderService;
        this.documentLoaderService = documentLoaderService;
+       this.domainCoordinator = domainCoordinator;
        this.dataSource = dataSource;

        domainBlacklist.waitUntilLoaded();
@@ -107,6 +111,7 @@ public class LiveCrawlerMain extends ProcessMainClass {
        try {
            Injector injector = Guice.createInjector(
                    new LiveCrawlerModule(),
+                   new DomainCoordinationModule(), // 2 hours lease timeout is enough for the live crawler
                    new ProcessConfigurationModule("crawler"),
                    new ConverterModule(),
                    new ServiceDiscoveryModule(),
@@ -172,7 +177,7 @@ public class LiveCrawlerMain extends ProcessMainClass {

        processHeartbeat.progress(LiveCrawlState.CRAWLING);

-       try (SimpleLinkScraper fetcher = new SimpleLinkScraper(dataSet, domainQueries, domainBlacklist);
+       try (SimpleLinkScraper fetcher = new SimpleLinkScraper(dataSet, domainCoordinator, domainQueries, domainBlacklist);
             var hb = heartbeat.createAdHocTaskHeartbeat("Live Crawling"))
        {
            for (Map.Entry<String, List<String>> entry : hb.wrap("Fetching", urlsPerDomain.entrySet())) {
@@ -5,8 +5,9 @@ import crawlercommons.robots.SimpleRobotRulesParser;
import nu.marginalia.WmsaHome;
import nu.marginalia.contenttype.ContentType;
import nu.marginalia.contenttype.DocumentBodyToString;
+import nu.marginalia.coordination.DomainCoordinator;
+import nu.marginalia.coordination.DomainLock;
import nu.marginalia.crawl.fetcher.HttpFetcherImpl;
-import nu.marginalia.crawl.logic.DomainLocks;
import nu.marginalia.crawl.retreival.CrawlDelayTimer;
import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.db.DomainBlacklist;
@@ -46,14 +47,16 @@ public class SimpleLinkScraper implements AutoCloseable {
    private final DomainBlacklist domainBlacklist;
    private final Duration connectTimeout = Duration.ofSeconds(10);
    private final Duration readTimeout = Duration.ofSeconds(10);
-   private final DomainLocks domainLocks = new DomainLocks();
+   private final DomainCoordinator domainCoordinator;

    private final static int MAX_SIZE = Integer.getInteger("crawler.maxFetchSize", 10 * 1024 * 1024);

    public SimpleLinkScraper(LiveCrawlDataSet dataSet,
+                            DomainCoordinator domainCoordinator,
                             DbDomainQueries domainQueries,
                             DomainBlacklist domainBlacklist) {
        this.dataSet = dataSet;
+       this.domainCoordinator = domainCoordinator;
        this.domainQueries = domainQueries;
        this.domainBlacklist = domainBlacklist;
    }
@@ -98,7 +101,7 @@ public class SimpleLinkScraper implements AutoCloseable {
                .version(HttpClient.Version.HTTP_2)
                .build();
             // throttle concurrent access per domain; IDE will complain it's not used, but it holds a semaphore -- do not remove:
-            DomainLocks.DomainLock lock = domainLocks.lockDomain(domain)
+            DomainLock lock = domainCoordinator.lockDomain(domain)
        ) {
            SimpleRobotRules rules = fetchRobotsRules(rootUrl, client);

@@ -1,5 +1,6 @@
package nu.marginalia.livecrawler;

+import nu.marginalia.coordination.LocalDomainCoordinator;
import nu.marginalia.db.DomainBlacklistImpl;
import nu.marginalia.io.SerializableCrawlDataStream;
import nu.marginalia.model.EdgeDomain;
@@ -37,7 +38,7 @@ class SimpleLinkScraperTest {

    @Test
    public void testRetrieveNow() throws Exception {
-       var scraper = new SimpleLinkScraper(dataSet, null, Mockito.mock(DomainBlacklistImpl.class));
+       var scraper = new SimpleLinkScraper(dataSet, new LocalDomainCoordinator(), null, Mockito.mock(DomainBlacklistImpl.class));
        int fetched = scraper.retrieveNow(new EdgeDomain("www.marginalia.nu"), 1, List.of("https://www.marginalia.nu/"));
        Assertions.assertEquals(1, fetched);

@@ -57,7 +58,7 @@ class SimpleLinkScraperTest {
    @Test
    public void testRetrieveNow_Redundant() throws Exception {
        dataSet.saveDocument(1, new EdgeUrl("https://www.marginalia.nu/"), "<html>", "", "127.0.0.1");
-       var scraper = new SimpleLinkScraper(dataSet, null, Mockito.mock(DomainBlacklistImpl.class));
+       var scraper = new SimpleLinkScraper(dataSet, new LocalDomainCoordinator(), null, Mockito.mock(DomainBlacklistImpl.class));

        // If the requested URL is already in the dataSet, we retrieveNow should shortcircuit and not fetch anything
        int fetched = scraper.retrieveNow(new EdgeDomain("www.marginalia.nu"), 1, List.of("https://www.marginalia.nu/"));

72
code/processes/ping-process/build.gradle
Normal file
@@ -0,0 +1,72 @@
plugins {
    id 'java'

    id 'application'
    id 'jvm-test-suite'
}

java {
    toolchain {
        languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
    }
}

application {
    mainClass = 'nu.marginalia.ping.PingMain'
    applicationName = 'ping-process'
}

tasks.distZip.enabled = false

apply from: "$rootProject.projectDir/srcsets.gradle"

dependencies {

    implementation project(':code:common:db')
    implementation project(':code:common:model')
    implementation project(':code:common:config')
    implementation project(':code:common:service')

    implementation project(':code:libraries:domain-lock')
    implementation project(':code:libraries:geo-ip')
    implementation project(':code:libraries:message-queue')

    implementation project(':code:processes:process-mq-api')
    implementation libs.bundles.slf4j
    implementation libs.notnull
    implementation libs.guava

    implementation dependencies.create(libs.guice.get()) {
        exclude group: 'com.google.guava'
    }
    implementation libs.gson
    implementation libs.zstd
    implementation libs.bucket4j
    implementation libs.crawlercommons
    implementation libs.jsoup
    implementation libs.fastutil
    implementation libs.bundles.curator
    implementation libs.bundles.mariadb
    implementation libs.bundles.httpcomponents
    implementation libs.commons.lang3

    implementation 'org.bouncycastle:bcprov-jdk18on:1.80'
    implementation 'org.bouncycastle:bcpkix-jdk18on:1.80'
    implementation 'dnsjava:dnsjava:3.5.2'

    testImplementation libs.bundles.slf4j.test
    testImplementation libs.bundles.junit
    testImplementation libs.mockito

    testImplementation libs.wiremock


    testImplementation platform('org.testcontainers:testcontainers-bom:1.17.4')
    testImplementation libs.commons.codec
    testImplementation 'org.testcontainers:mariadb:1.17.4'
    testImplementation 'org.testcontainers:junit-jupiter:1.17.4'
    testImplementation project(':code:libraries:test-helpers')

    testImplementation project(':code:processes:test-data')
}

@@ -0,0 +1,84 @@
package nu.marginalia.ping;

import com.google.inject.Inject;
import nu.marginalia.ping.model.ErrorClassification;

import java.time.Duration;
import java.util.Map;
import java.util.concurrent.ThreadLocalRandom;

public class BackoffStrategy {

    private final Map<ErrorClassification, Duration> baseIntervals;
    private final Map<ErrorClassification, Duration> maxIntervals;
    private final Duration okInterval;

    @Inject
    public BackoffStrategy(PingIntervalsConfiguration pingIntervalsConfiguration) {
        this.baseIntervals = pingIntervalsConfiguration.baseIntervals();
        this.maxIntervals = pingIntervalsConfiguration.maxIntervals();
        this.okInterval = baseIntervals.get(ErrorClassification.NONE);
    }

    public Duration getOkInterval() {
        return okInterval;
    }

    public Duration getUpdateTime(Duration currentDuration,
                                  ErrorClassification errorClassification,
                                  int backoffConsecutiveFailures) {

        Duration nextBackoff = calculateBackoff(errorClassification, currentDuration, backoffConsecutiveFailures + 1);
        nextBackoff = addJitter(nextBackoff);

        return nextBackoff;
    }

    private Duration calculateBackoff(ErrorClassification errorClassification,
                                      Duration currentDuration,
                                      int backoffConsecutiveFailures) {

        if (currentDuration == null) {
            return baseIntervals.get(errorClassification);
        }

        Duration baseInterval = baseIntervals.get(errorClassification);
        Duration maxInterval = maxIntervals.get(errorClassification);

        if (currentDuration.compareTo(maxInterval) >= 0) {
            return maxInterval;
        }

        double multiplier = switch(errorClassification) {
            case ErrorClassification.UNKNOWN -> 1.5;
            case ErrorClassification.TIMEOUT -> 2.5;
            case ErrorClassification.CONNECTION_ERROR -> 2.0;
            case ErrorClassification.HTTP_CLIENT_ERROR -> 1.7;
            case ErrorClassification.HTTP_SERVER_ERROR -> 2.0;
            case ErrorClassification.SSL_ERROR -> 1.8;
            case ErrorClassification.DNS_ERROR -> 1.5;
            default -> 2.0; // Default multiplier for any other classification
        };

        double backoffMinutes = baseInterval.toMinutes()
                * Math.pow(multiplier, backoffConsecutiveFailures - 1);

        Duration newDuration = Duration.ofMinutes(Math.round(0.5+backoffMinutes));
        if (newDuration.compareTo(maxInterval) > 0) {
            return maxInterval;
        }

        return newDuration;
    }

    private Duration addJitter(Duration duration) {
        // Add ±15% jitter to prevent synchronized retries
        double jitterPercent = 0.15;
        long baseMinutes = duration.toMinutes();
        long jitterRange = (long) (baseMinutes * jitterPercent * 2);
        long jitterOffset = ThreadLocalRandom.current().nextLong(jitterRange + 1) - (jitterRange / 2);

        long finalMinutes = Math.max(1, baseMinutes + jitterOffset);
        return Duration.ofMinutes(finalMinutes);
    }
}
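
To make the growth concrete: with the intervals configured in PingModule further down (TIMEOUT starts at 5 minutes and is capped at 6 hours) and the 2.5x multiplier above, a run of consecutive timeouts would be rescheduled roughly as follows, before the ±15% jitter is applied (illustrative arithmetic only, not output of the actual process):

    1st timeout: 5 min            (base interval, no previous duration)
    2nd timeout: 5 * 2.5   ≈ 13 min
    3rd timeout: 5 * 2.5^2 ≈ 32 min
    4th timeout: 5 * 2.5^3 ≈ 79 min
    5th timeout: 5 * 2.5^4 ≈ 196 min
    6th timeout: 5 * 2.5^5 ≈ 489 min -> capped at 360 min (6 h)
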

259
code/processes/ping-process/java/nu/marginalia/ping/PingDao.java
Normal file
@@ -0,0 +1,259 @@
package nu.marginalia.ping;

import com.google.gson.Gson;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.model.gson.GsonFactory;
import nu.marginalia.ping.model.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.sql.ResultSet;
import java.sql.SQLException;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Objects;

@Singleton
public class PingDao {
    private final HikariDataSource dataSource;
    private static final Gson gson = GsonFactory.get();
    private static final Logger logger = LoggerFactory.getLogger(PingDao.class);

    @Inject
    public PingDao(HikariDataSource dataSource) {
        this.dataSource = dataSource;
    }

    public void write(WritableModel model) {
        write(List.of(model));
    }

    public void write(Collection<WritableModel> models) {
        logger.debug("Writing: {}", models);

        try (var conn = dataSource.getConnection()) {

            // Don't bother with a transaction if there's only one model to write.
            if (models.size() <= 1) {
                for (WritableModel model : models) {
                    model.write(conn);
                }
            }
            else { // If there are multiple models, use a transaction to ensure atomicity.
                conn.setAutoCommit(false);
                try {
                    for (WritableModel model : models) {
                        model.write(conn);
                    }
                    conn.commit();
                } catch (SQLException e) {
                    conn.rollback();
                    throw e;
                } finally {
                    conn.setAutoCommit(true);
                }
            }
        } catch (SQLException e) {
            throw new RuntimeException("Failed to write model", e);
        }
    }

    public void scheduleDnsUpdate(String rootDomainName, Instant timestamp, int priority) throws SQLException {
        try (var conn = dataSource.getConnection();
             var ps = conn.prepareStatement("""
                     UPDATE DOMAIN_DNS_INFORMATION
                     SET TS_NEXT_DNS_CHECK = ?, DNS_CHECK_PRIORITY = ?
                     WHERE ROOT_DOMAIN_NAME = ?
                     """)) {

            ps.setTimestamp(1, java.sql.Timestamp.from(timestamp));
            ps.setInt(2, priority);
            ps.setString(3, rootDomainName);
            ps.executeUpdate();
        }
    }

    public DomainAvailabilityRecord getDomainPingStatus(int domainId) throws SQLException {

        try (var conn = dataSource.getConnection();
             var ps = conn.prepareStatement("SELECT * FROM DOMAIN_AVAILABILITY_INFORMATION WHERE domain_id = ?")) {

            ps.setInt(1, domainId);
            ResultSet rs = ps.executeQuery();
            if (rs.next()) {
                return new DomainAvailabilityRecord(rs);
            } else {
                return null; // or throw an exception if preferred
            }
        }

    }

    public DomainSecurityRecord getDomainSecurityInformation(int domainId) throws SQLException {
        try (var conn = dataSource.getConnection();
             var ps = conn.prepareStatement("SELECT * FROM DOMAIN_SECURITY_INFORMATION WHERE domain_id = ?")) {

            ps.setInt(1, domainId);
            ResultSet rs = ps.executeQuery();
            if (rs.next()) {
                return new DomainSecurityRecord(rs);
            } else {
                return null; // or throw an exception if preferred
            }
        }
    }

    public DomainDnsRecord getDomainDnsRecord(long dnsRootDomainId) throws SQLException {
        try (var conn = dataSource.getConnection();
             var ps = conn.prepareStatement("SELECT * FROM DOMAIN_DNS_INFORMATION WHERE DNS_ROOT_DOMAIN_ID = ?")) {

            ps.setObject(1, dnsRootDomainId, java.sql.Types.INTEGER);
            ResultSet rs = ps.executeQuery();
            if (rs.next()) {
                return new DomainDnsRecord(rs);
            } else {
                return null; // or throw an exception if preferred
            }
        }
    }

    public DomainDnsRecord getDomainDnsRecord(String rootDomainName) throws SQLException {
        try (var conn = dataSource.getConnection();
             var ps = conn.prepareStatement("SELECT * FROM DOMAIN_DNS_INFORMATION WHERE ROOT_DOMAIN_NAME = ?")) {

            ps.setString(1, rootDomainName);
            ResultSet rs = ps.executeQuery();
            if (rs.next()) {
                return new DomainDnsRecord(rs);
            } else {
                return null; // or throw an exception if preferred
            }
        }
    }

    public HistoricalAvailabilityData getHistoricalAvailabilityData(long domainId) throws SQLException {
        var query = """
                SELECT EC_DOMAIN.ID, EC_DOMAIN.DOMAIN_NAME, EC_DOMAIN.NODE_AFFINITY, DOMAIN_AVAILABILITY_INFORMATION.*, DOMAIN_SECURITY_INFORMATION.*
                FROM EC_DOMAIN
                LEFT JOIN DOMAIN_SECURITY_INFORMATION ON DOMAIN_SECURITY_INFORMATION.DOMAIN_ID = EC_DOMAIN.ID
                LEFT JOIN DOMAIN_AVAILABILITY_INFORMATION ON DOMAIN_AVAILABILITY_INFORMATION.DOMAIN_ID = EC_DOMAIN.ID
                WHERE EC_DOMAIN.ID = ?
                """;
        try (var conn = dataSource.getConnection();
             var ps = conn.prepareStatement(query)) {

            ps.setLong(1, domainId);

            ResultSet rs = ps.executeQuery();
            while (rs.next()) {
                String domainName = rs.getString("EC_DOMAIN.DOMAIN_NAME");

                DomainAvailabilityRecord dar;
                DomainSecurityRecord dsr;

                if (rs.getObject("DOMAIN_SECURITY_INFORMATION.DOMAIN_ID", Integer.class) != null)
                    dsr = new DomainSecurityRecord(rs);
                else
                    dsr = null;

                if (rs.getObject("DOMAIN_AVAILABILITY_INFORMATION.DOMAIN_ID", Integer.class) != null)
                    dar = new DomainAvailabilityRecord(rs);
                else
                    dar = null;

                if (dar == null) {
                    return new HistoricalAvailabilityData.JustDomainReference(new DomainReference(
                            rs.getInt("EC_DOMAIN.ID"),
                            rs.getInt("EC_DOMAIN.NODE_AFFINITY"),
                            domainName.toLowerCase()
                    ));
                }
                else {
                    if (dsr != null) {
                        return new HistoricalAvailabilityData.AvailabilityAndSecurity(domainName, dar, dsr);
                    } else {
                        return new HistoricalAvailabilityData.JustAvailability(domainName, dar);
                    }
                }
            }
        }

        return null;
    }

    public List<UpdateSchedule.UpdateJob<DomainReference, HistoricalAvailabilityData>> getDomainUpdateSchedule(int nodeId) {
        List<UpdateSchedule.UpdateJob<DomainReference, HistoricalAvailabilityData>> updateJobs = new ArrayList<>();

        try (var conn = dataSource.getConnection();
             var ps = conn.prepareStatement("""
                     SELECT ID, DOMAIN_NAME, NEXT_SCHEDULED_UPDATE
                     FROM EC_DOMAIN
                     LEFT JOIN DOMAIN_AVAILABILITY_INFORMATION
                     ON EC_DOMAIN.ID = DOMAIN_AVAILABILITY_INFORMATION.DOMAIN_ID
                     WHERE NODE_AFFINITY = ?
                     """)) {
            ps.setFetchSize(10_000);
            ps.setInt(1, nodeId);
            ResultSet rs = ps.executeQuery();
            while (rs.next()) {
                int domainId = rs.getInt("ID");
                String domainName = rs.getString("DOMAIN_NAME");
                var ts = rs.getTimestamp("NEXT_SCHEDULED_UPDATE");
                Instant nextUpdate = ts == null ? Instant.now() : ts.toInstant();

                var ref = new DomainReference(domainId, nodeId, domainName.toLowerCase());
                updateJobs.add(new UpdateSchedule.UpdateJob<>(ref, nextUpdate));
            }
        } catch (SQLException e) {
            throw new RuntimeException("Failed to retrieve domain update schedule", e);
        }

        logger.info("Found {} availability update jobs for node {}", updateJobs.size(), nodeId);

        return updateJobs;
    }

    public List<UpdateSchedule.UpdateJob<RootDomainReference, RootDomainReference>> getDnsUpdateSchedule(int nodeId) {
        List<UpdateSchedule.UpdateJob<RootDomainReference, RootDomainReference>> updateJobs = new ArrayList<>();

        try (var conn = dataSource.getConnection();
             var ps = conn.prepareStatement("""
                     SELECT DISTINCT(DOMAIN_TOP),DOMAIN_DNS_INFORMATION.* FROM EC_DOMAIN
                     LEFT JOIN DOMAIN_DNS_INFORMATION ON ROOT_DOMAIN_NAME = DOMAIN_TOP
                     WHERE EC_DOMAIN.NODE_AFFINITY = ?
                     """)) {
            ps.setFetchSize(10_000);
            ps.setInt(1, nodeId);
            ResultSet rs = ps.executeQuery();
            while (rs.next()) {
                Long dnsRootDomainId = rs.getObject("DOMAIN_DNS_INFORMATION.DNS_ROOT_DOMAIN_ID", Long.class);
                String rootDomainName = rs.getString("DOMAIN_TOP");

                if (dnsRootDomainId == null) {
                    updateJobs.add(
                            new UpdateSchedule.UpdateJob<>(
                                    new RootDomainReference.ByName(rootDomainName),
                                    Instant.now())
                    );
                }
                else {
                    var record = new DomainDnsRecord(rs);
                    updateJobs.add(new UpdateSchedule.UpdateJob<>(
                            new RootDomainReference.ByIdAndName(dnsRootDomainId, rootDomainName),
                            Objects.requireNonNullElseGet(record.tsNextScheduledUpdate(), Instant::now))
                    );
                }
            }
        } catch (SQLException e) {
            throw new RuntimeException("Failed to retrieve DNS update schedule", e);
        }

        logger.info("Found {} dns update jobs for node {}", updateJobs.size(), nodeId);

        return updateJobs;
    }
}

@@ -0,0 +1,13 @@
package nu.marginalia.ping;

import nu.marginalia.ping.model.ErrorClassification;

import java.time.Duration;
import java.util.Map;

public record PingIntervalsConfiguration(
        Duration dnsUpdateInterval,
        Map<ErrorClassification, Duration> baseIntervals,
        Map<ErrorClassification, Duration> maxIntervals
) {
}

@@ -0,0 +1,297 @@
package nu.marginalia.ping;

import com.google.inject.Inject;
import nu.marginalia.coordination.DomainCoordinator;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.ping.model.*;
import nu.marginalia.ping.svc.DnsPingService;
import nu.marginalia.ping.svc.HttpPingService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.TimeUnit;

/** PingJobScheduler is responsible for scheduling and processing ping jobs
 * for both HTTP pings and DNS lookups. It manages a queue of jobs and processes them
 * in separate threads, ensuring that domains are pinged and DNS records are updated
 * efficiently.
 */
public class PingJobScheduler {
    private final HttpPingService httpPingService;
    private final DnsPingService dnsPingService;
    private final DomainCoordinator domainCoordinator;
    private final PingDao pingDao;

    private static final Logger logger = LoggerFactory.getLogger(PingJobScheduler.class);

    private static final UpdateSchedule<RootDomainReference, RootDomainReference> dnsUpdateSchedule
            = new UpdateSchedule<>(250_000);
    private static final UpdateSchedule<DomainReference, HistoricalAvailabilityData> availabilityUpdateSchedule
            = new UpdateSchedule<>(250_000);

    public volatile Instant dnsLastSync = Instant.now();
    public volatile Instant availabilityLastSync = Instant.now();

    public volatile Integer nodeId = null;
    public volatile boolean running = false;

    private final List<Thread> allThreads = new ArrayList<>();

    @Inject
    public PingJobScheduler(HttpPingService httpPingService,
                            DnsPingService dnsPingService,
                            DomainCoordinator domainCoordinator,
                            PingDao pingDao)
    {
        this.httpPingService = httpPingService;
        this.dnsPingService = dnsPingService;
        this.domainCoordinator = domainCoordinator;
        this.pingDao = pingDao;
    }

    public synchronized void start() {
        if (running)
            return;

        nodeId = null;

        running = true;

        allThreads.add(Thread.ofPlatform().daemon().name("sync-dns").start(this::syncAvailabilityJobs));
        allThreads.add(Thread.ofPlatform().daemon().name("sync-availability").start(this::syncDnsRecords));

        int availabilityThreads = Integer.getInteger("ping.availabilityThreads", 8);
        int pingThreads = Integer.getInteger("ping.dnsThreads", 2);

        for (int i = 0; i < availabilityThreads; i++) {
            allThreads.add(Thread.ofPlatform().daemon().name("availability-job-consumer-" + i).start(this::availabilityJobConsumer));
        }
        for (int i = 0; i < pingThreads; i++) {
            allThreads.add(Thread.ofPlatform().daemon().name("dns-job-consumer-" + i).start(this::dnsJobConsumer));
        }
    }

    public void stop() {
        running = false;
        for (Thread thread : allThreads) {
            try {
                thread.interrupt();
                thread.join();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                logger.error("Failed to join thread: " + thread.getName(), e);
            }
        }
    }

    public void pause(int nodeId) {
        logger.info("Pausing PingJobScheduler for nodeId: {}", nodeId);

        if (this.nodeId != null && this.nodeId != nodeId) {
            logger.warn("Attempted to pause PingJobScheduler with mismatched nodeId: expected {}, got {}", this.nodeId, nodeId);
            return;
        }
        this.nodeId = null;

        availabilityUpdateSchedule.clear();
        dnsUpdateSchedule.clear();

        logger.info("PingJobScheduler paused");
    }

    public synchronized void enableForNode(int nodeId) {
        logger.info("Resuming PingJobScheduler for nodeId: {}", nodeId);
        if (this.nodeId != null) {
            logger.warn("Attempted to resume PingJobScheduler with mismatched nodeId: expected {}, got {}", this.nodeId, nodeId);
            return;
        }

        availabilityUpdateSchedule.replaceQueue(pingDao.getDomainUpdateSchedule(nodeId));
        dnsUpdateSchedule.replaceQueue(pingDao.getDnsUpdateSchedule(nodeId));
        dnsLastSync = Instant.now();
        availabilityLastSync = Instant.now();

        // Flag that we are running again
        this.nodeId = nodeId;

        notifyAll();
        logger.info("PingJobScheduler resumed");
    }

    public synchronized void waitForResume() throws InterruptedException {
        while (nodeId == null) {
            wait();
        }
    }

    private void availabilityJobConsumer() {
        while (running) {
            try {
                Integer nid = nodeId;
                if (nid == null) {
                    waitForResume();
                    continue;
                }

                DomainReference ref = availabilityUpdateSchedule.nextIf(domain -> {
                    EdgeDomain domainObj = new EdgeDomain(domain.domainName());
                    if (!domainCoordinator.isLockableHint(domainObj)) {
                        return false; // Skip locked domains
                    }
                    return true; // Process this domain
                });

                long nextId = ref.domainId();
                var data = pingDao.getHistoricalAvailabilityData(nextId);
                if (data == null) {
                    logger.warn("No availability data found for ID: {}", nextId);
                    continue; // No data to process, skip this iteration
                }

                try {
                    List<WritableModel> objects = switch (data) {
                        case HistoricalAvailabilityData.JustDomainReference(DomainReference reference)
                                -> httpPingService.pingDomain(reference, null, null);
                        case HistoricalAvailabilityData.JustAvailability(String domain, DomainAvailabilityRecord record)
                                -> httpPingService.pingDomain(
                                        new DomainReference(record.domainId(), record.nodeId(), domain), record, null);
                        case HistoricalAvailabilityData.AvailabilityAndSecurity(String domain, DomainAvailabilityRecord availability, DomainSecurityRecord security)
                                -> httpPingService.pingDomain(
                                        new DomainReference(availability.domainId(), availability.nodeId(), domain), availability, security);
                    };

                    pingDao.write(objects);

                    // Re-schedule the next update time for the domain
                    for (var object : objects) {
                        var ts = object.nextUpdateTime();
                        if (ts != null) {
                            availabilityUpdateSchedule.add(ref, ts);
                            break;
                        }
                    }
                }
                catch (Exception e) {
                    logger.error("Error processing availability job for domain: " + data.domain(), e);
                }
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                logger.error("Availability job consumer interrupted", e);
                break;
            } catch (Exception e) {
                logger.error("Error processing availability job", e);
            }
        }
    }

    private void dnsJobConsumer() {
        while (running) {
            try {
                Integer nid = nodeId;
                if (nid == null) {
                    waitForResume();
                    continue;
                }

                RootDomainReference ref = dnsUpdateSchedule.next();

                try {
                    List<WritableModel> objects = switch(ref) {
                        case RootDomainReference.ByIdAndName(long id, String name) -> {
                            var oldRecord = Objects.requireNonNull(pingDao.getDomainDnsRecord(id));
                            yield dnsPingService.pingDomain(oldRecord.rootDomainName(), oldRecord);
                        }
                        case RootDomainReference.ByName(String name) -> {
                            @Nullable var oldRecord = pingDao.getDomainDnsRecord(name);
                            yield dnsPingService.pingDomain(name, oldRecord);
                        }
                    };

                    pingDao.write(objects);

                    // Re-schedule the next update time for the domain
                    for (var object : objects) {
                        var ts = object.nextUpdateTime();
                        if (ts != null) {
                            dnsUpdateSchedule.add(ref, ts);
                            break;
                        }
                    }
                }
                catch (Exception e) {
                    logger.error("Error processing DNS job for domain: " + ref, e);
                }

            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                logger.error("DNS job consumer interrupted", e);
                break;
            } catch (Exception e) {
                logger.error("Error processing DNS job", e);
            }
        }
    }

    private void syncAvailabilityJobs() {
        try {
            while (running) {

                // If we are suspended, wait for resume
                Integer nid = nodeId;
                if (nid == null) {
                    waitForResume();
                    continue;
                }

                // Check if we need to refresh the availability data
                Instant nextRefresh = availabilityLastSync.plus(Duration.ofHours(24));
                if (Instant.now().isBefore(nextRefresh)) {
                    Duration remaining = Duration.between(Instant.now(), nextRefresh);
                    TimeUnit.MINUTES.sleep(Math.max(1, remaining.toMinutes()));
                    continue;
                }

                availabilityUpdateSchedule.replaceQueue(pingDao.getDomainUpdateSchedule(nid));
                availabilityLastSync = Instant.now();
            }
        }
        catch (Exception e) {
            logger.error("Error fetching new ping jobs", e);
        }
    }

    private void syncDnsRecords() {
        try {
            while (running) {

                Integer nid = nodeId;
                if (nid == null) {
                    waitForResume();
                    continue; // re-fetch the records after resuming
                }

                // Check if we need to refresh the availability data
                Instant nextRefresh = dnsLastSync.plus(Duration.ofHours(24));
                if (Instant.now().isBefore(nextRefresh)) {
                    Duration remaining = Duration.between(Instant.now(), nextRefresh);
                    TimeUnit.MINUTES.sleep(Math.max(1, remaining.toMinutes()));
                    continue;
                }

                dnsUpdateSchedule.replaceQueue(pingDao.getDnsUpdateSchedule(nid));
                dnsLastSync = Instant.now();
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            logger.error("DNS job fetch interrupted", e);
        }
    }


}

@@ -0,0 +1,102 @@
package nu.marginalia.ping;

import com.google.gson.Gson;
import com.google.inject.Guice;
import com.google.inject.Inject;
import com.google.inject.Injector;
import nu.marginalia.WmsaHome;
import nu.marginalia.coordination.DomainCoordinationModule;
import nu.marginalia.geoip.GeoIpDictionary;
import nu.marginalia.mq.MessageQueueFactory;
import nu.marginalia.mqapi.ProcessInboxNames;
import nu.marginalia.mqapi.ping.PingRequest;
import nu.marginalia.process.ProcessConfiguration;
import nu.marginalia.process.ProcessConfigurationModule;
import nu.marginalia.process.ProcessMainClass;
import nu.marginalia.service.module.DatabaseModule;
import nu.marginalia.service.module.ServiceDiscoveryModule;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.security.Security;

public class PingMain extends ProcessMainClass {
    private static final Logger log = LoggerFactory.getLogger(PingMain.class);

    private final PingJobScheduler pingJobScheduler;
    private final int node;

    private static final Logger logger = LoggerFactory.getLogger(PingMain.class);

    @Inject
    public PingMain(MessageQueueFactory messageQueueFactory,
                    ProcessConfiguration config,
                    Gson gson,
                    PingJobScheduler pingJobScheduler,
                    ProcessConfiguration processConfiguration
    ) {
        super(messageQueueFactory, config, gson, ProcessInboxNames.PING_INBOX);

        this.pingJobScheduler = pingJobScheduler;
        this.node = processConfiguration.node();
    }

    public void runPrimary() {
        log.info("Starting PingMain...");

        // Start the ping job scheduler
        pingJobScheduler.start();
        pingJobScheduler.enableForNode(node);

        log.info("PingMain started successfully.");
    }

    public static void main(String... args) throws Exception {
        // Prevent Java from caching DNS lookups forever (filling up the system RAM as a result)
        Security.setProperty("networkaddress.cache.ttl" , "3600");

        // This must run *early*
        System.setProperty("http.agent", WmsaHome.getUserAgent().uaString());

        // If these aren't set properly, the JVM will hang forever on some requests
        System.setProperty("sun.net.client.defaultConnectTimeout", "30000");
        System.setProperty("sun.net.client.defaultReadTimeout", "30000");

        // Set the maximum number of connections to keep alive in the connection pool
        System.setProperty("jdk.httpclient.idleTimeout", "15"); // 15 seconds
        System.setProperty("jdk.httpclient.connectionPoolSize", "256");

        // We don't want to use too much memory caching sessions for https
        System.setProperty("javax.net.ssl.sessionCacheSize", "2048");


        Injector injector = Guice.createInjector(
                new PingModule(),
                new ServiceDiscoveryModule(),
                new DomainCoordinationModule(),
                new ProcessConfigurationModule("ping"),
                new DatabaseModule(false)
        );

        GeoIpDictionary geoIpDictionary = injector.getInstance(GeoIpDictionary.class);

        geoIpDictionary.waitReady(); // Ensure the GeoIpDictionary is ready before proceeding

        PingMain main = injector.getInstance(PingMain.class);

        var instructions = main.fetchInstructions(PingRequest.class);

        try {
            main.runPrimary();
            for(;;)
                synchronized (main) { // Wait on the object lock to avoid busy-looping
                    main.wait();
                }
        }
        catch (Throwable ex) {
            logger.error("Error running ping process", ex);
            instructions.err();
        }
    }

}

@@ -0,0 +1,82 @@
package nu.marginalia.ping;

import com.google.inject.AbstractModule;
import com.google.inject.Provides;
import com.google.inject.name.Named;
import nu.marginalia.ping.io.HttpClientProvider;
import nu.marginalia.ping.model.ErrorClassification;
import org.apache.hc.client5.http.classic.HttpClient;

import java.security.NoSuchAlgorithmException;
import java.time.Duration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class PingModule extends AbstractModule {

    public PingModule() throws NoSuchAlgorithmException {
    }

    public static PingIntervalsConfiguration createPingIntervalsConfiguration() {
        Map<ErrorClassification, Duration> initialTimeouts = new HashMap<>();
        Map<ErrorClassification, Duration> maxTimeouts = new HashMap<>();

        for (var classification : ErrorClassification.values()) {
            switch (classification) {
                case CONNECTION_ERROR -> {
                    initialTimeouts.put(classification, Duration.ofMinutes(15));
                    maxTimeouts.put(classification, Duration.ofDays(1));
                }
                case HTTP_CLIENT_ERROR -> {
                    initialTimeouts.put(classification, Duration.ofMinutes(15));
                    maxTimeouts.put(classification, Duration.ofDays(1));
                }
                case HTTP_SERVER_ERROR -> {
                    initialTimeouts.put(classification, Duration.ofMinutes(8));
                    maxTimeouts.put(classification, Duration.ofHours(6));
                }
                case SSL_ERROR -> {
                    initialTimeouts.put(classification, Duration.ofMinutes(45));
                    maxTimeouts.put(classification, Duration.ofDays(1));
                }
                case DNS_ERROR -> {
                    initialTimeouts.put(classification, Duration.ofMinutes(60));
                    maxTimeouts.put(classification, Duration.ofDays(7));
                }
                case TIMEOUT -> {
                    initialTimeouts.put(classification, Duration.ofMinutes(5));
                    maxTimeouts.put(classification, Duration.ofHours(6));
                }
                case UNKNOWN -> {
                    initialTimeouts.put(classification, Duration.ofMinutes(30));
                    maxTimeouts.put(classification, Duration.ofDays(1));
                }
                case NONE -> {
                    initialTimeouts.put(classification, Duration.ofHours(6));
                    maxTimeouts.put(classification, Duration.ofDays(6));
                }
            }
        }

        return new PingIntervalsConfiguration(
                Duration.ofHours(3),
                initialTimeouts,
                maxTimeouts
        );
    }

    @Override
    protected void configure() {
        bind(HttpClient.class).toProvider(HttpClientProvider.class);

        bind(PingIntervalsConfiguration.class).toInstance(createPingIntervalsConfiguration());
    }

    @Provides
    @Named("ping.nameservers")
    public List<String> providePingNameservers() {
        // Google's public DNS servers currently have the best rate limiting
        return List.of("8.8.8.8", "8.8.4.4");
    }
}

@@ -0,0 +1,109 @@
package nu.marginalia.ping;

import java.time.Duration;
import java.time.Instant;
import java.util.*;
import java.util.function.Predicate;

/** In-memory schedule for updates, allowing jobs to be added and processed in order of their scheduled time.
 * This is not a particularly high-performance implementation, but exists to take contention off the database's
 * timestamp index.
 * */
public class UpdateSchedule<T, T2> {
    private final PriorityQueue<UpdateJob<T, T2>> updateQueue;
    public record UpdateJob<T, T2>(T key, Instant updateTime) {}

    public UpdateSchedule(int initialCapacity) {
        updateQueue = new PriorityQueue<>(initialCapacity, Comparator.comparing(UpdateJob::updateTime));
    }

    public synchronized void add(T key, Instant updateTime) {
        updateQueue.add(new UpdateJob<>(key, updateTime));
        notifyAll();
    }

    /** Returns the next job in the queue that is due to be processed.
     * If no jobs are due, it will block until a job is added or a job becomes due.
     * */
    public synchronized T next() throws InterruptedException {
        while (true) {
            if (updateQueue.isEmpty()) {
                wait(); // Wait for a new job to be added
                continue;
            }

            UpdateJob<T, T2> job = updateQueue.peek();
            Instant now = Instant.now();

            if (job.updateTime.isAfter(now)) {
                Duration toWait = Duration.between(now, job.updateTime);
                wait(Math.max(1, toWait.toMillis()));
            }
            else {
                updateQueue.poll(); // Remove the job from the queue since it's due
                return job.key();
            }
        }
    }


    /** Returns the first job in the queue matching the predicate that is not scheduled into the future,
     * blocking until a job is added or a job becomes due.
     */
    public synchronized T nextIf(Predicate<T> predicate) throws InterruptedException {
        List<UpdateJob<T, T2>> rejectedJobs = new ArrayList<>();

        try {
            while (true) {
                if (updateQueue.isEmpty()) {
                    wait(); // Wait for a new job to be added
                    continue;
                }

                UpdateJob<T, T2> job = updateQueue.peek();
                Instant now = Instant.now();

                if (job.updateTime.isAfter(now)) {
                    Duration toWait = Duration.between(now, job.updateTime);

                    // Return the rejected jobs to the queue for other threads to process
                    updateQueue.addAll(rejectedJobs);
                    if (!rejectedJobs.isEmpty())
                        notifyAll();
                    rejectedJobs.clear();

                    wait(Math.max(1, toWait.toMillis()));
                } else {
                    var candidate = updateQueue.poll(); // Remove the job from the queue since it's due

                    assert candidate != null : "Update job should not be null at this point, since we just peeked it in a synchronized block";

                    if (!predicate.test(candidate.key())) {
                        rejectedJobs.add(candidate);
                    }
                    else {
                        return candidate.key();
                    }
                }
            }
        }
        finally {
            // Return the rejected jobs to the queue for other threads to process
            updateQueue.addAll(rejectedJobs);
            if (!rejectedJobs.isEmpty())
                notifyAll();
        }

    }

    public synchronized void clear() {
        updateQueue.clear();
        notifyAll();
    }

    public synchronized void replaceQueue(Collection<UpdateJob<T,T2>> newJobs) {
        updateQueue.clear();
        updateQueue.addAll(newJobs);
        notifyAll();
    }
}
|
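A minimal usage sketch for the scheduler above (illustrative, not taken from this changeset): producer threads add keyed jobs with a due time, and a consumer blocks on next() until the earliest job becomes due. The second type parameter is unused by the schedule itself, so Void works here.

import java.time.Duration;
import java.time.Instant;

class UpdateScheduleDemo {
    public static void main(String[] args) throws InterruptedException {
        // Assumes nu.marginalia.ping.UpdateSchedule from the diff above is on the classpath.
        UpdateSchedule<String, Void> schedule = new UpdateSchedule<>(16);

        schedule.add("example.com", Instant.now().plus(Duration.ofSeconds(1)));
        schedule.add("marginalia.nu", Instant.now());

        System.out.println(schedule.next()); // "marginalia.nu" is due immediately
        System.out.println(schedule.next()); // blocks about a second, then "example.com"
    }
}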
@@ -0,0 +1,96 @@
package nu.marginalia.ping.fetcher;

import com.google.inject.Inject;
import com.google.inject.name.Named;
import nu.marginalia.ping.model.SingleDnsRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xbill.DNS.ExtendedResolver;
import org.xbill.DNS.Lookup;
import org.xbill.DNS.TextParseException;
import org.xbill.DNS.Type;

import java.net.UnknownHostException;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.*;

public class PingDnsFetcher {
    private final ThreadLocal<ExtendedResolver> resolver;
    private static final ExecutorService digExecutor = Executors.newFixedThreadPool(100);
    private static final Logger logger = LoggerFactory.getLogger(PingDnsFetcher.class);

    private static final int[] RECORD_TYPES = {
            Type.A, Type.AAAA, Type.NS, Type.MX, Type.TXT,
            Type.SOA, Type.CNAME, Type.CAA, Type.SPF
    };

    @Inject
    public PingDnsFetcher(@Named("ping.nameservers")
                          List<String> nameservers) {
        resolver = ThreadLocal.withInitial(() -> createResolver(nameservers));
    }

    private ExtendedResolver createResolver(List<String> nameservers) {
        try {
            ExtendedResolver r = new ExtendedResolver(
                    nameservers.toArray(new String[0])
            );
            r.setLoadBalance(true);
            r.setTimeout(Duration.ofSeconds(5));
            return r;
        }
        catch (UnknownHostException e) {
            throw new RuntimeException("Failed to create DNS resolver", e);
        }
    }

    private List<SingleDnsRecord> query(String domainName, int recordType) throws TextParseException {
        var resolver = this.resolver.get();
        var query = new Lookup(domainName, recordType);
        query.setResolver(resolver);

        var result = query.run();

        if (result == null || result.length == 0) {
            return List.of();
        }

        List<SingleDnsRecord> records = new ArrayList<>(result.length);

        for (var record : result) {
            if (record == null) continue;
            records.add(new SingleDnsRecord(
                    Type.string(recordType),
                    record.toString())
            );
        }

        return records;
    }

    public List<SingleDnsRecord> dig(String domainName) {
        List<Callable<List<SingleDnsRecord>>> tasks = new ArrayList<>(RECORD_TYPES.length);
        for (var recordType : RECORD_TYPES) {
            tasks.add(() -> query(domainName, recordType));
        }
        List<SingleDnsRecord> results = new ArrayList<>(RECORD_TYPES.length);
        try {
            List<Future<List<SingleDnsRecord>>> futures = digExecutor.invokeAll(tasks);
            for (Future<List<SingleDnsRecord>> future : futures) {
                try {
                    results.addAll(future.get(1, TimeUnit.MINUTES));
                } catch (Exception e) {
                    logger.error("Error fetching DNS records", e);
                }
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            logger.error("DNS query interrupted", e);
        }
        return results;
    }
}
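An illustrative call site (not part of the diff; in the service the nameserver list comes from the @Named("ping.nameservers") binding shown earlier): dig() fans the per-record-type lookups out over the shared executor and flattens whatever answers come back.

import java.util.List;

class DigDemo {
    public static void main(String[] args) {
        // Assumes PingDnsFetcher and SingleDnsRecord from this changeset are importable.
        var fetcher = new PingDnsFetcher(List.of("8.8.8.8", "8.8.4.4"));
        for (var record : fetcher.dig("marginalia.nu")) {
            System.out.println(record); // one SingleDnsRecord per answer, e.g. A, MX, TXT...
        }
    }
}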
@@ -0,0 +1,95 @@
package nu.marginalia.ping.fetcher;

import com.google.inject.Inject;
import nu.marginalia.UserAgent;
import nu.marginalia.WmsaHome;
import nu.marginalia.ping.fetcher.response.*;
import org.apache.hc.client5.http.HttpHostConnectException;
import org.apache.hc.client5.http.classic.HttpClient;
import org.apache.hc.client5.http.protocol.HttpClientContext;
import org.apache.hc.core5.http.Header;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.io.support.ClassicRequestBuilder;

import javax.net.ssl.SSLHandshakeException;
import java.io.IOException;
import java.net.SocketTimeoutException;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class PingHttpFetcher {
    private final UserAgent userAgent = WmsaHome.getUserAgent();
    private final HttpClient client;

    @Inject
    public PingHttpFetcher(HttpClient client) {
        this.client = client;
    }

    public PingRequestResponse fetchUrl(String url, Method method, String etag, String lastModified) {

        var builder = ClassicRequestBuilder.create(method.name())
                .setUri(url)
                .addHeader("Accept", "text/*, */*;q=0.9")
                .addHeader("User-Agent", userAgent.uaString())
                .addHeader("Accept-Encoding", "gzip");
        if (etag != null) {
            builder.addHeader("If-None-Match", etag);
        }
        if (lastModified != null) {
            builder.addHeader("If-Modified-Since", lastModified);
        }

        var req = builder.build();

        HttpClientContext context = HttpClientContext.create();
        try {
            Instant start = Instant.now();
            return client.execute(req, context, (rsp) -> {

                var entity = rsp.getEntity();

                try {
                    Header[] rawHeaders = rsp.getHeaders();
                    Map<String, List<String>> headers = new HashMap<>(rawHeaders.length);
                    for (Header header : rawHeaders) {
                        headers.computeIfAbsent(header.getName(), k -> new ArrayList<>())
                                .add(header.getValue());
                    }

                    if (method == Method.GET && entity == null) {
                        return new ProtocolError("GET request returned no content");
                    }

                    byte[] body = entity != null ? EntityUtils.toByteArray(entity) : null;

                    Duration responseTime = Duration.between(start, Instant.now());

                    return PingRequestResponse.of(
                            rsp.getVersion(),
                            rsp.getCode(),
                            body,
                            headers,
                            responseTime,
                            context.getSSLSession()
                    );
                } finally {
                    EntityUtils.consume(entity);
                }
            });
        } catch (SocketTimeoutException ex) {
            return new TimeoutResponse(ex.getMessage());
        } catch (HttpHostConnectException | SSLHandshakeException e) {
            return new ConnectionError(e.getClass().getSimpleName());
        } catch (IOException e) {
            return new ProtocolError(e.getClass().getSimpleName());
        }
    }
}
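A hedged usage sketch (illustrative only; in the service the HttpClient is injected via the HttpClientProvider further down in this diff): passing a previously stored ETag and Last-Modified value turns the probe into a conditional request, so an unchanged page can come back as a 304 without a body.

class PingFetchDemo {
    // Assumes PingHttpFetcher, Method and PingRequestResponse from this changeset are importable.
    static void probe(PingHttpFetcher fetcher, String etag, String lastModified) {
        PingRequestResponse rsp = fetcher.fetchUrl("https://marginalia.nu/", Method.HEAD, etag, lastModified);
        System.out.println(rsp);
    }
}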
@@ -0,0 +1,4 @@
package nu.marginalia.ping.fetcher.response;

public record ConnectionError(String errorMessage) implements PingRequestResponse {
}
@@ -0,0 +1,18 @@
package nu.marginalia.ping.fetcher.response;

import java.util.List;
import java.util.Map;

public record Headers(Map<String, List<String>> headers) {
    public List<String> get(String name) {
        return headers.getOrDefault(name, List.of());
    }

    public String getFirst(String name) {
        return headers.getOrDefault(name, List.of()).stream().findFirst().orElse(null);
    }

    public boolean contains(String name) {
        return headers.containsKey(name);
    }
}
@@ -0,0 +1,12 @@
package nu.marginalia.ping.fetcher.response;

import java.time.Duration;

public record HttpResponse(
        String version,
        int httpStatus,
        byte[] body,
        Headers headers,
        Duration httpResponseTime
) implements PingRequestResponse {
}
@@ -0,0 +1,15 @@
package nu.marginalia.ping.fetcher.response;

import java.security.cert.Certificate;
import java.time.Duration;

public record HttpsResponse(
        String version,
        int httpStatus,
        byte[] body,
        Headers headers,
        Certificate[] sslCertificates,
        SslMetadata sslMetadata,
        Duration httpResponseTime
) implements PingRequestResponse {
}
@@ -0,0 +1,5 @@
package nu.marginalia.ping.fetcher.response;

public enum Method {
    GET, HEAD
}
@@ -0,0 +1,22 @@
package nu.marginalia.ping.fetcher.response;

import org.apache.hc.core5.http.ProtocolVersion;

import javax.net.ssl.SSLPeerUnverifiedException;
import javax.net.ssl.SSLSession;
import java.time.Duration;
import java.util.List;
import java.util.Map;

public sealed interface PingRequestResponse
        permits HttpResponse, HttpsResponse, TimeoutResponse, ConnectionError, ProtocolError, UnknownHostError {
    static PingRequestResponse of(ProtocolVersion version, int httpStatus, byte[] body, Map<String, List<String>> headers, Duration time, SSLSession sslSession) throws SSLPeerUnverifiedException {

        if (sslSession == null) {
            return new HttpResponse(version.toString(), httpStatus, body, new Headers(headers), time);
        } else {
            return new HttpsResponse(version.toString(), httpStatus, body, new Headers(headers), sslSession.getPeerCertificates(), new SslMetadata(sslSession), time);
        }
    }
}
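Since PingRequestResponse is sealed, callers can switch over it exhaustively without a default branch. A small illustrative sketch (not from the diff) that maps each variant to a human-readable summary:

class ResponseClassifier {
    // Illustrative: one arm per permitted subtype of the sealed interface above.
    static String describe(PingRequestResponse rsp) {
        return switch (rsp) {
            case HttpResponse r -> "HTTP " + r.httpStatus() + " in " + r.httpResponseTime();
            case HttpsResponse r -> "HTTPS " + r.httpStatus() + " via " + r.sslMetadata().protocol();
            case TimeoutResponse t -> "timeout: " + t.errorMessage();
            case ConnectionError c -> "connection error: " + c.errorMessage();
            case ProtocolError p -> "protocol error: " + p.errorMessage();
            case UnknownHostError u -> "unknown host";
        };
    }
}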
@@ -0,0 +1,4 @@
package nu.marginalia.ping.fetcher.response;

public record ProtocolError(String errorMessage) implements PingRequestResponse {
}
@@ -0,0 +1,14 @@
package nu.marginalia.ping.fetcher.response;

import javax.net.ssl.SSLSession;

public record SslMetadata(
        String cipherSuite,
        String protocol) {
    public SslMetadata(SSLSession session) {
        this(
                session.getCipherSuite(),
                session.getProtocol()
        );
    }
}
@@ -0,0 +1,4 @@
package nu.marginalia.ping.fetcher.response;

public record TimeoutResponse(String errorMessage) implements PingRequestResponse {
}
@@ -0,0 +1,4 @@
package nu.marginalia.ping.fetcher.response;

public record UnknownHostError() implements PingRequestResponse {
}
@@ -0,0 +1,165 @@
package nu.marginalia.ping.io;

import com.google.inject.Provider;
import org.apache.hc.client5.http.ConnectionKeepAliveStrategy;
import org.apache.hc.client5.http.classic.HttpClient;
import org.apache.hc.client5.http.config.ConnectionConfig;
import org.apache.hc.client5.http.config.RequestConfig;
import org.apache.hc.client5.http.cookie.StandardCookieSpec;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.client5.http.impl.io.PoolingHttpClientConnectionManager;
import org.apache.hc.client5.http.impl.io.PoolingHttpClientConnectionManagerBuilder;
import org.apache.hc.client5.http.ssl.DefaultClientTlsStrategy;
import org.apache.hc.client5.http.ssl.NoopHostnameVerifier;
import org.apache.hc.core5.http.HeaderElement;
import org.apache.hc.core5.http.HeaderElements;
import org.apache.hc.core5.http.HttpResponse;
import org.apache.hc.core5.http.io.SocketConfig;
import org.apache.hc.core5.http.message.MessageSupport;
import org.apache.hc.core5.http.protocol.HttpContext;
import org.apache.hc.core5.ssl.SSLContextBuilder;
import org.apache.hc.core5.util.TimeValue;
import org.apache.hc.core5.util.Timeout;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.security.cert.X509Certificate;
import java.util.Iterator;
import java.util.concurrent.TimeUnit;

public class HttpClientProvider implements Provider<HttpClient> {
    private static final HttpClient client;
    private static PoolingHttpClientConnectionManager connectionManager;

    private static final Logger logger = LoggerFactory.getLogger(HttpClientProvider.class);

    static {
        try {
            client = createClient();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    private static CloseableHttpClient createClient() throws NoSuchAlgorithmException, KeyManagementException {
        final ConnectionConfig connectionConfig = ConnectionConfig.custom()
                .setSocketTimeout(15, TimeUnit.SECONDS)
                .setConnectTimeout(15, TimeUnit.SECONDS)
                .setValidateAfterInactivity(TimeValue.ofSeconds(5))
                .build();

        // No-op up front validation of server certificates.
        //
        // We will validate certificates later, after the connection is established
        // as we want to store the certificate chain and validation
        // outcome to the database.

        var trustMeBro = new X509TrustManager() {
            private X509Certificate[] lastServerCertChain;

            @Override
            public void checkClientTrusted(X509Certificate[] chain, String authType) {
            }

            @Override
            public void checkServerTrusted(X509Certificate[] chain, String authType) {
                this.lastServerCertChain = chain.clone();
            }

            @Override
            public X509Certificate[] getAcceptedIssuers() {
                return new X509Certificate[0];
            }

            public X509Certificate[] getLastServerCertChain() {
                return lastServerCertChain != null ? lastServerCertChain.clone() : null;
            }
        };

        SSLContext sslContext = SSLContextBuilder.create().build();
        sslContext.init(null, new TrustManager[]{trustMeBro}, null);

        connectionManager = PoolingHttpClientConnectionManagerBuilder.create()
                .setMaxConnPerRoute(2)
                .setMaxConnTotal(50)
                .setDefaultConnectionConfig(connectionConfig)
                .setTlsSocketStrategy(
                        new DefaultClientTlsStrategy(sslContext, NoopHostnameVerifier.INSTANCE))
                .build();

        connectionManager.setDefaultSocketConfig(SocketConfig.custom()
                .setSoLinger(TimeValue.ofSeconds(-1))
                .setSoTimeout(Timeout.ofSeconds(10))
                .build()
        );

        Thread.ofPlatform().daemon(true).start(() -> {
            try {
                for (;;) {
                    TimeUnit.SECONDS.sleep(15);
                    logger.info("Connection pool stats: {}", connectionManager.getTotalStats());
                }
            }
            catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        });

        final RequestConfig defaultRequestConfig = RequestConfig.custom()
                .setCookieSpec(StandardCookieSpec.RELAXED)
                .setResponseTimeout(10, TimeUnit.SECONDS)
                .setConnectionRequestTimeout(5, TimeUnit.MINUTES)
                .build();

        return HttpClients.custom()
                .setConnectionManager(connectionManager)
                .setRetryStrategy(new RetryStrategy())
                .setKeepAliveStrategy(new ConnectionKeepAliveStrategy() {
                    // Default keep-alive duration is 3 minutes, but this is too long for us,
                    // as we are either going to re-use it fairly quickly or close it for a long time.
                    //
                    // So we set it to 30 seconds or clamp the server-provided value to a minimum of 10 seconds.
                    private static final TimeValue defaultValue = TimeValue.ofSeconds(30);

                    @Override
                    public TimeValue getKeepAliveDuration(HttpResponse response, HttpContext context) {
                        final Iterator<HeaderElement> it = MessageSupport.iterate(response, HeaderElements.KEEP_ALIVE);

                        while (it.hasNext()) {
                            final HeaderElement he = it.next();
                            final String param = he.getName();
                            final String value = he.getValue();

                            if (value == null)
                                continue;
                            if (!"timeout".equalsIgnoreCase(param))
                                continue;

                            try {
                                long timeout = Long.parseLong(value);
                                timeout = Math.clamp(timeout, 30, defaultValue.toSeconds());
                                return TimeValue.ofSeconds(timeout);
                            } catch (final NumberFormatException ignore) {
                                break;
                            }
                        }
                        return defaultValue;
                    }
                })
                .disableRedirectHandling()
                .setDefaultRequestConfig(defaultRequestConfig)
                .build();
    }

    @Override
    public HttpClient get() {
        return client;
    }
}
@@ -0,0 +1,77 @@
package nu.marginalia.ping.io;

import org.apache.hc.client5.http.HttpHostConnectException;
import org.apache.hc.client5.http.HttpRequestRetryStrategy;
import org.apache.hc.core5.http.HttpRequest;
import org.apache.hc.core5.http.HttpResponse;
import org.apache.hc.core5.http.protocol.HttpContext;
import org.apache.hc.core5.util.TimeValue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.net.ssl.SSLException;
import java.io.IOException;
import java.net.SocketTimeoutException;
import java.net.UnknownHostException;

public class RetryStrategy implements HttpRequestRetryStrategy {
    private static final Logger logger = LoggerFactory.getLogger(RetryStrategy.class);

    @Override
    public boolean retryRequest(HttpRequest request, IOException exception, int executionCount, HttpContext context) {
        return switch (exception) {
            case SocketTimeoutException ste -> false;
            case SSLException ssle -> false;
            case UnknownHostException uhe -> false;
            case HttpHostConnectException ex -> executionCount <= 2; // Only retry once for connection errors
            default -> executionCount <= 3;
        };
    }

    @Override
    public boolean retryRequest(HttpResponse response, int executionCount, HttpContext context) {
        return switch (response.getCode()) {
            case 500, 503 -> executionCount <= 2;
            case 429 -> executionCount <= 3;
            default -> false;
        };
    }

    @Override
    public TimeValue getRetryInterval(HttpRequest request, IOException exception, int executionCount, HttpContext context) {
        return TimeValue.ofSeconds(1);
    }

    @Override
    public TimeValue getRetryInterval(HttpResponse response, int executionCount, HttpContext context) {

        int statusCode = response.getCode();

        // Give 503 a bit more time
        if (statusCode == 503) return TimeValue.ofSeconds(5);

        if (statusCode == 429) {
            // get the Retry-After header
            var retryAfterHeader = response.getFirstHeader("Retry-After");
            if (retryAfterHeader == null) {
                return TimeValue.ofSeconds(3);
            }

            String retryAfter = retryAfterHeader.getValue();
            if (retryAfter == null) {
                return TimeValue.ofSeconds(2);
            }

            try {
                int retryAfterTime = Integer.parseInt(retryAfter);
                retryAfterTime = Math.clamp(retryAfterTime, 1, 5);

                return TimeValue.ofSeconds(retryAfterTime);
            } catch (NumberFormatException e) {
                logger.warn("Invalid Retry-After header: {}", retryAfter);
            }
        }

        return TimeValue.ofSeconds(2);
    }
}
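To make the 429 handling above concrete, here is a small illustrative sketch (not part of the diff) of how the Retry-After clamp behaves when a server asks for a long wait:

import org.apache.hc.core5.http.message.BasicHttpResponse;

class RetryStrategyDemo {
    public static void main(String[] args) {
        var strategy = new RetryStrategy();

        var rsp = new BasicHttpResponse(429);
        rsp.addHeader("Retry-After", "120"); // server asks for two minutes

        // The interval is clamped to at most 5 seconds, so one slow host cannot stall the worker.
        System.out.println(strategy.getRetryInterval(rsp, 1, null));
    }
}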
@@ -0,0 +1,29 @@
package nu.marginalia.ping.model;

public enum AvailabilityOutageType {
    NONE,
    TIMEOUT,
    SSL_ERROR,
    DNS_ERROR,
    CONNECTION_ERROR,
    HTTP_CLIENT_ERROR,
    HTTP_SERVER_ERROR,
    UNKNOWN;

    public static AvailabilityOutageType fromErrorClassification(ErrorClassification errorClassification) {
        if (null == errorClassification) {
            return UNKNOWN;
        }

        return switch (errorClassification) {
            case NONE -> NONE;
            case TIMEOUT -> TIMEOUT;
            case SSL_ERROR -> SSL_ERROR;
            case DNS_ERROR -> DNS_ERROR;
            case CONNECTION_ERROR -> CONNECTION_ERROR;
            case HTTP_CLIENT_ERROR -> HTTP_CLIENT_ERROR;
            case HTTP_SERVER_ERROR -> HTTP_SERVER_ERROR;
            case UNKNOWN -> UNKNOWN;
        };
    }
}
@@ -0,0 +1,49 @@
package nu.marginalia.ping.model;

import java.sql.Connection;
import java.sql.SQLException;
import java.time.Instant;

public record DomainAvailabilityEvent(
        int domainId,
        int nodeId,
        boolean available,
        AvailabilityOutageType outageType, // e.g., 'TIMEOUT', 'DNS_ERROR', etc.
        Integer httpStatusCode, // Nullable, as it may not always be applicable
        String errorMessage, // Specific error details
        Instant tsUpdate // Timestamp of the last update
) implements WritableModel {

    @Override
    public void write(Connection conn) throws SQLException {
        try (var ps = conn.prepareStatement("""
                INSERT INTO DOMAIN_AVAILABILITY_EVENTS (
                    domain_id,
                    node_id,
                    available,
                    outage_type,
                    http_status_code,
                    error_message,
                    ts_change
                ) VALUES (?, ?, ?, ?, ?, ?, ?)
                """))
        {
            ps.setInt(1, domainId());
            ps.setInt(2, nodeId());
            ps.setBoolean(3, available());
            ps.setString(4, outageType().name());
            if (httpStatusCode() == null) {
                ps.setNull(5, java.sql.Types.INTEGER);
            } else {
                ps.setInt(5, httpStatusCode());
            }
            if (errorMessage() == null) {
                ps.setNull(6, java.sql.Types.VARCHAR);
            } else {
                ps.setString(6, errorMessage());
            }
            ps.setTimestamp(7, java.sql.Timestamp.from(tsUpdate()));
            ps.executeUpdate();
        }
    }
}
@@ -0,0 +1,363 @@
package nu.marginalia.ping.model;

import org.apache.commons.lang3.StringUtils;

import javax.annotation.Nullable;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.time.Duration;
import java.time.Instant;

public record DomainAvailabilityRecord(
        int domainId,
        int nodeId,
        boolean serverAvailable,
        @Nullable byte[] serverIp,
        @Nullable Integer asn,
        @Nullable Long dataHash,
        @Nullable Long securityConfigHash,
        @Nullable HttpSchema httpSchema,
        @Nullable String httpEtag,
        @Nullable String httpLastModified,
        @Nullable Integer httpStatus,
        @Nullable String httpLocation,
        @Nullable Duration httpResponseTime,
        @Nullable ErrorClassification errorClassification,
        @Nullable String errorMessage,

        @Nullable Instant tsLastPing,
        @Nullable Instant tsLastAvailable,
        @Nullable Instant tsLastError,

        Instant nextScheduledUpdate,
        int backoffConsecutiveFailures,
        Duration backoffFetchInterval
)
        implements WritableModel
{
    public DomainAvailabilityRecord(ResultSet rs) throws SQLException {
        this(
                rs.getInt("DOMAIN_AVAILABILITY_INFORMATION.DOMAIN_ID"),
                rs.getInt("DOMAIN_AVAILABILITY_INFORMATION.NODE_ID"),
                rs.getBoolean("DOMAIN_AVAILABILITY_INFORMATION.SERVER_AVAILABLE"),
                rs.getBytes("DOMAIN_AVAILABILITY_INFORMATION.SERVER_IP"),
                rs.getObject("DOMAIN_AVAILABILITY_INFORMATION.SERVER_IP_ASN", Integer.class),
                rs.getObject("DOMAIN_AVAILABILITY_INFORMATION.DATA_HASH", Long.class),
                rs.getObject("DOMAIN_AVAILABILITY_INFORMATION.SECURITY_CONFIG_HASH", Long.class),
                httpSchemaFromString(rs.getObject("DOMAIN_AVAILABILITY_INFORMATION.HTTP_SCHEMA", String.class)),
                rs.getString("DOMAIN_AVAILABILITY_INFORMATION.HTTP_ETAG"),
                rs.getString("DOMAIN_AVAILABILITY_INFORMATION.HTTP_LAST_MODIFIED"),
                rs.getObject("DOMAIN_AVAILABILITY_INFORMATION.HTTP_STATUS", Integer.class),
                rs.getString("DOMAIN_AVAILABILITY_INFORMATION.HTTP_LOCATION"),
                durationFromMillis(rs.getObject("DOMAIN_AVAILABILITY_INFORMATION.HTTP_RESPONSE_TIME_MS", Integer.class)),
                errorClassificationFromString(rs.getObject("DOMAIN_AVAILABILITY_INFORMATION.ERROR_CLASSIFICATION", String.class)),
                rs.getString("DOMAIN_AVAILABILITY_INFORMATION.ERROR_MESSAGE"),
                rs.getObject("DOMAIN_AVAILABILITY_INFORMATION.TS_LAST_PING", Instant.class),
                rs.getObject("DOMAIN_AVAILABILITY_INFORMATION.TS_LAST_AVAILABLE", Instant.class),
                rs.getObject("DOMAIN_AVAILABILITY_INFORMATION.TS_LAST_ERROR", Instant.class),
                rs.getObject("DOMAIN_AVAILABILITY_INFORMATION.NEXT_SCHEDULED_UPDATE", Instant.class),
                rs.getInt("DOMAIN_AVAILABILITY_INFORMATION.BACKOFF_CONSECUTIVE_FAILURES"),
                Duration.ofSeconds(rs.getInt("DOMAIN_AVAILABILITY_INFORMATION.BACKOFF_FETCH_INTERVAL"))
        );
    }

    private static HttpSchema httpSchemaFromString(@Nullable String schema) {
        return schema == null ? null : HttpSchema.valueOf(schema);
    }
    private static ErrorClassification errorClassificationFromString(@Nullable String classification) {
        return classification == null ? null : ErrorClassification.valueOf(classification);
    }
    private static Duration durationFromMillis(@Nullable Integer millis) {
        return millis == null ? null : Duration.ofMillis(millis);
    }

    @Override
    public Instant nextUpdateTime() {
        return nextScheduledUpdate;
    }

    @Override
    public void write(Connection connection) throws SQLException {
        try (var ps = connection.prepareStatement(
                """
                REPLACE INTO DOMAIN_AVAILABILITY_INFORMATION (
                    domain_id,
                    node_id,
                    server_available,
                    server_ip,
                    data_hash,
                    security_config_hash,
                    http_schema,
                    http_etag,
                    http_last_modified,
                    http_status,
                    http_location,
                    http_response_time_ms,
                    error_classification,
                    error_message,
                    ts_last_ping,
                    ts_last_available,
                    ts_last_error,
                    next_scheduled_update,
                    backoff_consecutive_failures,
                    backoff_fetch_interval,
                    server_ip_asn)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,?)
                """)) {

            ps.setInt(1, domainId());
            ps.setInt(2, nodeId());
            ps.setBoolean(3, serverAvailable());
            if (serverIp() == null) {
                ps.setNull(4, java.sql.Types.BINARY);
            } else {
                ps.setBytes(4, serverIp());
            }
            if (dataHash() == null) {
                ps.setNull(5, java.sql.Types.BIGINT);
            } else {
                ps.setLong(5, dataHash());
            }
            if (securityConfigHash() == null) {
                ps.setNull(6, java.sql.Types.BIGINT);
            } else {
                ps.setLong(6, securityConfigHash());
            }
            if (httpSchema() == null) {
                ps.setNull(7, java.sql.Types.VARCHAR);
            } else {
                ps.setString(7, httpSchema().name());
            }
            if (httpEtag() == null) {
                ps.setNull(8, java.sql.Types.VARCHAR);
            } else {
                ps.setString(8, httpEtag());
            }
            if (httpLastModified() == null) {
                ps.setNull(9, java.sql.Types.VARCHAR);
            } else {
                ps.setString(9, httpLastModified());
            }
            if (httpStatus() == null) {
                ps.setNull(10, java.sql.Types.INTEGER);
            }
            else {
                ps.setInt(10, httpStatus());
            }
            if (httpLocation() == null) {
                ps.setNull(11, java.sql.Types.VARCHAR);
            }
            else {
                ps.setString(11, httpLocation());
            }

            if (httpResponseTime() == null) {
                ps.setNull(12, java.sql.Types.SMALLINT);
            }
            else {
                ps.setInt(12, Math.clamp(httpResponseTime().toMillis(), 0, 0xFFFF)); // "unsigned short" in SQL
            }

            if (errorClassification() == null) {
                ps.setNull(13, java.sql.Types.VARCHAR);
            }
            else {
                ps.setString(13, errorClassification().name());
            }

            if (errorMessage() == null) {
                ps.setNull(14, java.sql.Types.VARCHAR);
            }
            else {
                ps.setString(14, errorMessage());
            }

            ps.setTimestamp(15, java.sql.Timestamp.from(tsLastPing()));

            if (tsLastAvailable() == null) {
                ps.setNull(16, java.sql.Types.TIMESTAMP);
            }
            else {
                ps.setTimestamp(16, java.sql.Timestamp.from(tsLastAvailable()));
            }
            if (tsLastError() == null) {
                ps.setNull(17, java.sql.Types.TIMESTAMP);
            }
            else {
                ps.setTimestamp(17, java.sql.Timestamp.from(tsLastError()));
            }

            ps.setTimestamp(18, java.sql.Timestamp.from(nextScheduledUpdate()));
            ps.setInt(19, backoffConsecutiveFailures());
            ps.setInt(20, (int) backoffFetchInterval().getSeconds());

            if (asn() == null) {
                ps.setNull(21, java.sql.Types.INTEGER);
            } else {
                ps.setInt(21, asn());
            }

            ps.executeUpdate();
        }
    }

    public static class Builder {
        private int domainId;
        private int nodeId;
        private boolean serverAvailable;
        private byte[] serverIp;
        private Integer serverIpAsn;
        private Long dataHash;
        private Long securityConfigHash;
        private HttpSchema httpSchema;
        private String httpEtag;
        private String httpLastModified;
        private Integer httpStatus;
        private String httpLocation;
        private Duration httpResponseTime;
        private ErrorClassification errorClassification;
        private String errorMessage;
        private Instant tsLastPing;
        private Instant tsLastAvailable;
        private Instant tsLastError;
        private Instant nextScheduledUpdate;
        private int backoffConsecutiveFailures;
        private Duration backoffFetchInterval;

        public Builder domainId(int domainId) {
            this.domainId = domainId;
            return this;
        }

        public Builder nodeId(int nodeId) {
            this.nodeId = nodeId;
            return this;
        }

        public Builder serverAvailable(boolean serverAvailable) {
            this.serverAvailable = serverAvailable;
            return this;
        }

        public Builder serverIp(byte[] serverIp) {
            this.serverIp = serverIp;
            return this;
        }

        public Builder serverIpAsn(Integer asn) {
            this.serverIpAsn = asn;
            return this;
        }

        public Builder dataHash(Long dataHash) {
            this.dataHash = dataHash;
            return this;
        }

        public Builder securityConfigHash(Long securityConfigHash) {
            this.securityConfigHash = securityConfigHash;
            return this;
        }

        public Builder httpSchema(HttpSchema httpSchema) {
            this.httpSchema = httpSchema;
            return this;
        }

        public Builder httpEtag(String httpEtag) {
            this.httpEtag = httpEtag;
            return this;
        }

        public Builder httpLastModified(String httpLastModified) {
            this.httpLastModified = httpLastModified;
            return this;
        }

        public Builder httpStatus(Integer httpStatus) {
            this.httpStatus = httpStatus;
            return this;
        }

        public Builder httpLocation(String httpLocation) {
            this.httpLocation = StringUtils.abbreviate(httpLocation, "...",255);
            return this;
        }

        public Builder httpResponseTime(Duration httpResponseTime) {
            this.httpResponseTime = httpResponseTime;
            return this;
        }

        public Builder errorClassification(ErrorClassification errorClassification) {
            this.errorClassification = errorClassification;
            return this;
        }

        public Builder errorMessage(String errorMessage) {
            this.errorMessage = errorMessage;
            return this;
        }

        public Builder tsLastPing(Instant tsLastPing) {
            this.tsLastPing = tsLastPing;
            return this;
        }

        public Builder tsLastAvailable(Instant tsLastAvailable) {
            this.tsLastAvailable = tsLastAvailable;
            return this;
        }

        public Builder tsLastError(Instant tsLastError) {
            this.tsLastError = tsLastError;
            return this;
        }

        public Builder nextScheduledUpdate(Instant nextScheduledUpdate) {
            this.nextScheduledUpdate = nextScheduledUpdate;
            return this;
        }

        public Builder backoffConsecutiveFailures(int backoffConsecutiveFailures) {
            this.backoffConsecutiveFailures = backoffConsecutiveFailures;
            return this;
        }

        public Builder backoffFetchInterval(Duration backoffFetchInterval) {
            this.backoffFetchInterval = backoffFetchInterval;
            return this;
        }

        public DomainAvailabilityRecord build() {
            return new DomainAvailabilityRecord(
                    domainId,
                    nodeId,
                    serverAvailable,
                    serverIp,
                    serverIpAsn,
                    dataHash,
                    securityConfigHash,
                    httpSchema,
                    httpEtag,
                    httpLastModified,
                    httpStatus,
                    httpLocation,
                    httpResponseTime,
                    errorClassification,
                    errorMessage,
                    tsLastPing,
                    tsLastAvailable,
                    tsLastError,
                    nextScheduledUpdate,
                    backoffConsecutiveFailures,
                    backoffFetchInterval
            );
        }
    }

    public static Builder builder() {
        return new Builder();
    }
}
@@ -0,0 +1,369 @@
package nu.marginalia.ping.model;

import com.google.gson.Gson;
import nu.marginalia.model.gson.GsonFactory;

import javax.annotation.Nullable;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.time.Instant;
import java.util.ArrayList;
import java.util.List;

public record DomainDnsRecord(
        Integer dnsRootDomainId,
        String rootDomainName,
        int nodeAffinity,
        @Nullable List<String> aRecords,
        @Nullable List<String> aaaaRecords,
        @Nullable String cnameRecord,
        @Nullable List<String> mxRecords,
        @Nullable List<String> caaRecords,
        @Nullable List<String> txtRecords,
        @Nullable List<String> nsRecords,
        @Nullable String soaRecord,
        Instant tsLastUpdate,
        Instant tsNextScheduledUpdate,
        int dnsCheckPriority)
        implements WritableModel
{
    private static Gson gson = GsonFactory.get();

    public DomainDnsRecord(ResultSet rs) throws SQLException {
        this(
                rs.getObject("DNS_ROOT_DOMAIN_ID", Integer.class),
                rs.getString("ROOT_DOMAIN_NAME"),
                rs.getInt("NODE_AFFINITY"),
                deserializeJsonArray(rs.getString("DNS_A_RECORDS")),
                deserializeJsonArray(rs.getString("DNS_AAAA_RECORDS")),
                rs.getString("DNS_CNAME_RECORD"),
                deserializeJsonArray(rs.getString("DNS_MX_RECORDS")),
                deserializeJsonArray(rs.getString("DNS_CAA_RECORDS")),
                deserializeJsonArray(rs.getString("DNS_TXT_RECORDS")),
                deserializeJsonArray(rs.getString("DNS_NS_RECORDS")),
                rs.getString("DNS_SOA_RECORD"),
                rs.getObject("TS_LAST_DNS_CHECK", Instant.class),
                rs.getObject("TS_NEXT_DNS_CHECK", Instant.class),
                rs.getInt("DNS_CHECK_PRIORITY")
        );
    }

    static List<String> deserializeJsonArray(@Nullable String json) {
        if (json == null || json.isEmpty()) {
            return List.of();
        }
        return gson.fromJson(json, List.class);
    }

    public static Builder builder() {
        return new Builder();
    }

    @Override
    public Instant nextUpdateTime() {
        return tsNextScheduledUpdate;
    }

    @Override
    public void write(Connection connection) throws SQLException {

        if (dnsRootDomainId() != null) {
            update(connection);
            return;
        }

        try (var ps = connection.prepareStatement("""
                REPLACE INTO DOMAIN_DNS_INFORMATION (
                    ROOT_DOMAIN_NAME,
                    NODE_AFFINITY,
                    DNS_A_RECORDS,
                    DNS_AAAA_RECORDS,
                    DNS_CNAME_RECORD,
                    DNS_MX_RECORDS,
                    DNS_CAA_RECORDS,
                    DNS_TXT_RECORDS,
                    DNS_NS_RECORDS,
                    DNS_SOA_RECORD,
                    TS_LAST_DNS_CHECK,
                    TS_NEXT_DNS_CHECK,
                    DNS_CHECK_PRIORITY
                ) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)
                """)) {

            ps.setString(1, rootDomainName());
            ps.setInt(2, nodeAffinity());

            if (aRecords() == null) {
                ps.setNull(3, java.sql.Types.VARCHAR);
            } else {
                ps.setString(3, gson.toJson(aRecords()));
            }
            if (aaaaRecords() == null) {
                ps.setNull(4, java.sql.Types.VARCHAR);
            } else {
                ps.setString(4, gson.toJson(aaaaRecords()));
            }
            if (cnameRecord() == null) {
                ps.setNull(5, java.sql.Types.VARCHAR);
            } else {
                ps.setString(5, cnameRecord());
            }
            if (mxRecords() == null) {
                ps.setNull(6, java.sql.Types.VARCHAR);
            } else {
                ps.setString(6, gson.toJson(mxRecords()));
            }
            if (caaRecords() == null) {
                ps.setNull(7, java.sql.Types.VARCHAR);
            } else {
                ps.setString(7, gson.toJson(caaRecords()));
            }
            if (txtRecords() == null) {
                ps.setNull(8, java.sql.Types.VARCHAR);
            } else {
                ps.setString(8, gson.toJson(txtRecords()));
            }
            if (nsRecords() == null) {
                ps.setNull(9, java.sql.Types.VARCHAR);
            } else {
                ps.setString(9, gson.toJson(nsRecords()));
            }
            if (soaRecord() == null) {
                ps.setNull(10, java.sql.Types.VARCHAR);
            } else {
                ps.setString(10, soaRecord());
            }
            ps.setTimestamp(11, java.sql.Timestamp.from(tsLastUpdate()));
            ps.setTimestamp(12, java.sql.Timestamp.from(tsNextScheduledUpdate()));
            ps.setInt(13, dnsCheckPriority());
            ps.executeUpdate();
        }
    }

    public void update(Connection connection) throws SQLException {

        try (var ps = connection.prepareStatement("""
                REPLACE INTO DOMAIN_DNS_INFORMATION (
                    DNS_ROOT_DOMAIN_ID,
                    ROOT_DOMAIN_NAME,
                    NODE_AFFINITY,
                    DNS_A_RECORDS,
                    DNS_AAAA_RECORDS,
                    DNS_CNAME_RECORD,
                    DNS_MX_RECORDS,
                    DNS_CAA_RECORDS,
                    DNS_TXT_RECORDS,
                    DNS_NS_RECORDS,
                    DNS_SOA_RECORD,
                    TS_LAST_DNS_CHECK,
                    TS_NEXT_DNS_CHECK,
                    DNS_CHECK_PRIORITY
                ) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)
                """)) {

            ps.setObject(1, dnsRootDomainId(), java.sql.Types.INTEGER);
            ps.setString(2, rootDomainName());
            ps.setInt(3, nodeAffinity());

            if (aRecords() == null) {
                ps.setNull(4, java.sql.Types.VARCHAR);
            } else {
                ps.setString(4, gson.toJson(aRecords()));
            }
            if (aaaaRecords() == null) {
                ps.setNull(5, java.sql.Types.VARCHAR);
            } else {
                ps.setString(5, gson.toJson(aaaaRecords()));
            }
            if (cnameRecord() == null) {
                ps.setNull(6, java.sql.Types.VARCHAR);
            } else {
                ps.setString(6, cnameRecord());
            }
            if (mxRecords() == null) {
                ps.setNull(7, java.sql.Types.VARCHAR);
            } else {
                ps.setString(7, gson.toJson(mxRecords()));
            }
            if (caaRecords() == null) {
                ps.setNull(8, java.sql.Types.VARCHAR);
            } else {
                ps.setString(8, gson.toJson(caaRecords()));
            }
            if (txtRecords() == null) {
                ps.setNull(9, java.sql.Types.VARCHAR);
            } else {
                ps.setString(9, gson.toJson(txtRecords()));
            }
            if (nsRecords() == null) {
                ps.setNull(10, java.sql.Types.VARCHAR);
            } else {
                ps.setString(10, gson.toJson(nsRecords()));
            }
            if (soaRecord() == null) {
                ps.setNull(11, java.sql.Types.VARCHAR);
            } else {
                ps.setString(11, soaRecord());
            }
            ps.setTimestamp(12, java.sql.Timestamp.from(tsLastUpdate()));
            ps.setTimestamp(13, java.sql.Timestamp.from(tsNextScheduledUpdate()));
            ps.setInt(14, dnsCheckPriority());
            ps.executeUpdate();
        }
    }

    public static class Builder {
        private Integer dnsRootDomainId;
        private String rootDomainName;
        private int nodeAffinity;
        private List<String> aRecords;
        private List<String> aaaaRecords;
        private String cnameRecord;
        private List<String> mxRecords;
        private List<String> caaRecords;
        private List<String> txtRecords;
        private List<String> nsRecords;
        private String soaRecord;
        private Instant tsLastUpdate;
        private Instant tsNextScheduledUpdate;
        private int dnsCheckPriority;

        public Builder dnsRootDomainId(Integer dnsRootDomainId) {
            this.dnsRootDomainId = dnsRootDomainId;
            return this;
        }

        public Builder rootDomainName(String rootDomainName) {
            this.rootDomainName = rootDomainName;
            return this;
        }

        public Builder nodeAffinity(int nodeAffinity) {
            this.nodeAffinity = nodeAffinity;
            return this;
        }

        public Builder addARecord(String aRecord) {
            if (this.aRecords == null) {
                this.aRecords = new ArrayList<>();
            }
            this.aRecords.add(aRecord);
            return this;
        }

        public Builder aRecords(List<String> aRecords) {
            this.aRecords = aRecords;
            return this;
        }

        public Builder addAaaaRecord(String aaaaRecord) {
            if (this.aaaaRecords == null) {
                this.aaaaRecords = new ArrayList<>();
            }
            this.aaaaRecords.add(aaaaRecord);
            return this;
        }

        public Builder aaaaRecords(List<String> aaaaRecords) {
            this.aaaaRecords = aaaaRecords;
            return this;
        }

        public Builder cnameRecord(String cnameRecord) {
            this.cnameRecord = cnameRecord;
            return this;
        }

        public Builder addMxRecord(String mxRecord) {
            if (this.mxRecords == null) {
                this.mxRecords = new ArrayList<>();
            }
            this.mxRecords.add(mxRecord);
            return this;
        }

        public Builder mxRecords(List<String> mxRecords) {
            this.mxRecords = mxRecords;
            return this;
        }

        public Builder addCaaRecord(String caaRecord) {
            if (this.caaRecords == null) {
                this.caaRecords = new ArrayList<>();
            }
            this.caaRecords.add(caaRecord);
            return this;
        }

        public Builder caaRecords(List<String> caaRecords) {
            this.caaRecords = caaRecords;
            return this;
        }

        public Builder addTxtRecord(String txtRecord) {
            if (this.txtRecords == null) {
                this.txtRecords = new ArrayList<>();
            }
            this.txtRecords.add(txtRecord);
            return this;
        }

        public Builder txtRecords(List<String> txtRecords) {
            this.txtRecords = txtRecords;
            return this;
        }

        public Builder addNsRecord(String nsRecord) {
            if (this.nsRecords == null) {
                this.nsRecords = new ArrayList<>();
            }
            this.nsRecords.add(nsRecord);
            return this;
        }

        public Builder nsRecords(List<String> nsRecords) {
            this.nsRecords = nsRecords;
            return this;
        }

        public Builder soaRecord(String soaRecord) {
            this.soaRecord = soaRecord;
            return this;
        }

        public Builder tsLastUpdate(Instant tsLastUpdate) {
            this.tsLastUpdate = tsLastUpdate;
            return this;
        }
        public Builder tsNextScheduledUpdate(Instant nextScheduledUpdate) {
            this.tsNextScheduledUpdate = nextScheduledUpdate;
            return this;
        }
        public Builder dnsCheckPriority(int dnsCheckPriority) {
            this.dnsCheckPriority = dnsCheckPriority;
            return this;
        }

        public DomainDnsRecord build() {
            return new DomainDnsRecord(
                    dnsRootDomainId,
                    rootDomainName,
                    nodeAffinity,
                    aRecords,
                    aaaaRecords,
                    cnameRecord,
                    mxRecords,
                    caaRecords,
                    txtRecords,
                    nsRecords,
                    soaRecord,
                    tsLastUpdate,
                    tsNextScheduledUpdate,
                    dnsCheckPriority
            );
        }
    }
}
@@ -0,0 +1,10 @@
package nu.marginalia.ping.model;

import nu.marginalia.model.EdgeDomain;

public record DomainReference(int domainId, int nodeId, String domainName) {
    public EdgeDomain asEdgeDomain() {
        return new EdgeDomain(domainName);
    }
}
@@ -0,0 +1,91 @@
package nu.marginalia.ping.model;

import nu.marginalia.ping.util.JsonObject;

import java.sql.Connection;
import java.sql.SQLException;
import java.time.Duration;
import java.time.Instant;

public record DomainSecurityEvent(
        int domainId,
        int nodeId,
        Instant tsChange,
        boolean asnChanged,
        boolean certificateFingerprintChanged,
        boolean certificateProfileChanged,
        boolean certificateSanChanged,
        boolean certificatePublicKeyChanged,
        boolean certificateSerialNumberChanged,
        boolean certificateIssuerChanged,
        SchemaChange schemaChange,
        Duration oldCertificateTimeToExpiry,
        boolean securityHeadersChanged,
        boolean ipChanged,
        boolean softwareChanged,
        JsonObject<DomainSecurityRecord> securitySignatureBefore,
        JsonObject<DomainSecurityRecord> securitySignatureAfter
) implements WritableModel {

    @Override
    public void write(Connection connection) throws SQLException {
        try (var ps = connection.prepareStatement("""
                INSERT INTO DOMAIN_SECURITY_EVENTS (
                    domain_id,
                    node_id,
                    ts_change,
                    change_asn,
                    change_certificate_fingerprint,
                    change_certificate_profile,
                    change_certificate_san,
                    change_certificate_public_key,
                    change_security_headers,
                    change_ip_address,
                    change_software,
                    old_cert_time_to_expiry,
                    security_signature_before,
                    security_signature_after,
                    change_certificate_serial_number,
                    change_certificate_issuer,
                    change_schema
                ) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
                """))
        {
            ps.setInt(1, domainId());
            ps.setInt(2, nodeId());
            ps.setTimestamp(3, java.sql.Timestamp.from(tsChange()));
            ps.setBoolean(4, asnChanged());
            ps.setBoolean(5, certificateFingerprintChanged());
            ps.setBoolean(6, certificateProfileChanged());
            ps.setBoolean(7, certificateSanChanged());
            ps.setBoolean(8, certificatePublicKeyChanged());
            ps.setBoolean(9, securityHeadersChanged());
            ps.setBoolean(10, ipChanged());
            ps.setBoolean(11, softwareChanged());

            if (oldCertificateTimeToExpiry() == null) {
                ps.setNull(12, java.sql.Types.BIGINT);
            } else {
                ps.setLong(12, oldCertificateTimeToExpiry().toHours());
            }

            if (securitySignatureBefore() == null) {
                ps.setNull(13, java.sql.Types.BLOB);
            } else {
                ps.setBytes(13, securitySignatureBefore().compressed());
            }
            if (securitySignatureAfter() == null) {
                ps.setNull(14, java.sql.Types.BLOB);
            } else {
                ps.setBytes(14, securitySignatureAfter().compressed());
            }

            ps.setBoolean(15, certificateSerialNumberChanged());
            ps.setBoolean(16, certificateIssuerChanged());
            ps.setString(17, schemaChange.name());

            ps.executeUpdate();
        }
    }
}
@@ -0,0 +1,604 @@
package nu.marginalia.ping.model;

import org.apache.commons.lang3.StringUtils;
import org.jetbrains.annotations.NotNull;

import javax.annotation.Nullable;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Types;
import java.time.Instant;
import java.util.Objects;

public record DomainSecurityRecord(
        int domainId,
        int nodeId,
        @Nullable Integer asn,
        @Nullable HttpSchema httpSchema,
        @Nullable String httpVersion,
        @Nullable String httpCompression,
        @Nullable String httpCacheControl,
        @Nullable Instant sslCertNotBefore,
        @Nullable Instant sslCertNotAfter,
        @Nullable String sslCertIssuer,
        @Nullable String sslCertSubject,
        @Nullable byte[] sslCertPublicKeyHash,
        @Nullable String sslCertSerialNumber,
        @Nullable byte[] sslCertFingerprintSha256,
        @Nullable String sslCertSan,
        boolean sslCertWildcard,
        @Nullable String sslProtocol,
        @Nullable String sslCipherSuite,
        @Nullable String sslKeyExchange,
        @Nullable Integer sslCertificateChainLength,
        boolean sslCertificateValid,
        @Nullable String headerCorsAllowOrigin,
        boolean headerCorsAllowCredentials,
        @Nullable Integer headerContentSecurityPolicyHash,
        @Nullable String headerStrictTransportSecurity,
        @Nullable String headerReferrerPolicy,
        @Nullable String headerXFrameOptions,
        @Nullable String headerXContentTypeOptions,
        @Nullable String headerXXssProtection,
        @Nullable String headerServer,
        @Nullable String headerXPoweredBy,
        @Nullable Instant tsLastUpdate,
        @Nullable Boolean sslChainValid,
        @Nullable Boolean sslHostValid,
        @Nullable Boolean sslDateValid
)
        implements WritableModel
{

    public int certificateProfileHash() {
        return Objects.hash(
                sslCertIssuer,
                sslCertSubject,
                sslCipherSuite,
                sslKeyExchange
        );
    }

    public int securityHeadersHash() {
        return Objects.hash(
                headerCorsAllowOrigin,
                headerCorsAllowCredentials,
                headerContentSecurityPolicyHash,
                headerStrictTransportSecurity,
                headerReferrerPolicy,
                headerXFrameOptions,
                headerXContentTypeOptions,
                headerXXssProtection
        );
    }

    public DomainSecurityRecord(ResultSet rs) throws SQLException {
        this(rs.getInt("DOMAIN_SECURITY_INFORMATION.DOMAIN_ID"),
                rs.getInt("DOMAIN_SECURITY_INFORMATION.NODE_ID"),
                rs.getObject("DOMAIN_SECURITY_INFORMATION.ASN", Integer.class),
                httpSchemaFromString(rs.getString("DOMAIN_SECURITY_INFORMATION.HTTP_SCHEMA")),
                rs.getString("DOMAIN_SECURITY_INFORMATION.HTTP_VERSION"),
                rs.getString("DOMAIN_SECURITY_INFORMATION.HTTP_COMPRESSION"),
                rs.getString("DOMAIN_SECURITY_INFORMATION.HTTP_CACHE_CONTROL"),
                rs.getObject("DOMAIN_SECURITY_INFORMATION.SSL_CERT_NOT_BEFORE", Instant.class),
                rs.getObject("DOMAIN_SECURITY_INFORMATION.SSL_CERT_NOT_AFTER", Instant.class),
                rs.getString("DOMAIN_SECURITY_INFORMATION.SSL_CERT_ISSUER"),
                rs.getString("DOMAIN_SECURITY_INFORMATION.SSL_CERT_SUBJECT"),
                rs.getBytes("DOMAIN_SECURITY_INFORMATION.SSL_CERT_PUBLIC_KEY_HASH"),
                rs.getString("DOMAIN_SECURITY_INFORMATION.SSL_CERT_SERIAL_NUMBER"),
                rs.getBytes("DOMAIN_SECURITY_INFORMATION.SSL_CERT_FINGERPRINT_SHA256"),
                rs.getString("DOMAIN_SECURITY_INFORMATION.SSL_CERT_SAN"),
                rs.getBoolean("DOMAIN_SECURITY_INFORMATION.SSL_CERT_WILDCARD"),
|
||||||
|
rs.getString("DOMAIN_SECURITY_INFORMATION.SSL_PROTOCOL"),
|
||||||
|
rs.getString("DOMAIN_SECURITY_INFORMATION.SSL_CIPHER_SUITE"),
|
||||||
|
rs.getString("DOMAIN_SECURITY_INFORMATION.SSL_KEY_EXCHANGE"),
|
||||||
|
rs.getObject("DOMAIN_SECURITY_INFORMATION.SSL_CERTIFICATE_CHAIN_LENGTH", Integer.class),
|
||||||
|
rs.getBoolean("DOMAIN_SECURITY_INFORMATION.SSL_CERTIFICATE_VALID"),
|
||||||
|
rs.getString("DOMAIN_SECURITY_INFORMATION.HEADER_CORS_ALLOW_ORIGIN"),
|
||||||
|
rs.getBoolean("DOMAIN_SECURITY_INFORMATION.HEADER_CORS_ALLOW_CREDENTIALS"),
|
||||||
|
rs.getInt("DOMAIN_SECURITY_INFORMATION.HEADER_CONTENT_SECURITY_POLICY_HASH"),
|
||||||
|
rs.getString("DOMAIN_SECURITY_INFORMATION.HEADER_STRICT_TRANSPORT_SECURITY"),
|
||||||
|
rs.getString("DOMAIN_SECURITY_INFORMATION.HEADER_REFERRER_POLICY"),
|
||||||
|
rs.getString("DOMAIN_SECURITY_INFORMATION.HEADER_X_FRAME_OPTIONS"),
|
||||||
|
rs.getString("DOMAIN_SECURITY_INFORMATION.HEADER_X_CONTENT_TYPE_OPTIONS"),
|
||||||
|
rs.getString("DOMAIN_SECURITY_INFORMATION.HEADER_X_XSS_PROTECTION"),
|
||||||
|
rs.getString("DOMAIN_SECURITY_INFORMATION.HEADER_SERVER"),
|
||||||
|
rs.getString("DOMAIN_SECURITY_INFORMATION.HEADER_X_POWERED_BY"),
|
||||||
|
rs.getObject("DOMAIN_SECURITY_INFORMATION.TS_LAST_UPDATE", Instant.class),
|
||||||
|
rs.getObject("SSL_CHAIN_VALID", Boolean.class),
|
||||||
|
rs.getObject("SSL_HOST_VALID", Boolean.class),
|
||||||
|
rs.getObject("SSL_DATE_VALID", Boolean.class)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static HttpSchema httpSchemaFromString(@Nullable String schema) {
|
||||||
|
return schema == null ? null : HttpSchema.valueOf(schema);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static SslCertRevocationStatus sslCertRevocationStatusFromString(@Nullable String status) {
|
||||||
|
return status == null ? null : SslCertRevocationStatus.valueOf(status);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void write(Connection connection) throws SQLException {
|
||||||
|
try (var ps = connection.prepareStatement(
|
||||||
|
"""
|
||||||
|
REPLACE INTO DOMAIN_SECURITY_INFORMATION (
|
||||||
|
domain_id,
|
||||||
|
node_id,
|
||||||
|
http_schema,
|
||||||
|
http_version,
|
||||||
|
http_compression,
|
||||||
|
http_cache_control,
|
||||||
|
ssl_cert_not_before,
|
||||||
|
ssl_cert_not_after,
|
||||||
|
ssl_cert_issuer,
|
||||||
|
ssl_cert_subject,
|
||||||
|
ssl_cert_serial_number,
|
||||||
|
ssl_cert_fingerprint_sha256,
|
||||||
|
ssl_cert_san,
|
||||||
|
ssl_cert_wildcard,
|
||||||
|
ssl_protocol,
|
||||||
|
ssl_cipher_suite,
|
||||||
|
ssl_key_exchange,
|
||||||
|
ssl_certificate_chain_length,
|
||||||
|
ssl_certificate_valid,
|
||||||
|
header_cors_allow_origin,
|
||||||
|
header_cors_allow_credentials,
|
||||||
|
header_content_security_policy_hash,
|
||||||
|
header_strict_transport_security,
|
||||||
|
header_referrer_policy,
|
||||||
|
header_x_frame_options,
|
||||||
|
header_x_content_type_options,
|
||||||
|
header_x_xss_protection,
|
||||||
|
header_server,
|
||||||
|
header_x_powered_by,
|
||||||
|
ssl_cert_public_key_hash,
|
||||||
|
asn,
|
||||||
|
ts_last_update,
|
||||||
|
ssl_chain_valid,
|
||||||
|
ssl_host_valid,
|
||||||
|
ssl_date_valid)
|
||||||
|
VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
|
||||||
|
"""))
|
||||||
|
{
|
||||||
|
ps.setInt(1, domainId());
|
||||||
|
ps.setInt(2, nodeId());
|
||||||
|
if (httpSchema() == null) {
|
||||||
|
ps.setNull(3, java.sql.Types.VARCHAR);
|
||||||
|
} else {
|
||||||
|
ps.setString(3, httpSchema().name());
|
||||||
|
}
|
||||||
|
if (httpVersion() == null) {
|
||||||
|
ps.setNull(4, java.sql.Types.VARCHAR);
|
||||||
|
} else {
|
||||||
|
ps.setString(4, httpVersion());
|
||||||
|
}
|
||||||
|
if (httpCompression() == null) {
|
||||||
|
ps.setNull(5, java.sql.Types.VARCHAR);
|
||||||
|
} else {
|
||||||
|
ps.setString(5, httpCompression());
|
||||||
|
}
|
||||||
|
if (httpCacheControl() == null) {
|
||||||
|
ps.setNull(6, java.sql.Types.VARCHAR);
|
||||||
|
} else {
|
||||||
|
ps.setString(6, httpCacheControl());
|
||||||
|
}
|
||||||
|
if (sslCertNotBefore() == null) {
|
||||||
|
ps.setNull(7, java.sql.Types.TIMESTAMP);
|
||||||
|
} else {
|
||||||
|
ps.setTimestamp(7, java.sql.Timestamp.from(sslCertNotBefore()));
|
||||||
|
}
|
||||||
|
if (sslCertNotAfter() == null) {
|
||||||
|
ps.setNull(8, java.sql.Types.TIMESTAMP);
|
||||||
|
} else {
|
||||||
|
ps.setTimestamp(8, java.sql.Timestamp.from(sslCertNotAfter()));
|
||||||
|
}
|
||||||
|
if (sslCertIssuer() == null) {
|
||||||
|
ps.setNull(9, java.sql.Types.VARCHAR);
|
||||||
|
} else {
|
||||||
|
ps.setString(9, sslCertIssuer());
|
||||||
|
}
|
||||||
|
if (sslCertSubject() == null) {
|
||||||
|
ps.setNull(10, java.sql.Types.VARCHAR);
|
||||||
|
} else {
|
||||||
|
ps.setString(10, sslCertSubject());
|
||||||
|
}
|
||||||
|
if (sslCertSerialNumber() == null) {
|
||||||
|
ps.setNull(11, java.sql.Types.VARCHAR);
|
||||||
|
} else {
|
||||||
|
ps.setString(11, sslCertSerialNumber());
|
||||||
|
}
|
||||||
|
if (sslCertFingerprintSha256() == null) {
|
||||||
|
ps.setNull(12, java.sql.Types.BINARY);
|
||||||
|
} else {
|
||||||
|
ps.setBytes(12, sslCertFingerprintSha256());
|
||||||
|
}
|
||||||
|
if (sslCertSan() == null) {
|
||||||
|
ps.setNull(13, java.sql.Types.VARCHAR);
|
||||||
|
} else {
|
||||||
|
ps.setString(13, sslCertSan());
|
||||||
|
}
|
||||||
|
ps.setBoolean(14, sslCertWildcard());
|
||||||
|
if (sslProtocol() == null) {
|
||||||
|
ps.setNull(15, java.sql.Types.VARCHAR);
|
||||||
|
} else {
|
||||||
|
ps.setString(15, sslProtocol());
|
||||||
|
}
|
||||||
|
if (sslCipherSuite() == null) {
|
||||||
|
ps.setNull(16, java.sql.Types.VARCHAR);
|
||||||
|
} else {
|
||||||
|
ps.setString(16, sslCipherSuite());
|
||||||
|
}
|
||||||
|
if (sslKeyExchange() == null) {
|
||||||
|
ps.setNull(17, java.sql.Types.VARCHAR);
|
||||||
|
} else {
|
||||||
|
ps.setString(17, sslKeyExchange());
|
||||||
|
}
|
||||||
|
if (sslCertificateChainLength() == null) {
|
||||||
|
ps.setNull(18, java.sql.Types.INTEGER);
|
||||||
|
} else {
|
||||||
|
ps.setInt(18, sslCertificateChainLength());
|
||||||
|
}
|
||||||
|
ps.setBoolean(19, sslCertificateValid());
|
||||||
|
if (headerCorsAllowOrigin() == null) {
|
||||||
|
ps.setNull(20, java.sql.Types.VARCHAR);
|
||||||
|
} else {
|
||||||
|
ps.setString(20, headerCorsAllowOrigin());
|
||||||
|
}
|
||||||
|
ps.setBoolean(21, headerCorsAllowCredentials());
|
||||||
|
if (headerContentSecurityPolicyHash() == null) {
|
||||||
|
ps.setNull(22, Types.INTEGER);
|
||||||
|
} else {
|
||||||
|
ps.setInt(22, headerContentSecurityPolicyHash());
|
||||||
|
}
|
||||||
|
if (headerStrictTransportSecurity() == null) {
|
||||||
|
ps.setNull(23, java.sql.Types.VARCHAR);
|
||||||
|
} else {
|
||||||
|
ps.setString(23, headerStrictTransportSecurity());
|
||||||
|
}
|
||||||
|
if (headerReferrerPolicy() == null) {
|
||||||
|
ps.setNull(24, java.sql.Types.VARCHAR);
|
||||||
|
} else {
|
||||||
|
ps.setString(24, headerReferrerPolicy());
|
||||||
|
}
|
||||||
|
if (headerXFrameOptions() == null) {
|
||||||
|
ps.setNull(25, java.sql.Types.VARCHAR);
|
||||||
|
} else {
|
||||||
|
ps.setString(25, headerXFrameOptions());
|
||||||
|
}
|
||||||
|
if (headerXContentTypeOptions() == null) {
|
||||||
|
ps.setNull(26, java.sql.Types.VARCHAR);
|
||||||
|
} else {
|
||||||
|
ps.setString(26, headerXContentTypeOptions());
|
||||||
|
}
|
||||||
|
if (headerXXssProtection() == null) {
|
||||||
|
ps.setNull(27, java.sql.Types.VARCHAR);
|
||||||
|
} else {
|
||||||
|
ps.setString(27, headerXXssProtection());
|
||||||
|
}
|
||||||
|
if (headerServer() == null) {
|
||||||
|
ps.setNull(28, java.sql.Types.VARCHAR);
|
||||||
|
} else {
|
||||||
|
ps.setString(28, headerServer());
|
||||||
|
}
|
||||||
|
if (headerXPoweredBy() == null) {
|
||||||
|
ps.setNull(29, java.sql.Types.VARCHAR);
|
||||||
|
} else {
|
||||||
|
ps.setString(29, headerXPoweredBy());
|
||||||
|
}
|
||||||
|
if (sslCertPublicKeyHash() == null) {
|
||||||
|
ps.setNull(30, java.sql.Types.BINARY);
|
||||||
|
} else {
|
||||||
|
ps.setBytes(30, sslCertPublicKeyHash());
|
||||||
|
}
|
||||||
|
if (asn() == null) {
|
||||||
|
ps.setNull(31, java.sql.Types.INTEGER);
|
||||||
|
} else {
|
||||||
|
ps.setInt(31, asn());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tsLastUpdate() == null) {
|
||||||
|
ps.setNull(32, java.sql.Types.TIMESTAMP);
|
||||||
|
} else {
|
||||||
|
ps.setTimestamp(32, java.sql.Timestamp.from(tsLastUpdate()));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sslChainValid() == null) {
|
||||||
|
ps.setNull(33, java.sql.Types.BOOLEAN);
|
||||||
|
} else {
|
||||||
|
ps.setBoolean(33, sslChainValid());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sslHostValid() == null) {
|
||||||
|
ps.setNull(34, java.sql.Types.BOOLEAN);
|
||||||
|
} else {
|
||||||
|
ps.setBoolean(34, sslHostValid());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sslDateValid() == null) {
|
||||||
|
ps.setNull(35, java.sql.Types.BOOLEAN);
|
||||||
|
} else {
|
||||||
|
ps.setBoolean(35, sslDateValid());
|
||||||
|
}
|
||||||
|
|
||||||
|
ps.executeUpdate();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class Builder {
|
||||||
|
private int domainId;
|
||||||
|
private int nodeId;
|
||||||
|
private Integer asn;
|
||||||
|
private HttpSchema httpSchema;
|
||||||
|
private String httpVersion;
|
||||||
|
private String httpCompression;
|
||||||
|
private String httpCacheControl;
|
||||||
|
private Instant sslCertNotBefore;
|
||||||
|
private Instant sslCertNotAfter;
|
||||||
|
private String sslCertIssuer;
|
||||||
|
private String sslCertSubject;
|
||||||
|
private String sslCertSerialNumber;
|
||||||
|
private byte[] sslCertPublicKeyHash;
|
||||||
|
private byte[] sslCertFingerprintSha256;
|
||||||
|
private String sslCertSan;
|
||||||
|
private boolean sslCertWildcard;
|
||||||
|
private String sslProtocol;
|
||||||
|
private String sslCipherSuite;
|
||||||
|
private String sslKeyExchange;
|
||||||
|
private Integer sslCertificateChainLength;
|
||||||
|
private boolean sslCertificateValid;
|
||||||
|
private String headerCorsAllowOrigin;
|
||||||
|
private boolean headerCorsAllowCredentials;
|
||||||
|
private Integer headerContentSecurityPolicyHash;
|
||||||
|
private String headerStrictTransportSecurity;
|
||||||
|
private String headerReferrerPolicy;
|
||||||
|
private String headerXFrameOptions;
|
||||||
|
private String headerXContentTypeOptions;
|
||||||
|
private String headerXXssProtection;
|
||||||
|
private String headerServer;
|
||||||
|
private String headerXPoweredBy;
|
||||||
|
private Instant tsLastUpdate;
|
||||||
|
|
||||||
|
private Boolean isCertChainValid;
|
||||||
|
private Boolean isCertHostValid;
|
||||||
|
private Boolean isCertDateValid;
|
||||||
|
|
||||||
|
|
||||||
|
private static Instant MAX_UNIX_TIMESTAMP = Instant.ofEpochSecond(Integer.MAX_VALUE);
|
||||||
|
|
||||||
|
public Builder() {
|
||||||
|
// Default values for boolean fields
|
||||||
|
this.sslCertWildcard = false;
|
||||||
|
this.sslCertificateValid = false;
|
||||||
|
this.headerCorsAllowCredentials = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder domainId(int domainId) {
|
||||||
|
this.domainId = domainId;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder nodeId(int nodeId) {
|
||||||
|
this.nodeId = nodeId;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder asn(@Nullable Integer asn) {
|
||||||
|
this.asn = asn;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder httpSchema(HttpSchema httpSchema) {
|
||||||
|
this.httpSchema = httpSchema;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder httpVersion(String httpVersion) {
|
||||||
|
this.httpVersion = StringUtils.truncate(httpVersion, 10);
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder httpCompression(String httpCompression) {
|
||||||
|
this.httpCompression = StringUtils.truncate(httpCompression, 50);
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder httpCacheControl(String httpCacheControl) {
|
||||||
|
this.httpCacheControl = httpCacheControl;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder sslCertNotBefore(@NotNull Instant sslCertNotBefore) {
|
||||||
|
if (sslCertNotBefore.isAfter(MAX_UNIX_TIMESTAMP)) {
|
||||||
|
sslCertNotBefore = MAX_UNIX_TIMESTAMP;
|
||||||
|
}
|
||||||
|
this.sslCertNotBefore = sslCertNotBefore;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder sslCertNotAfter(@NotNull Instant sslCertNotAfter) {
|
||||||
|
if (sslCertNotAfter.isAfter(MAX_UNIX_TIMESTAMP)) {
|
||||||
|
sslCertNotAfter = MAX_UNIX_TIMESTAMP;
|
||||||
|
}
|
||||||
|
this.sslCertNotAfter = sslCertNotAfter;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder sslCertIssuer(String sslCertIssuer) {
|
||||||
|
this.sslCertIssuer = StringUtils.truncate(sslCertIssuer, 255);
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder sslCertSubject(String sslCertSubject) {
|
||||||
|
this.sslCertSubject = StringUtils.truncate(sslCertSubject, 255);
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder sslCertSerialNumber(String sslCertSerialNumber) {
|
||||||
|
this.sslCertSerialNumber = sslCertSerialNumber;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder sslCertPublicKeyHash(byte[] sslCertPublicKeyHash) {
|
||||||
|
this.sslCertPublicKeyHash = sslCertPublicKeyHash;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder sslCertFingerprintSha256(byte[] sslCertFingerprintSha256) {
|
||||||
|
this.sslCertFingerprintSha256 = sslCertFingerprintSha256;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder sslCertSan(String sslCertSan) {
|
||||||
|
this.sslCertSan = sslCertSan;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder sslCertWildcard(boolean sslCertWildcard) {
|
||||||
|
this.sslCertWildcard = sslCertWildcard;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder sslProtocol(String sslProtocol) {
|
||||||
|
this.sslProtocol = sslProtocol;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder sslCipherSuite(String sslCipherSuite) {
|
||||||
|
this.sslCipherSuite = sslCipherSuite;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder sslKeyExchange(String sslKeyExchange) {
|
||||||
|
this.sslKeyExchange = sslKeyExchange;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder sslCertificateChainLength(Integer sslCertificateChainLength) {
|
||||||
|
this.sslCertificateChainLength = sslCertificateChainLength;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder sslCertificateValid(boolean sslCertificateValid) {
|
||||||
|
this.sslCertificateValid = sslCertificateValid;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder headerCorsAllowOrigin(String headerCorsAllowOrigin) {
|
||||||
|
this.headerCorsAllowOrigin = headerCorsAllowOrigin;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder headerCorsAllowCredentials(boolean headerCorsAllowCredentials) {
|
||||||
|
this.headerCorsAllowCredentials = headerCorsAllowCredentials;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder headerContentSecurityPolicyHash(Integer headerContentSecurityPolicyHash) {
|
||||||
|
this.headerContentSecurityPolicyHash = headerContentSecurityPolicyHash;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder headerStrictTransportSecurity(String headerStrictTransportSecurity) {
|
||||||
|
this.headerStrictTransportSecurity = StringUtils.truncate(headerStrictTransportSecurity, 255);
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder headerReferrerPolicy(String headerReferrerPolicy) {
|
||||||
|
this.headerReferrerPolicy = StringUtils.truncate(headerReferrerPolicy, 50);
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder headerXFrameOptions(String headerXFrameOptions) {
|
||||||
|
this.headerXFrameOptions = StringUtils.truncate(headerXFrameOptions, 50);
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder headerXContentTypeOptions(String headerXContentTypeOptions) {
|
||||||
|
this.headerXContentTypeOptions = StringUtils.truncate(headerXContentTypeOptions, 50);
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder headerXXssProtection(String headerXXssProtection) {
|
||||||
|
this.headerXXssProtection = StringUtils.truncate(headerXXssProtection, 50);
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder headerServer(String headerServer) {
|
||||||
|
this.headerServer = StringUtils.truncate(headerServer, 255);
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder headerXPoweredBy(String headerXPoweredBy) {
|
||||||
|
this.headerXPoweredBy = StringUtils.truncate(headerXPoweredBy, 255);
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder tsLastUpdate(Instant tsLastUpdate) {
|
||||||
|
this.tsLastUpdate = tsLastUpdate;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder sslChainValid(@Nullable Boolean isCertChainValid) {
|
||||||
|
this.isCertChainValid = isCertChainValid;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder sslHostValid(@Nullable Boolean isCertHostValid) {
|
||||||
|
this.isCertHostValid = isCertHostValid;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder sslDateValid(@Nullable Boolean isCertDateValid) {
|
||||||
|
this.isCertDateValid = isCertDateValid;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public DomainSecurityRecord build() {
|
||||||
|
return new DomainSecurityRecord(
|
||||||
|
domainId,
|
||||||
|
nodeId,
|
||||||
|
asn,
|
||||||
|
httpSchema,
|
||||||
|
httpVersion,
|
||||||
|
httpCompression,
|
||||||
|
httpCacheControl,
|
||||||
|
sslCertNotBefore,
|
||||||
|
sslCertNotAfter,
|
||||||
|
sslCertIssuer,
|
||||||
|
sslCertSubject,
|
||||||
|
sslCertPublicKeyHash,
|
||||||
|
sslCertSerialNumber,
|
||||||
|
sslCertFingerprintSha256,
|
||||||
|
sslCertSan,
|
||||||
|
sslCertWildcard,
|
||||||
|
sslProtocol,
|
||||||
|
sslCipherSuite,
|
||||||
|
sslKeyExchange,
|
||||||
|
sslCertificateChainLength,
|
||||||
|
sslCertificateValid,
|
||||||
|
headerCorsAllowOrigin,
|
||||||
|
headerCorsAllowCredentials,
|
||||||
|
headerContentSecurityPolicyHash,
|
||||||
|
headerStrictTransportSecurity,
|
||||||
|
headerReferrerPolicy,
|
||||||
|
headerXFrameOptions,
|
||||||
|
headerXContentTypeOptions,
|
||||||
|
headerXXssProtection,
|
||||||
|
headerServer,
|
||||||
|
headerXPoweredBy,
|
||||||
|
tsLastUpdate,
|
||||||
|
isCertChainValid,
|
||||||
|
isCertHostValid,
|
||||||
|
isCertDateValid
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Builder builder() {
|
||||||
|
return new Builder();
|
||||||
|
}
|
||||||
|
}
|
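The builder above mirrors the columns of DOMAIN_SECURITY_INFORMATION one to one. A minimal usage sketch follows; the concrete values and the dataSource variable are illustrative assumptions, not part of this change:

    // Hypothetical usage; field values and connection handling are assumptions.
    DomainSecurityRecord record = DomainSecurityRecord.builder()
            .domainId(123)
            .nodeId(1)
            .httpSchema(HttpSchema.HTTPS)
            .sslCertificateValid(true)
            .tsLastUpdate(Instant.now())
            .build();

    try (Connection conn = dataSource.getConnection()) { // 'dataSource' is assumed to exist
        record.write(conn); // issues the REPLACE INTO DOMAIN_SECURITY_INFORMATION statement above
    }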
@@ -0,0 +1,12 @@
package nu.marginalia.ping.model;

public enum ErrorClassification {
    NONE,
    TIMEOUT,
    SSL_ERROR,
    DNS_ERROR,
    CONNECTION_ERROR,
    HTTP_CLIENT_ERROR,
    HTTP_SERVER_ERROR,
    UNKNOWN
}
@@ -0,0 +1,13 @@
package nu.marginalia.ping.model;

public sealed interface HistoricalAvailabilityData {
    public String domain();
    record JustDomainReference(DomainReference domainReference) implements HistoricalAvailabilityData {
        @Override
        public String domain() {
            return domainReference.domainName();
        }
    }
    record JustAvailability(String domain, DomainAvailabilityRecord record) implements HistoricalAvailabilityData {}
    record AvailabilityAndSecurity(String domain, DomainAvailabilityRecord availabilityRecord, DomainSecurityRecord securityRecord) implements HistoricalAvailabilityData {}
}
@@ -0,0 +1,6 @@
package nu.marginalia.ping.model;

public enum HttpSchema {
    HTTP,
    HTTPS;
}
@@ -0,0 +1,6 @@
package nu.marginalia.ping.model;

public sealed interface RootDomainReference {
    record ByIdAndName(long id, String name) implements RootDomainReference { }
    record ByName(String name) implements RootDomainReference { }
}
@@ -0,0 +1,12 @@
package nu.marginalia.ping.model;

public enum SchemaChange {
    UNKNOWN,
    NONE,
    HTTP_TO_HTTPS,
    HTTPS_TO_HTTP;

    public boolean isSignificant() {
        return this != NONE && this != UNKNOWN;
    }
}
@@ -0,0 +1,8 @@
package nu.marginalia.ping.model;

public record SingleDnsRecord(
        String recordType,
        String data
) {

}
@@ -0,0 +1,8 @@
package nu.marginalia.ping.model;

public enum SslCertRevocationStatus {
    NOT_CHECKED,
    VALID,
    REVOKED,
    UNKNOWN
}
@@ -0,0 +1,14 @@
package nu.marginalia.ping.model;

import javax.annotation.Nullable;
import java.sql.Connection;
import java.sql.SQLException;
import java.time.Instant;

public interface WritableModel {
    void write(Connection connection) throws SQLException;
    @Nullable
    default Instant nextUpdateTime() {
        return null;
    }
}
@@ -0,0 +1,71 @@
package nu.marginalia.ping.model.comparison;

import nu.marginalia.ping.model.DomainDnsRecord;

import java.util.List;
import java.util.Objects;
import java.util.Set;

public sealed interface DnsRecordChange {
    record None() implements DnsRecordChange { }
    record Changed(
            boolean aRecordsChanged,
            boolean aaaaRecordsChanged,
            boolean cnameRecordChanged,
            boolean mxRecordsChanged,
            boolean caaRecordsChanged,
            boolean txtRecordsChanged,
            boolean nsRecordsChanged,
            boolean soaRecordChanged
    ) implements DnsRecordChange {}

    static DnsRecordChange between(DomainDnsRecord before, DomainDnsRecord after) {

        boolean aaaaRecordsChanged = !compareRecords(before.aaaaRecords(), after.aaaaRecords());
        boolean aRecordsChanged = !compareRecords(before.aRecords(), after.aRecords());
        boolean cnameRecordChanged = !Objects.equals(before.cnameRecord(), after.cnameRecord());
        boolean mxRecordsChanged = !compareRecords(before.mxRecords(), after.mxRecords());
        boolean caaRecordsChanged = !compareRecords(before.caaRecords(), after.caaRecords());
        boolean txtRecordsChanged = !compareRecords(before.txtRecords(), after.txtRecords());
        boolean nsRecordsChanged = !compareRecords(before.nsRecords(), after.nsRecords());
        boolean soaRecordChanged = !Objects.equals(before.soaRecord(), after.soaRecord());

        boolean anyChanged = aaaaRecordsChanged ||
                aRecordsChanged ||
                cnameRecordChanged ||
                mxRecordsChanged ||
                caaRecordsChanged ||
                txtRecordsChanged ||
                nsRecordsChanged ||
                soaRecordChanged;
        if (!anyChanged) {
            return new DnsRecordChange.None();
        } else {
            return new DnsRecordChange.Changed(
                    aRecordsChanged,
                    aaaaRecordsChanged,
                    cnameRecordChanged,
                    mxRecordsChanged,
                    caaRecordsChanged,
                    txtRecordsChanged,
                    nsRecordsChanged,
                    soaRecordChanged
            );
        }

    }

    static boolean compareRecords(List<String> beforeRecords, List<String> afterRecords) {
        if (null == beforeRecords && null == afterRecords) {
            return true; // Both are null, no change
        }

        // empty and null are semantically equivalent
        if (null == beforeRecords)
            return afterRecords.isEmpty();
        if (null == afterRecords)
            return beforeRecords.isEmpty();

        return Set.copyOf(beforeRecords).equals(Set.copyOf(afterRecords)); // Compare the sets for equality
    }
}
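Since DnsRecordChange is a sealed interface, callers can branch on the result of between(). A hedged sketch; the 'previous' and 'current' DomainDnsRecord instances are assumed to come from elsewhere in the ping service:

    // Illustrative only: 'previous' and 'current' are assumed DomainDnsRecord instances.
    DnsRecordChange change = DnsRecordChange.between(previous, current);
    if (change instanceof DnsRecordChange.Changed c) {
        if (c.nsRecordsChanged() || c.soaRecordChanged()) {
            // delegation or zone data changed; likely worth recording an event
        }
    }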
@@ -0,0 +1,45 @@
package nu.marginalia.ping.model.comparison;

import nu.marginalia.ping.model.AvailabilityOutageType;
import nu.marginalia.ping.model.DomainAvailabilityRecord;

import java.util.Objects;

public sealed interface DomainAvailabilityChange {
    record None() implements DomainAvailabilityChange { }
    record UnavailableToAvailable() implements DomainAvailabilityChange { }
    record AvailableToUnavailable(AvailabilityOutageType outageType) implements DomainAvailabilityChange { }
    record OutageTypeChange(AvailabilityOutageType newOutageType) implements DomainAvailabilityChange { }

    static DomainAvailabilityChange between(
            DomainAvailabilityRecord oldStatus,
            DomainAvailabilityRecord newStatus
    ) {
        if (oldStatus.serverAvailable() && newStatus.serverAvailable()) {
            return new DomainAvailabilityChange.None();
        }

        if (oldStatus.serverAvailable()) {
            return new DomainAvailabilityChange.AvailableToUnavailable(
                    AvailabilityOutageType.fromErrorClassification(newStatus.errorClassification())
            );
        }

        if (newStatus.serverAvailable()) {
            return new DomainAvailabilityChange.UnavailableToAvailable();
        }
        else {
            var classOld = oldStatus.errorClassification();
            var classNew = newStatus.errorClassification();

            if (!Objects.equals(classOld, classNew)) {
                return new DomainAvailabilityChange.OutageTypeChange(
                        AvailabilityOutageType.fromErrorClassification(newStatus.errorClassification())
                );
            }
            else {
                return new DomainAvailabilityChange.None();
            }
        }
    }
}
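The between() helper collapses two availability snapshots into one of the four cases above. A small consuming sketch; the two record instances are assumed:

    // Illustrative only: 'oldStatus' and 'newStatus' are assumed DomainAvailabilityRecord instances.
    DomainAvailabilityChange change = DomainAvailabilityChange.between(oldStatus, newStatus);
    if (change instanceof DomainAvailabilityChange.AvailableToUnavailable down) {
        // a new outage started; down.outageType() classifies it
    } else if (change instanceof DomainAvailabilityChange.UnavailableToAvailable) {
        // the domain recovered
    }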
@@ -0,0 +1,176 @@
package nu.marginalia.ping.model.comparison;

import nu.marginalia.ping.model.DomainDnsRecord;
import nu.marginalia.ping.model.WritableModel;
import nu.marginalia.ping.util.JsonObject;

import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Types;
import java.time.Instant;

public record DomainDnsEvent(
        int rootDomainId,
        int nodeId,

        Instant tsChange,
        boolean changeARecords,
        boolean changeAaaaRecords,
        boolean changeCname,
        boolean changeMxRecords,
        boolean changeCaaRecords,
        boolean changeTxtRecords,
        boolean changeNsRecords,
        boolean changeSoaRecord,

        JsonObject<DomainDnsRecord> dnsSignatureBefore,
        JsonObject<DomainDnsRecord> dnsSignatureAfter
) implements WritableModel {

    @Override
    public void write(Connection connection) throws SQLException {
        try (var ps = connection.prepareStatement("""
                INSERT INTO DOMAIN_DNS_EVENTS (
                    DNS_ROOT_DOMAIN_ID,
                    NODE_ID,
                    TS_CHANGE,
                    CHANGE_A_RECORDS,
                    CHANGE_AAAA_RECORDS,
                    CHANGE_CNAME,
                    CHANGE_MX_RECORDS,
                    CHANGE_CAA_RECORDS,
                    CHANGE_TXT_RECORDS,
                    CHANGE_NS_RECORDS,
                    CHANGE_SOA_RECORD,
                    DNS_SIGNATURE_BEFORE,
                    DNS_SIGNATURE_AFTER
                ) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)
                """)) {
            ps.setInt(1, rootDomainId());
            ps.setInt(2, nodeId());
            ps.setTimestamp(3, java.sql.Timestamp.from(tsChange()));
            ps.setBoolean(4, changeARecords());
            ps.setBoolean(5, changeAaaaRecords());
            ps.setBoolean(6, changeCname());
            ps.setBoolean(7, changeMxRecords());
            ps.setBoolean(8, changeCaaRecords());
            ps.setBoolean(9, changeTxtRecords());
            ps.setBoolean(10, changeNsRecords());
            ps.setBoolean(11, changeSoaRecord());
            if (dnsSignatureBefore() == null) {
                ps.setNull(12, Types.BLOB);
            } else {
                ps.setBytes(12, dnsSignatureBefore().compressed());
            }
            if (dnsSignatureAfter() == null) {
                ps.setNull(13, Types.BLOB);
            } else {
                ps.setBytes(13, dnsSignatureAfter().compressed());
            }
            ps.executeUpdate();
        }
    }

    public static Builder builder() {
        return new Builder();
    }

    public static class Builder {
        private int rootDomainId;
        private int nodeId;
        private Instant tsChange;
        private boolean changeARecords;
        private boolean changeAaaaRecords;
        private boolean changeCname;
        private boolean changeMxRecords;
        private boolean changeCaaRecords;
        private boolean changeTxtRecords;
        private boolean changeNsRecords;
        private boolean changeSoaRecord;
        private JsonObject<DomainDnsRecord> dnsSignatureBefore;
        private JsonObject<DomainDnsRecord> dnsSignatureAfter;

        public Builder rootDomainId(int rootDomainId) {
            this.rootDomainId = rootDomainId;
            return this;
        }

        public Builder nodeId(int nodeId) {
            this.nodeId = nodeId;
            return this;
        }

        public Builder tsChange(Instant tsChange) {
            this.tsChange = tsChange;
            return this;
        }

        public Builder changeARecords(boolean changeARecords) {
            this.changeARecords = changeARecords;
            return this;
        }

        public Builder changeAaaaRecords(boolean changeAaaaRecords) {
            this.changeAaaaRecords = changeAaaaRecords;
            return this;
        }

        public Builder changeCname(boolean changeCname) {
            this.changeCname = changeCname;
            return this;
        }

        public Builder changeMxRecords(boolean changeMxRecords) {
            this.changeMxRecords = changeMxRecords;
            return this;
        }

        public Builder changeCaaRecords(boolean changeCaaRecords) {
            this.changeCaaRecords = changeCaaRecords;
            return this;
        }

        public Builder changeTxtRecords(boolean changeTxtRecords) {
            this.changeTxtRecords = changeTxtRecords;
            return this;
        }

        public Builder changeNsRecords(boolean changeNsRecords) {
            this.changeNsRecords = changeNsRecords;
            return this;
        }

        public Builder changeSoaRecord(boolean changeSoaRecord) {
            this.changeSoaRecord = changeSoaRecord;
            return this;
        }

        public Builder dnsSignatureBefore(JsonObject<DomainDnsRecord> dnsSignatureBefore) {
            this.dnsSignatureBefore = dnsSignatureBefore;
            return this;
        }

        public Builder dnsSignatureAfter(JsonObject<DomainDnsRecord> dnsSignatureAfter) {
            this.dnsSignatureAfter = dnsSignatureAfter;
            return this;
        }

        public DomainDnsEvent build() {
            return new DomainDnsEvent(
                    rootDomainId,
                    nodeId,
                    tsChange,
                    changeARecords,
                    changeAaaaRecords,
                    changeCname,
                    changeMxRecords,
                    changeCaaRecords,
                    changeTxtRecords,
                    changeNsRecords,
                    changeSoaRecord,
                    dnsSignatureBefore,
                    dnsSignatureAfter
            );
        }
    }
}
@@ -0,0 +1,110 @@
package nu.marginalia.ping.model.comparison;

import nu.marginalia.ping.model.DomainAvailabilityRecord;
import nu.marginalia.ping.model.DomainSecurityRecord;
import nu.marginalia.ping.model.HttpSchema;
import nu.marginalia.ping.model.SchemaChange;
import org.jetbrains.annotations.NotNull;

import java.time.Duration;
import java.time.Instant;
import java.util.Arrays;
import java.util.Objects;

public record SecurityInformationChange(
        boolean isChanged,
        boolean isAsnChanged,
        boolean isCertificateFingerprintChanged,
        boolean isCertificateProfileChanged,
        boolean isCertificateSanChanged,
        boolean isCertificatePublicKeyChanged,
        boolean isCertificateSerialNumberChanged,
        boolean isCertificateIssuerChanged,
        Duration oldCertificateTimeToExpiry,
        boolean isSecurityHeadersChanged,
        boolean isIpAddressChanged,
        boolean isSoftwareHeaderChanged,
        SchemaChange schemaChange
) {
    public static SecurityInformationChange between(
            DomainSecurityRecord before, DomainAvailabilityRecord availabilityBefore,
            DomainSecurityRecord after, DomainAvailabilityRecord availabilityAfter
    ) {
        boolean asnChanged = !Objects.equals(before.asn(), after.asn());

        boolean ipChanged = 0 != Arrays.compare(availabilityBefore.serverIp(), availabilityAfter.serverIp());

        boolean certificateFingerprintChanged = 0 != Arrays.compare(before.sslCertFingerprintSha256(), after.sslCertFingerprintSha256());
        boolean certificateProfileChanged = before.certificateProfileHash() != after.certificateProfileHash();
        boolean certificateSerialNumberChanged = !Objects.equals(before.sslCertSerialNumber(), after.sslCertSerialNumber());
        boolean certificatePublicKeyChanged = 0 != Arrays.compare(before.sslCertPublicKeyHash(), after.sslCertPublicKeyHash());
        boolean certificateSanChanged = !Objects.equals(before.sslCertSan(), after.sslCertSan());
        boolean certificateIssuerChanged = !Objects.equals(before.sslCertIssuer(), after.sslCertIssuer());

        Duration oldCertificateTimeToExpiry = before.sslCertNotAfter() == null ? null : Duration.between(
                Instant.now(),
                before.sslCertNotAfter()
        );

        boolean securityHeadersChanged = before.securityHeadersHash() != after.securityHeadersHash();
        boolean softwareChanged = !Objects.equals(before.headerServer(), after.headerServer());

        SchemaChange schemaChange = getSchemaChange(before, after);

        // Note we don't include IP address changes in the overall change status,
        // as this is not alone considered a change in security information; we may have
        // multiple IP addresses for a domain, and the IP address may change frequently
        // within the same ASN or certificate profile.

        boolean isChanged = asnChanged
                || certificateFingerprintChanged
                || securityHeadersChanged
                || certificateProfileChanged
                || softwareChanged
                || schemaChange.isSignificant();

        return new SecurityInformationChange(
                isChanged,
                asnChanged,
                certificateFingerprintChanged,
                certificateProfileChanged,
                certificateSanChanged,
                certificatePublicKeyChanged,
                certificateSerialNumberChanged,
                certificateIssuerChanged,
                oldCertificateTimeToExpiry,
                securityHeadersChanged,
                ipChanged,
                softwareChanged,
                schemaChange
        );
    }

    private static @NotNull SchemaChange getSchemaChange(DomainSecurityRecord before, DomainSecurityRecord after) {
        if (before.httpSchema() == null || after.httpSchema() == null) {
            return SchemaChange.UNKNOWN;
        }

        boolean beforeIsHttp = before.httpSchema() == HttpSchema.HTTP;
        boolean afterIsHttp = after.httpSchema() == HttpSchema.HTTP;
        boolean beforeIsHttps = before.httpSchema() == HttpSchema.HTTPS;
        boolean afterIsHttps = after.httpSchema() == HttpSchema.HTTPS;

        SchemaChange schemaChange;

        if (beforeIsHttp && afterIsHttp) {
            schemaChange = SchemaChange.NONE;
        } else if (beforeIsHttps && afterIsHttps) {
            schemaChange = SchemaChange.NONE;
        } else if (beforeIsHttp && afterIsHttps) {
            schemaChange = SchemaChange.HTTP_TO_HTTPS;
        } else if (beforeIsHttps && afterIsHttp) {
            schemaChange = SchemaChange.HTTPS_TO_HTTP;
        } else {
            schemaChange = SchemaChange.UNKNOWN;
        }
        return schemaChange;
    }

}
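A hedged sketch of how this comparison might feed the DOMAIN_SECURITY_EVENTS insert shown earlier; the four input records are assumed to be loaded elsewhere:

    // Illustrative only: 'before', 'after', 'availBefore', 'availAfter' are assumed inputs.
    SecurityInformationChange cmp = SecurityInformationChange.between(before, availBefore, after, availAfter);
    if (cmp.isChanged()) {
        // e.g. persist a DomainSecurityEvent row; its fields map directly
        // onto the accessors of this comparison result
    }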
@@ -0,0 +1,59 @@
|
|||||||
|
package nu.marginalia.ping.ssl;
|
||||||
|
|
||||||
|
import org.bouncycastle.asn1.ASN1OctetString;
|
||||||
|
import org.bouncycastle.asn1.ASN1Primitive;
|
||||||
|
import org.bouncycastle.asn1.x509.*;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.security.cert.X509Certificate;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class AIAExtractor {
|
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(AIAExtractor.class);
|
||||||
|
|
||||||
|
public static List<String> getCaIssuerUrls(X509Certificate certificate) {
|
||||||
|
List<String> caIssuerUrls = new ArrayList<>();
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Get the AIA extension value
|
||||||
|
byte[] aiaExtensionValue = certificate.getExtensionValue(Extension.authorityInfoAccess.getId());
|
||||||
|
if (aiaExtensionValue == null) {
|
||||||
|
logger.warn("No AIA extension found");
|
||||||
|
return caIssuerUrls;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse the extension - first unwrap the OCTET STRING
|
||||||
|
ASN1OctetString octetString = ASN1OctetString.getInstance(aiaExtensionValue);
|
||||||
|
ASN1Primitive aiaObj = ASN1Primitive.fromByteArray(octetString.getOctets());
|
||||||
|
|
||||||
|
// Parse as AuthorityInformationAccess
|
||||||
|
AuthorityInformationAccess aia = AuthorityInformationAccess.getInstance(aiaObj);
|
||||||
|
|
||||||
|
if (aia != null) {
|
||||||
|
AccessDescription[] accessDescriptions = aia.getAccessDescriptions();
|
||||||
|
|
||||||
|
for (AccessDescription accessDesc : accessDescriptions) {
|
||||||
|
// Check if this is a CA Issuers access method
|
||||||
|
if (X509ObjectIdentifiers.id_ad_caIssuers.equals(accessDesc.getAccessMethod())) {
|
||||||
|
GeneralName accessLocation = accessDesc.getAccessLocation();
|
||||||
|
|
||||||
|
// Check if it's a URI
|
||||||
|
if (accessLocation.getTagNo() == GeneralName.uniformResourceIdentifier) {
|
||||||
|
String url = accessLocation.getName().toString();
|
||||||
|
caIssuerUrls.add(url);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.error("Error parsing AIA extension: {}", e.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
return caIssuerUrls;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@@ -0,0 +1,273 @@
|
|||||||
|
package nu.marginalia.ping.ssl;
|
||||||
|
|
||||||
|
import com.google.common.cache.Cache;
|
||||||
|
import com.google.common.cache.CacheBuilder;
|
||||||
|
import nu.marginalia.WmsaHome;
|
||||||
|
import org.apache.hc.client5.http.classic.HttpClient;
|
||||||
|
import org.apache.hc.client5.http.impl.classic.HttpClientBuilder;
|
||||||
|
import org.apache.hc.core5.http.ClassicHttpRequest;
|
||||||
|
import org.apache.hc.core5.http.io.support.ClassicRequestBuilder;
|
||||||
|
import org.bouncycastle.asn1.ASN1OctetString;
|
||||||
|
import org.bouncycastle.asn1.ASN1Primitive;
|
||||||
|
import org.bouncycastle.asn1.x509.*;
|
||||||
|
import org.bouncycastle.cert.X509CertificateHolder;
|
||||||
|
import org.bouncycastle.cert.jcajce.JcaX509CertificateConverter;
|
||||||
|
import org.bouncycastle.cms.CMSSignedData;
|
||||||
|
import org.bouncycastle.openssl.PEMParser;
|
||||||
|
import org.bouncycastle.util.Store;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.StringReader;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.security.cert.CertificateFactory;
|
||||||
|
import java.security.cert.TrustAnchor;
|
||||||
|
import java.security.cert.X509Certificate;
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
public class CertificateFetcher {
|
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(CertificateFetcher.class);
|
||||||
|
|
||||||
|
private static HttpClient client = HttpClientBuilder.create()
|
||||||
|
.build();
|
||||||
|
|
||||||
|
private static Cache<String, X509Certificate> cache = CacheBuilder
|
||||||
|
.newBuilder()
|
||||||
|
.expireAfterAccess(Duration.ofHours(6))
|
||||||
|
.maximumSize(10_000)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
|
||||||
|
public static List<X509Certificate> fetchMissingIntermediates(X509Certificate leafCert) {
|
||||||
|
List<X509Certificate> intermediates = new ArrayList<>();
|
||||||
|
|
||||||
|
// Get CA Issuer URLs from AIA extension
|
||||||
|
List<String> caIssuerUrls = AIAExtractor.getCaIssuerUrls(leafCert);
|
||||||
|
|
||||||
|
for (String url : caIssuerUrls) {
|
||||||
|
try {
|
||||||
|
// Check cache first
|
||||||
|
X509Certificate cached = cache.getIfPresent(url);
|
||||||
|
if (cached != null) {
|
||||||
|
intermediates.add(cached);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Download certificate
|
||||||
|
X509Certificate downloaded = downloadCertificate(url);
|
||||||
|
if (downloaded != null) {
|
||||||
|
// Verify this certificate can actually sign the leaf
|
||||||
|
if (canSign(downloaded, leafCert)) {
|
||||||
|
intermediates.add(downloaded);
|
||||||
|
cache.put(url, downloaded);
|
||||||
|
logger.info("Downloaded certificate for url: {}", url);
|
||||||
|
} else {
|
||||||
|
logger.warn("Downloaded certificate cannot sign leaf cert from: {}", url);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.error("Failed to fetch certificate from {}: {}", url, e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return intermediates;
|
||||||
|
}
|
||||||
|
private static X509Certificate downloadCertificate(String urlString) {
|
||||||
|
try {
|
||||||
|
ClassicHttpRequest request = ClassicRequestBuilder.create("GET")
|
||||||
|
.addHeader("User-Agent", WmsaHome.getUserAgent() + " (Certificate Fetcher)")
|
||||||
|
.setUri(urlString)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
byte[] data = client.execute(request, rsp -> {
|
||||||
|
var entity = rsp.getEntity();
|
||||||
|
if (entity == null) {
|
||||||
|
logger.warn("GET request returned no content for {}", urlString);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return entity.getContent().readAllBytes();
|
||||||
|
});
|
||||||
|
|
||||||
|
if (data.length == 0) {
|
||||||
|
logger.warn("Empty response from {}", urlString);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try different formats based on file extension
|
||||||
|
if (urlString.toLowerCase().endsWith(".p7c") || urlString.toLowerCase().endsWith(".p7b")) {
|
||||||
|
return parsePKCS7(data);
|
||||||
|
} else {
|
||||||
|
return parseX509(data);
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.warn("Failed to fetch certificate from {}: {}", urlString, e.getMessage());
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<X509Certificate> parseMultiplePEM(byte[] data) throws Exception {
|
||||||
|
List<X509Certificate> certificates = new ArrayList<>();
|
||||||
|
|
||||||
|
try (StringReader stringReader = new StringReader(new String(data, StandardCharsets.UTF_8));
|
||||||
|
PEMParser pemParser = new PEMParser(stringReader)) {
|
||||||
|
|
||||||
|
JcaX509CertificateConverter converter = new JcaX509CertificateConverter();
|
||||||
|
Object object;
|
||||||
|
|
||||||
|
while ((object = pemParser.readObject()) != null) {
|
||||||
|
if (object instanceof X509CertificateHolder) {
|
||||||
|
X509CertificateHolder certHolder = (X509CertificateHolder) object;
|
||||||
|
certificates.add(converter.getCertificate(certHolder));
|
||||||
|
} else if (object instanceof X509Certificate) {
|
||||||
|
certificates.add((X509Certificate) object);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return certificates;
|
||||||
|
}
|
||||||
|
private static X509Certificate parseX509(byte[] data) throws Exception {
|
||||||
|
CertificateFactory cf = CertificateFactory.getInstance("X.509");
|
||||||
|
return (X509Certificate) cf.generateCertificate(new ByteArrayInputStream(data));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static X509Certificate parsePKCS7(byte[] data) throws Exception {
|
||||||
|
try {
|
||||||
|
// Parse PKCS#7/CMS structure
|
||||||
|
CMSSignedData cmsData = new CMSSignedData(data);
|
||||||
|
Store<X509CertificateHolder> certStore = cmsData.getCertificates();
|
||||||
|
|
||||||
|
JcaX509CertificateConverter converter = new JcaX509CertificateConverter();
|
||||||
|
|
||||||
|
// Get the first certificate from the store
|
||||||
|
for (X509CertificateHolder certHolder : certStore.getMatches(null)) {
|
||||||
|
X509Certificate cert = converter.getCertificate(certHolder);
|
||||||
|
return cert;
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.warn("No certificates found in PKCS#7 structure");
|
||||||
|
return null;
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.error("Failed to parse PKCS#7 structure from {}: {}", data.length, e.getMessage());
|
||||||
|
return parseX509(data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean canSign(X509Certificate issuerCert, X509Certificate subjectCert) {
|
||||||
|
try {
|
||||||
|
// Check if the issuer DN matches
|
||||||
|
if (!issuerCert.getSubjectDN().equals(subjectCert.getIssuerDN())) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to verify the signature
|
||||||
|
subjectCert.verify(issuerCert.getPublicKey());
|
||||||
|
return true;
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recursive fetching for complete chains
|
||||||
|
public static List<X509Certificate> buildCompleteChain(X509Certificate leafCert) {
|
||||||
|
List<X509Certificate> completeChain = new ArrayList<>();
|
||||||
|
completeChain.add(leafCert);
|
||||||
|
|
||||||
|
X509Certificate currentCert = leafCert;
|
||||||
|
int maxDepth = 10; // Prevent infinite loops
|
||||||
|
|
||||||
|
while (maxDepth-- > 0) {
|
||||||
|
// If current cert is self-signed (root), we're done
|
||||||
|
if (currentCert.getSubjectDN().equals(currentCert.getIssuerDN())) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to find the issuer
|
||||||
|
List<X509Certificate> intermediates = fetchMissingIntermediates(currentCert);
|
||||||
|
if (intermediates.isEmpty()) {
|
||||||
|
logger.error("Could not find issuer for: {}", currentCert.getSubjectDN());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add the first valid intermediate
|
||||||
|
X509Certificate intermediate = intermediates.get(0);
|
||||||
|
completeChain.add(intermediate);
|
||||||
|
currentCert = intermediate;
|
||||||
|
}
|
||||||
|
|
||||||
|
return completeChain;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add this to your AIAExtractor class if not already present
|
||||||
|
public static List<String> getOCSPUrls(X509Certificate certificate) {
|
||||||
|
List<String> ocspUrls = new ArrayList<>();
|
||||||
|
|
||||||
|
try {
|
||||||
|
byte[] aiaExtensionValue = certificate.getExtensionValue(Extension.authorityInfoAccess.getId());
|
||||||
|
if (aiaExtensionValue == null) {
|
||||||
|
return ocspUrls;
|
||||||
|
}
|
||||||
|
|
||||||
|
ASN1OctetString octetString = ASN1OctetString.getInstance(aiaExtensionValue);
|
||||||
|
ASN1Primitive aiaObj = ASN1Primitive.fromByteArray(octetString.getOctets());
|
||||||
|
AuthorityInformationAccess aia = AuthorityInformationAccess.getInstance(aiaObj);
|
||||||
|
|
||||||
|
if (aia != null) {
|
||||||
|
AccessDescription[] accessDescriptions = aia.getAccessDescriptions();
|
||||||
|
|
||||||
|
for (AccessDescription accessDesc : accessDescriptions) {
|
||||||
|
if (X509ObjectIdentifiers.id_ad_ocsp.equals(accessDesc.getAccessMethod())) {
|
||||||
|
GeneralName accessLocation = accessDesc.getAccessLocation();
|
||||||
|
if (accessLocation.getTagNo() == GeneralName.uniformResourceIdentifier) {
|
||||||
|
String url = accessLocation.getName().toString();
|
||||||
|
ocspUrls.add(url);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.error("Error parsing AIA extension for OCSP: {}", e.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
return ocspUrls;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Set<TrustAnchor> getRootCerts(String bundleUrl) throws Exception {
|
||||||
|
ClassicHttpRequest request = ClassicRequestBuilder.create("GET")
|
||||||
|
.addHeader("User-Agent", WmsaHome.getUserAgent() + " (Certificate Fetcher)")
|
||||||
|
.setUri(bundleUrl)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
byte[] data = client.execute(request, rsp -> {
|
||||||
|
var entity = rsp.getEntity();
|
||||||
|
if (entity == null) {
|
||||||
|
logger.warn("GET request returned no content for {}", bundleUrl);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return entity.getContent().readAllBytes();
|
||||||
|
});
|
||||||
|
|
||||||
|
List<TrustAnchor> anchors = new ArrayList<>();
|
||||||
|
for (var cert : parseMultiplePEM(data)) {
|
||||||
|
try {
|
||||||
|
anchors.add(new TrustAnchor(cert, null));
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.warn("Failed to create TrustAnchor for certificate: {}", e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info("Loaded {} root certificates from {}", anchors.size(), bundleUrl);
|
||||||
|
|
||||||
|
return Set.copyOf(anchors);
|
||||||
|
}
|
||||||
|
}
|
@@ -0,0 +1,493 @@
|
|||||||
|
package nu.marginalia.ping.ssl;

import org.bouncycastle.asn1.ASN1OctetString;
import org.bouncycastle.asn1.ASN1Primitive;
import org.bouncycastle.asn1.x509.*;

import javax.security.auth.x500.X500Principal;
import java.security.cert.TrustAnchor;
import java.security.cert.X509Certificate;
import java.util.*;

/** Utility class for validating X.509 certificates.
 * This class provides methods to validate certificate chains, check expiration,
 * hostname validity, and revocation status.
 * <p></p>
 * This is extremely unsuitable for actual SSL/TLS validation,
 * and is only to be used in analyzing certificates for fingerprinting
 * and diagnosing servers!
 */
public class CertificateValidator {
    // If true, will attempt to fetch missing intermediate certificates via AIA urls.
    private static final boolean TRY_FETCH_MISSING_CERTS = false;

    public static class ValidationResult {
        public boolean chainValid = false;
        public boolean certificateExpired = false;
        public boolean certificateRevoked = false;
        public boolean selfSigned = false;
        public boolean hostnameValid = false;

        public boolean isValid() {
            return !selfSigned && !certificateExpired && !certificateRevoked && hostnameValid;
        }

        public List<String> errors = new ArrayList<>();
        public List<String> warnings = new ArrayList<>();
        public Map<String, Object> details = new HashMap<>();

        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
            sb.append("=== Certificate Validation Result ===\n");
            sb.append("Chain Valid: ").append(chainValid ? "✓" : "✗").append("\n");
            sb.append("Not Expired: ").append(!certificateExpired ? "✓" : "✗").append("\n");
            sb.append("Not Revoked: ").append(!certificateRevoked ? "✓" : "✗").append("\n");
            sb.append("Hostname Valid: ").append(hostnameValid ? "✓" : "✗").append("\n");
            sb.append("Self-Signed: ").append(selfSigned ? "✓" : "✗").append("\n");

            if (!errors.isEmpty()) {
                sb.append("\nErrors:\n");
                for (String error : errors) {
                    sb.append("  ✗ ").append(error).append("\n");
                }
            }

            if (!warnings.isEmpty()) {
                sb.append("\nWarnings:\n");
                for (String warning : warnings) {
                    sb.append("  ⚠ ").append(warning).append("\n");
                }
            }

            if (!details.isEmpty()) {
                sb.append("\nDetails:\n");
                for (Map.Entry<String, Object> entry : details.entrySet()) {
                    sb.append("  ").append(entry.getKey()).append(": ").append(entry.getValue()).append("\n");
                }
            }

            return sb.toString();
        }
    }

    public static ValidationResult validateCertificate(X509Certificate[] certChain,
                                                       String hostname) {
        return validateCertificate(certChain, hostname, false);
    }

    public static ValidationResult validateCertificate(X509Certificate[] certChain,
                                                       String hostname,
                                                       boolean autoTrustFetchedRoots) {
        ValidationResult result = new ValidationResult();

        if (certChain == null || certChain.length == 0) {
            result.errors.add("No certificates provided");
            return result;
        }

        X509Certificate leafCert = certChain[0];

        // 1. Check certificate expiration
        result.certificateExpired = checkExpiration(leafCert, result);

        // 2. Check hostname validity
        result.hostnameValid = checkHostname(leafCert, hostname, result);

        // 3. Not a true self-signed check: a chain of length 1 is treated as incomplete and likely self-signed
        result.selfSigned = certChain.length <= 1;

        // 4. Check certificate chain validity (optionally with AIA fetching)
        result.chainValid = checkChainValidity(certChain, RootCerts.getTrustAnchors(), result, autoTrustFetchedRoots);

        // 5. Check revocation status
        result.certificateRevoked = false; // not implemented
        // checkRevocation(leafCert, result);

        return result;
    }
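For context, an illustrative sketch of how this entry point might be driven with a chain captured from a TLS handshake. The wrapper class and method name are invented, it assumes the same package as CertificateValidator, and the array re-typing assumes the peer presented X.509 certificates (true for ordinary HTTPS servers).

import javax.net.ssl.SSLSession;
import java.security.cert.Certificate;
import java.security.cert.X509Certificate;
import java.util.Arrays;

class ValidationExample {
    static CertificateValidator.ValidationResult validatePeer(SSLSession session, String hostname) throws Exception {
        // getPeerCertificates() is declared as java.security.cert.Certificate[], so re-type the array
        Certificate[] peerCerts = session.getPeerCertificates();
        X509Certificate[] chain = Arrays.copyOf(peerCerts, peerCerts.length, X509Certificate[].class);

        CertificateValidator.ValidationResult result = CertificateValidator.validateCertificate(chain, hostname);
        if (!result.isValid()) {
            System.out.println(result); // toString() lists errors, warnings and details
        }
        return result;
    }
}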
    private static boolean checkExpiration(X509Certificate cert, ValidationResult result) {
        try {
            cert.checkValidity();
            result.details.put("validFrom", cert.getNotBefore());
            result.details.put("validTo", cert.getNotAfter());

            // Warn if expires soon (30 days)
            long daysUntilExpiry = (cert.getNotAfter().getTime() - System.currentTimeMillis()) / (1000 * 60 * 60 * 24);
            if (daysUntilExpiry < 30) {
                result.warnings.add("Certificate expires in " + daysUntilExpiry + " days");
            }

            return false; // Not expired
        } catch (Exception e) {
            result.errors.add("Certificate expired or not yet valid: " + e.getMessage());
            return true; // Expired
        }
    }
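The millisecond division above can equivalently be written with java.time; a small sketch for comparison only (the helper class name is invented, not part of the changeset):

import java.security.cert.X509Certificate;
import java.time.Instant;
import java.time.temporal.ChronoUnit;

class ExpiryMath {
    // Same quantity as the millisecond arithmetic in checkExpiration, expressed via java.time
    static long daysUntilExpiry(X509Certificate cert) {
        return ChronoUnit.DAYS.between(Instant.now(), cert.getNotAfter().toInstant());
    }
}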
    private static boolean checkHostname(X509Certificate cert, String hostname, ValidationResult result) {
        if (hostname == null || hostname.isEmpty()) {
            result.warnings.add("No hostname provided for validation");
            return false;
        }

        try {
            // Check Subject CN
            String subjectCN = getCommonName(cert.getSubjectX500Principal());
            if (subjectCN != null && matchesHostname(subjectCN, hostname)) {
                result.details.put("hostnameMatchedBy", "Subject CN: " + subjectCN);
                return true;
            }

            // Check Subject Alternative Names
            Collection<List<?>> subjectAltNames = cert.getSubjectAlternativeNames();
            if (subjectAltNames != null) {
                for (List<?> altName : subjectAltNames) {
                    if (altName.size() >= 2) {
                        Integer type = (Integer) altName.get(0);
                        if (type == 2) { // DNS name
                            String dnsName = (String) altName.get(1);
                            if (matchesHostname(dnsName, hostname)) {
                                result.details.put("hostnameMatchedBy", "SAN DNS: " + dnsName);
                                return true;
                            }
                        }
                    }
                }
            }

            result.errors.add("Hostname '" + hostname + "' does not match certificate");
            result.details.put("subjectCN", subjectCN);
            result.details.put("subjectAltNames", subjectAltNames);
            return false;

        } catch (Exception e) {
            result.errors.add("Error checking hostname: " + e.getMessage());
            return false;
        }
    }

    private static boolean checkChainValidity(X509Certificate[] originalChain,
                                              Set<TrustAnchor> trustAnchors,
                                              ValidationResult result,
                                              boolean autoTrustFetchedRoots) {
        try {
            // First try with the original chain
            ChainValidationResult originalResult = validateChain(originalChain, trustAnchors);

            if (originalResult.isValid) {
                result.details.put("chainLength", originalChain.length);
                result.details.put("chainExtended", false);
                return true;
            }

            else if (!TRY_FETCH_MISSING_CERTS) {
                result.errors.addAll(originalResult.issues);
                result.details.put("chainLength", originalChain.length);
                result.details.put("chainExtended", false);
                return false;
            }

            try {
                List<X509Certificate> repairedChain = CertificateFetcher.buildCompleteChain(originalChain[0]);

                if (!repairedChain.isEmpty()) {

                    X509Certificate[] extendedArray = repairedChain.toArray(new X509Certificate[0]);

                    // Create a copy of trust anchors for potential modification
                    Set<TrustAnchor> workingTrustAnchors = new HashSet<>(trustAnchors);

                    // If auto-trust is enabled, add any self-signed certs as trusted roots
                    if (autoTrustFetchedRoots) {
                        for (X509Certificate cert : extendedArray) {
                            if (cert.getSubjectX500Principal().equals(cert.getIssuerX500Principal())) {
                                // Self-signed certificate - add to trust anchors if not already there
                                boolean alreadyTrusted = false;
                                for (TrustAnchor anchor : workingTrustAnchors) {
                                    if (anchor.getTrustedCert().equals(cert)) {
                                        alreadyTrusted = true;
                                        break;
                                    }
                                }
                                if (!alreadyTrusted) {
                                    workingTrustAnchors.add(new TrustAnchor(cert, null));
                                    result.warnings.add("Auto-trusted fetched root: " + cert.getSubjectX500Principal().getName());
                                }
                            }
                        }
                    }

                    ChainValidationResult extendedResult = validateChain(extendedArray, workingTrustAnchors);

                    result.details.put("chainLength", extendedArray.length);
                    result.details.put("originalChainLength", originalChain.length);
                    result.details.put("chainExtended", true);
                    result.details.put("fetchedIntermediates", extendedArray.length);
                    result.details.put("autoTrustedRoots", autoTrustFetchedRoots);

                    if (extendedResult.isValid) {
                        result.warnings.add("Extended certificate chain with " + extendedArray.length + " fetched intermediates");
                        return true;
                    } else {
                        result.errors.addAll(extendedResult.issues);
                        return false;
                    }
                } else {
                    result.warnings.add("Could not fetch missing intermediate certificates");
                    result.details.put("chainLength", originalChain.length);
                    result.details.put("chainExtended", false);
                    result.errors.addAll(originalResult.issues);
                    return false;
                }

            } catch (Exception e) {
                result.warnings.add("Failed to fetch intermediates: " + e.getMessage());
                result.details.put("chainLength", originalChain.length);
                result.details.put("chainExtended", false);
                result.errors.addAll(originalResult.issues);
                return false;
            }

        } catch (Exception e) {
            result.errors.add("Error validating chain: " + e.getMessage());
            return false;
        }
    }

    private static void debugCertificateChain(List<X509Certificate> certs, Set<TrustAnchor> trustAnchors) {
        System.out.println("=== Certificate Chain Analysis ===");

        int length = certs.size();
        System.out.println("Chain length: " + length);

        int i = 0;
        for (var x509cert : certs) {
            System.out.println("\nCertificate " + i++ + ":");
            System.out.println("  Subject: " + x509cert.getSubjectDN().getName());
            System.out.println("  Issuer: " + x509cert.getIssuerDN().getName());
            System.out.println("  Serial: " + x509cert.getSerialNumber().toString(16));
            System.out.println("  Valid: " + x509cert.getNotBefore() + " to " + x509cert.getNotAfter());
            System.out.println("  Self-signed: " + x509cert.getSubjectDN().equals(x509cert.getIssuerDN()));

            // Check if we have the issuer in our trust anchors
            boolean issuerFound = false;
            for (TrustAnchor anchor : trustAnchors) {
                if (anchor.getTrustedCert().getSubjectDN().equals(x509cert.getIssuerDN())) {
                    issuerFound = true;
                    System.out.println("  Issuer found in trust anchors: " + anchor.getTrustedCert().getSubjectDN().getName());
                    break;
                }
            }
            if (!issuerFound && i == length) {
                System.out.println("  *** MISSING ISSUER: " + x509cert.getIssuerDN().getName());
            }
        }
    }

    private static class ChainValidationResult {
        boolean isValid = false;
        List<String> issues = new ArrayList<>();
    }

    private static ChainValidationResult validateChain(X509Certificate[] certChain, Set<TrustAnchor> trustAnchors) {
        ChainValidationResult result = new ChainValidationResult();

        // Check each certificate in the chain
        for (int i = 0; i < certChain.length; i++) {
            X509Certificate cert = certChain[i];

            // Check certificate validity dates
            try {
                cert.checkValidity();
            } catch (Exception e) {
                result.issues.add("Certificate " + i + " expired: " + cert.getSubjectDN());
            }

            // Check signature (except for self-signed root)
            if (i < certChain.length - 1) {
                X509Certificate issuer = certChain[i + 1];
                try {
                    cert.verify(issuer.getPublicKey());
                } catch (Exception e) {
                    result.issues.add("Certificate " + i + " signature invalid: " + e.getMessage());
                }

                // Check issuer/subject relationship
                if (!cert.getIssuerX500Principal().equals(issuer.getSubjectX500Principal())) {
                    result.issues.add("Certificate " + i + " issuer does not match certificate " + (i + 1) + " subject");
                }
            }
        }

        // Check if chain ends with a trusted root
        X509Certificate rootCert = certChain[certChain.length - 1];
        boolean trustedRootFound = false;

        if (rootCert.getSubjectX500Principal().equals(rootCert.getIssuerX500Principal())) {
            // Self-signed root - check if it's in trust anchors
            for (TrustAnchor anchor : trustAnchors) {
                if (anchor.getTrustedCert().equals(rootCert)) {
                    trustedRootFound = true;
                    break;
                }
            }

            if (!trustedRootFound) {
                // Check if we trust the root's subject even if the certificate is different
                for (TrustAnchor anchor : trustAnchors) {
                    if (anchor.getTrustedCert().getSubjectX500Principal().equals(rootCert.getSubjectX500Principal())) {
                        trustedRootFound = true;
                        // Note: we'll add this as a warning in the main result
                        break;
                    }
                }
            }
        } else {
            // Chain doesn't end with self-signed cert - check if issuer is trusted
            for (TrustAnchor anchor : trustAnchors) {
                if (anchor.getTrustedCert().getSubjectX500Principal().equals(rootCert.getIssuerX500Principal())) {
                    trustedRootFound = true;
                    break;
                }
            }
        }

        if (!trustedRootFound) {
            result.issues.add("Chain does not end with a trusted root");
        }

        result.isValid = result.issues.isEmpty();
        return result;
    }
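The manual walk in validateChain roughly mirrors what the JDK's PKIX machinery does. For comparison only, a standalone illustrative sketch using java.security.cert directly; it disables revocation checking the same way the code above leaves it unimplemented, and note that PKIX conventionally expects the trust anchor itself to be left out of the supplied path. Class and method names are invented.

import java.security.GeneralSecurityException;
import java.security.cert.*;
import java.util.Arrays;
import java.util.Set;

class PkixComparisonSketch {
    static boolean isChainValid(X509Certificate[] chain, Set<TrustAnchor> anchors) {
        try {
            CertificateFactory cf = CertificateFactory.getInstance("X.509");
            CertPath path = cf.generateCertPath(Arrays.asList(chain));
            PKIXParameters params = new PKIXParameters(anchors);
            params.setRevocationEnabled(false); // matches the unimplemented revocation handling above
            CertPathValidator.getInstance("PKIX").validate(path, params);
            return true;
        } catch (GeneralSecurityException e) {
            return false; // expired, untrusted, bad signature, etc.
        }
    }
}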

    private static boolean checkRevocation(X509Certificate cert, ValidationResult result) {
        try {
            // Try OCSP first
            if (checkOCSP(cert, result)) {
                return true; // Revoked
            }

            // Fallback to CRL
            if (checkCRL(cert, result)) {
                return true; // Revoked
            }

            result.warnings.add("Could not check revocation status");
            return false; // Assume not revoked if we can't check

        } catch (Exception e) {
            result.warnings.add("Error checking revocation: " + e.getMessage());
            return false;
        }
    }

    private static boolean checkOCSP(X509Certificate cert, ValidationResult result) {
        // For now, just extract OCSP URL and note that we found it
        try {
            List<String> ocspUrls = CertificateFetcher.getOCSPUrls(cert);
            if (!ocspUrls.isEmpty()) {
                result.details.put("ocspUrls", ocspUrls);
                result.warnings.add("OCSP checking not implemented - found OCSP URLs: " + ocspUrls);
            }
            return false;
        } catch (Exception e) {
            return false;
        }
    }

    private static boolean checkCRL(X509Certificate cert, ValidationResult result) {
        // Basic CRL URL extraction
        try {
            List<String> crlUrls = getCRLUrls(cert);
            if (!crlUrls.isEmpty()) {
                result.details.put("crlUrls", crlUrls);
                result.warnings.add("CRL checking not implemented - found CRL URLs: " + crlUrls);
            }
            return false;
        } catch (Exception e) {
            return false;
        }
    }

    // Helper methods
    private static String getCommonName(X500Principal principal) {
        String name = principal.getName();
        String[] parts = name.split(",");
        for (String part : parts) {
            part = part.trim();
            if (part.startsWith("CN=")) {
                return part.substring(3);
            }
        }
        return null;
    }

    private static boolean matchesHostname(String certName, String hostname) {
        if (certName == null || hostname == null) {
            return false;
        }

        // Exact match
        if (certName.equalsIgnoreCase(hostname)) {
            return true;
        }

        // Wildcard match
        if (certName.startsWith("*.")) {
            String certDomain = certName.substring(2);
            String hostDomain = hostname;
            int firstDot = hostname.indexOf('.');
            if (firstDot > 0) {
                hostDomain = hostname.substring(firstDot + 1);
            }
            return certDomain.equalsIgnoreCase(hostDomain);
        }

        return false;
    }
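The wildcard branch above strips exactly one leading label from the hostname before comparing. Since matchesHostname is private, the expected behaviour is shown below against a duplicated copy of the same logic; the demo class is purely illustrative and not part of the changeset.

class WildcardMatchDemo {
    // Copy of the private matchesHostname logic above, duplicated only so the
    // expectations in main() are runnable on their own; behaviour is identical.
    static boolean matches(String certName, String hostname) {
        if (certName == null || hostname == null) return false;
        if (certName.equalsIgnoreCase(hostname)) return true;
        if (certName.startsWith("*.")) {
            String certDomain = certName.substring(2);
            String hostDomain = hostname;
            int firstDot = hostname.indexOf('.');
            if (firstDot > 0) hostDomain = hostname.substring(firstDot + 1);
            return certDomain.equalsIgnoreCase(hostDomain);
        }
        return false;
    }

    public static void main(String[] args) {
        System.out.println(matches("EXAMPLE.com", "example.COM"));        // true  (exact match, case-insensitive)
        System.out.println(matches("*.example.com", "www.example.com"));  // true  (one label under the wildcard)
        System.out.println(matches("*.example.com", "example.com"));      // false (bare domain is not covered)
        System.out.println(matches("*.example.com", "a.b.example.com"));  // false (only one label is stripped)
    }
}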

    private static List<String> getCRLUrls(X509Certificate cert) {
        // This would need to parse the CRL Distribution Points extension
        // For now, return empty list
        return new ArrayList<>();
    }

    // Extracts OCSP responder URLs from the certificate's Authority Information Access extension
    public static List<String> getOCSPUrls(X509Certificate certificate) {
        List<String> ocspUrls = new ArrayList<>();

        try {
            byte[] aiaExtensionValue = certificate.getExtensionValue(Extension.authorityInfoAccess.getId());
            if (aiaExtensionValue == null) {
                return ocspUrls;
            }

            ASN1OctetString octetString = ASN1OctetString.getInstance(aiaExtensionValue);
            ASN1Primitive aiaObj = ASN1Primitive.fromByteArray(octetString.getOctets());
            AuthorityInformationAccess aia = AuthorityInformationAccess.getInstance(aiaObj);

            if (aia != null) {
                AccessDescription[] accessDescriptions = aia.getAccessDescriptions();

                for (AccessDescription accessDesc : accessDescriptions) {
                    if (X509ObjectIdentifiers.id_ad_ocsp.equals(accessDesc.getAccessMethod())) {
                        GeneralName accessLocation = accessDesc.getAccessLocation();
                        if (accessLocation.getTagNo() == GeneralName.uniformResourceIdentifier) {
                            String url = accessLocation.getName().toString();
                            ocspUrls.add(url);
                        }
                    }
                }
            }

        } catch (Exception e) {
            System.err.println("Error parsing AIA extension for OCSP: " + e.getMessage());
        }

        return ocspUrls;
    }

}
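An illustrative way to exercise getOCSPUrls on its own: load any PEM- or DER-encoded certificate with the standard CertificateFactory and print the responder URLs found in its AIA extension. The class name and the file-path argument are placeholders, and the sketch assumes the same package as CertificateValidator.

import java.io.FileInputStream;
import java.io.InputStream;
import java.security.cert.CertificateFactory;
import java.security.cert.X509Certificate;
import java.util.List;

class OcspUrlDemo {
    public static void main(String[] args) throws Exception {
        // args[0]: path to a certificate file (PEM or DER)
        try (InputStream in = new FileInputStream(args[0])) {
            X509Certificate cert = (X509Certificate) CertificateFactory.getInstance("X.509").generateCertificate(in);
            List<String> ocsp = CertificateValidator.getOCSPUrls(cert);
            System.out.println("OCSP responders: " + ocsp);
        }
    }
}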
@@ -0,0 +1,57 @@
package nu.marginalia.ping.ssl;

import java.security.cert.TrustAnchor;
import java.time.Duration;
import java.util.Set;

public class RootCerts {
    private static final String MOZILLA_CA_BUNDLE_URL = "https://curl.se/ca/cacert.pem";

    volatile static boolean initialized = false;
    volatile static Set<TrustAnchor> trustAnchors;

    public static Set<TrustAnchor> getTrustAnchors() {
        if (!initialized) {
            try {
                synchronized (RootCerts.class) {
                    while (!initialized) {
                        RootCerts.class.wait(100);
                    }
                }
            }
            catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                throw new RuntimeException("RootCerts initialization interrupted", e);
            }
        }
        return trustAnchors;
    }

    static {
        Thread.ofPlatform()
                .name("RootCertsUpdater")
                .daemon()
                .unstarted(RootCerts::updateTrustAnchors)
                .start();
    }

    private static void updateTrustAnchors() {
        while (true) {
            try {
                trustAnchors = CertificateFetcher.getRootCerts(MOZILLA_CA_BUNDLE_URL);
                synchronized (RootCerts.class) {
                    initialized = true;
                    RootCerts.class.notifyAll(); // Notify any waiting threads
                }
                Thread.sleep(Duration.ofHours(24));
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                break; // Exit if interrupted
            } catch (Exception e) {
                // Log the exception and continue to retry
                System.err.println("Failed to update trust anchors: " + e.getMessage());
            }
        }
    }

}
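Callers only ever touch the getTrustAnchors() accessor; the first call blocks in 100 ms waits until the background updater thread has completed its initial fetch of the CA bundle. A minimal illustrative caller, with an invented class name, assuming the same package:

import java.security.cert.TrustAnchor;
import java.util.Set;

class RootCertsDemo {
    public static void main(String[] args) {
        // Blocks until the RootCertsUpdater daemon has loaded the bundle at least once
        Set<TrustAnchor> anchors = RootCerts.getTrustAnchors();
        System.out.println("Trust anchors available: " + anchors.size());
    }
}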
Some files were not shown because too many files have changed in this diff.