mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-07 13:52:39 +02:00
Compare commits
4 Commits
deploy-006
...
deploy-006
Author | SHA1 | Date | |
---|---|---|---|
|
567e4e1237 | ||
|
4342e42722 | ||
|
bc818056e6 | ||
|
de2feac238 |
@@ -47,7 +47,7 @@ ext {
|
|||||||
dockerImageBase='container-registry.oracle.com/graalvm/jdk:23'
|
dockerImageBase='container-registry.oracle.com/graalvm/jdk:23'
|
||||||
dockerImageTag='latest'
|
dockerImageTag='latest'
|
||||||
dockerImageRegistry='marginalia'
|
dockerImageRegistry='marginalia'
|
||||||
jibVersion = '3.4.3'
|
jibVersion = '3.4.4'
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -22,6 +22,7 @@ import java.nio.charset.StandardCharsets;
|
|||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.security.NoSuchAlgorithmException;
|
import java.security.NoSuchAlgorithmException;
|
||||||
|
import java.time.Duration;
|
||||||
import java.time.Instant;
|
import java.time.Instant;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
@@ -89,6 +90,7 @@ public class WarcRecorder implements AutoCloseable {
|
|||||||
|
|
||||||
var call = client.newCall(request);
|
var call = client.newCall(request);
|
||||||
|
|
||||||
|
|
||||||
cookieInformation.update(client, request.url());
|
cookieInformation.update(client, request.url());
|
||||||
|
|
||||||
try (var response = call.execute();
|
try (var response = call.execute();
|
||||||
@@ -167,6 +169,25 @@ public class WarcRecorder implements AutoCloseable {
|
|||||||
warcRequest.http(); // force HTTP header to be parsed before body is consumed so that caller can use it
|
warcRequest.http(); // force HTTP header to be parsed before body is consumed so that caller can use it
|
||||||
writer.write(warcRequest);
|
writer.write(warcRequest);
|
||||||
|
|
||||||
|
if (Duration.between(date, Instant.now()).compareTo(Duration.ofSeconds(9)) > 0
|
||||||
|
&& inputBuffer.size() < 2048
|
||||||
|
&& !request.url().encodedPath().endsWith("robots.txt")) // don't bail on robots.txt
|
||||||
|
{
|
||||||
|
// Fast detection and mitigation of crawler traps that respond with slow
|
||||||
|
// small responses, with a high branching factor
|
||||||
|
|
||||||
|
// Note we bail *after* writing the warc records, this will effectively only
|
||||||
|
// prevent link extraction from the document.
|
||||||
|
|
||||||
|
logger.warn("URL {} took too long to fetch ({}s) and was too small for the effort ({}b)",
|
||||||
|
requestUri,
|
||||||
|
Duration.between(date, Instant.now()).getSeconds(),
|
||||||
|
inputBuffer.size()
|
||||||
|
);
|
||||||
|
|
||||||
|
return new HttpFetchResult.ResultException(new IOException("Likely crawler trap"));
|
||||||
|
}
|
||||||
|
|
||||||
return new HttpFetchResult.ResultOk(responseUri,
|
return new HttpFetchResult.ResultOk(responseUri,
|
||||||
response.code(),
|
response.code(),
|
||||||
inputBuffer.headers(),
|
inputBuffer.headers(),
|
||||||
|
@@ -72,11 +72,11 @@ services:
|
|||||||
image: "mariadb:lts"
|
image: "mariadb:lts"
|
||||||
container_name: "mariadb"
|
container_name: "mariadb"
|
||||||
env_file: "${INSTALL_DIR}/env/mariadb.env"
|
env_file: "${INSTALL_DIR}/env/mariadb.env"
|
||||||
command: ['mysqld', '--character-set-server=utf8mb4', '--collation-server=utf8mb4_unicode_ci']
|
command: ['mariadbd', '--character-set-server=utf8mb4', '--collation-server=utf8mb4_unicode_ci']
|
||||||
ports:
|
ports:
|
||||||
- "127.0.0.1:3306:3306/tcp"
|
- "127.0.0.1:3306:3306/tcp"
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: mysqladmin ping -h 127.0.0.1 -u ${uval} --password=${pval}
|
test: mariadb-admin ping -h 127.0.0.1 -u ${uval} --password=${pval}
|
||||||
start_period: 5s
|
start_period: 5s
|
||||||
interval: 5s
|
interval: 5s
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
|
@@ -103,11 +103,11 @@ services:
|
|||||||
image: "mariadb:lts"
|
image: "mariadb:lts"
|
||||||
container_name: "mariadb"
|
container_name: "mariadb"
|
||||||
env_file: "${INSTALL_DIR}/env/mariadb.env"
|
env_file: "${INSTALL_DIR}/env/mariadb.env"
|
||||||
command: ['mysqld', '--character-set-server=utf8mb4', '--collation-server=utf8mb4_unicode_ci']
|
command: ['mariadbd', '--character-set-server=utf8mb4', '--collation-server=utf8mb4_unicode_ci']
|
||||||
ports:
|
ports:
|
||||||
- "127.0.0.1:3306:3306/tcp"
|
- "127.0.0.1:3306:3306/tcp"
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: mysqladmin ping -h 127.0.0.1 -u ${uval} --password=${pval}
|
test: mariadb-admin ping -h 127.0.0.1 -u ${uval} --password=${pval}
|
||||||
start_period: 5s
|
start_period: 5s
|
||||||
interval: 5s
|
interval: 5s
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
|
@@ -129,11 +129,11 @@ services:
|
|||||||
image: "mariadb:lts"
|
image: "mariadb:lts"
|
||||||
container_name: "mariadb"
|
container_name: "mariadb"
|
||||||
env_file: "${INSTALL_DIR}/env/mariadb.env"
|
env_file: "${INSTALL_DIR}/env/mariadb.env"
|
||||||
command: ['mysqld', '--character-set-server=utf8mb4', '--collation-server=utf8mb4_unicode_ci']
|
command: ['mariadbd', '--character-set-server=utf8mb4', '--collation-server=utf8mb4_unicode_ci']
|
||||||
ports:
|
ports:
|
||||||
- "127.0.0.1:3306:3306/tcp"
|
- "127.0.0.1:3306:3306/tcp"
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: mysqladmin ping -h 127.0.0.1 -u ${uval} --password=${pval}
|
test: mariadb-admin ping -h 127.0.0.1 -u ${uval} --password=${pval}
|
||||||
start_period: 5s
|
start_period: 5s
|
||||||
interval: 5s
|
interval: 5s
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
|
@@ -3,11 +3,11 @@ services:
|
|||||||
image: "mariadb:lts"
|
image: "mariadb:lts"
|
||||||
container_name: "mariadb"
|
container_name: "mariadb"
|
||||||
env_file: "${INSTALL_DIR}/env/mariadb.env"
|
env_file: "${INSTALL_DIR}/env/mariadb.env"
|
||||||
command: ['mysqld', '--character-set-server=utf8mb4', '--collation-server=utf8mb4_unicode_ci']
|
command: ['mariadbd', '--character-set-server=utf8mb4', '--collation-server=utf8mb4_unicode_ci']
|
||||||
ports:
|
ports:
|
||||||
- "127.0.0.1:3306:3306/tcp"
|
- "127.0.0.1:3306:3306/tcp"
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: mysqladmin ping -h 127.0.0.1 -u ${uval} --password=${pval}
|
test: mariadb-admin ping -h 127.0.0.1 -u ${uval} --password=${pval}
|
||||||
start_period: 5s
|
start_period: 5s
|
||||||
interval: 5s
|
interval: 5s
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
|
Reference in New Issue
Block a user