mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-06 07:32:38 +02:00
Compare commits
4 Commits
deploy-006
...
deploy-006
Author | SHA1 | Date | |
---|---|---|---|
|
567e4e1237 | ||
|
4342e42722 | ||
|
bc818056e6 | ||
|
de2feac238 |
@@ -47,7 +47,7 @@ ext {
|
||||
dockerImageBase='container-registry.oracle.com/graalvm/jdk:23'
|
||||
dockerImageTag='latest'
|
||||
dockerImageRegistry='marginalia'
|
||||
jibVersion = '3.4.3'
|
||||
jibVersion = '3.4.4'
|
||||
|
||||
}
|
||||
|
||||
|
@@ -22,6 +22,7 @@ import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.time.Duration;
|
||||
import java.time.Instant;
|
||||
import java.util.*;
|
||||
|
||||
@@ -89,6 +90,7 @@ public class WarcRecorder implements AutoCloseable {
|
||||
|
||||
var call = client.newCall(request);
|
||||
|
||||
|
||||
cookieInformation.update(client, request.url());
|
||||
|
||||
try (var response = call.execute();
|
||||
@@ -167,6 +169,25 @@ public class WarcRecorder implements AutoCloseable {
|
||||
warcRequest.http(); // force HTTP header to be parsed before body is consumed so that caller can use it
|
||||
writer.write(warcRequest);
|
||||
|
||||
if (Duration.between(date, Instant.now()).compareTo(Duration.ofSeconds(9)) > 0
|
||||
&& inputBuffer.size() < 2048
|
||||
&& !request.url().encodedPath().endsWith("robots.txt")) // don't bail on robots.txt
|
||||
{
|
||||
// Fast detection and mitigation of crawler traps that respond with slow
|
||||
// small responses, with a high branching factor
|
||||
|
||||
// Note we bail *after* writing the warc records, this will effectively only
|
||||
// prevent link extraction from the document.
|
||||
|
||||
logger.warn("URL {} took too long to fetch ({}s) and was too small for the effort ({}b)",
|
||||
requestUri,
|
||||
Duration.between(date, Instant.now()).getSeconds(),
|
||||
inputBuffer.size()
|
||||
);
|
||||
|
||||
return new HttpFetchResult.ResultException(new IOException("Likely crawler trap"));
|
||||
}
|
||||
|
||||
return new HttpFetchResult.ResultOk(responseUri,
|
||||
response.code(),
|
||||
inputBuffer.headers(),
|
||||
|
@@ -72,11 +72,11 @@ services:
|
||||
image: "mariadb:lts"
|
||||
container_name: "mariadb"
|
||||
env_file: "${INSTALL_DIR}/env/mariadb.env"
|
||||
command: ['mysqld', '--character-set-server=utf8mb4', '--collation-server=utf8mb4_unicode_ci']
|
||||
command: ['mariadbd', '--character-set-server=utf8mb4', '--collation-server=utf8mb4_unicode_ci']
|
||||
ports:
|
||||
- "127.0.0.1:3306:3306/tcp"
|
||||
healthcheck:
|
||||
test: mysqladmin ping -h 127.0.0.1 -u ${uval} --password=${pval}
|
||||
test: mariadb-admin ping -h 127.0.0.1 -u ${uval} --password=${pval}
|
||||
start_period: 5s
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
|
@@ -103,11 +103,11 @@ services:
|
||||
image: "mariadb:lts"
|
||||
container_name: "mariadb"
|
||||
env_file: "${INSTALL_DIR}/env/mariadb.env"
|
||||
command: ['mysqld', '--character-set-server=utf8mb4', '--collation-server=utf8mb4_unicode_ci']
|
||||
command: ['mariadbd', '--character-set-server=utf8mb4', '--collation-server=utf8mb4_unicode_ci']
|
||||
ports:
|
||||
- "127.0.0.1:3306:3306/tcp"
|
||||
healthcheck:
|
||||
test: mysqladmin ping -h 127.0.0.1 -u ${uval} --password=${pval}
|
||||
test: mariadb-admin ping -h 127.0.0.1 -u ${uval} --password=${pval}
|
||||
start_period: 5s
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
|
@@ -129,11 +129,11 @@ services:
|
||||
image: "mariadb:lts"
|
||||
container_name: "mariadb"
|
||||
env_file: "${INSTALL_DIR}/env/mariadb.env"
|
||||
command: ['mysqld', '--character-set-server=utf8mb4', '--collation-server=utf8mb4_unicode_ci']
|
||||
command: ['mariadbd', '--character-set-server=utf8mb4', '--collation-server=utf8mb4_unicode_ci']
|
||||
ports:
|
||||
- "127.0.0.1:3306:3306/tcp"
|
||||
healthcheck:
|
||||
test: mysqladmin ping -h 127.0.0.1 -u ${uval} --password=${pval}
|
||||
test: mariadb-admin ping -h 127.0.0.1 -u ${uval} --password=${pval}
|
||||
start_period: 5s
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
|
@@ -3,11 +3,11 @@ services:
|
||||
image: "mariadb:lts"
|
||||
container_name: "mariadb"
|
||||
env_file: "${INSTALL_DIR}/env/mariadb.env"
|
||||
command: ['mysqld', '--character-set-server=utf8mb4', '--collation-server=utf8mb4_unicode_ci']
|
||||
command: ['mariadbd', '--character-set-server=utf8mb4', '--collation-server=utf8mb4_unicode_ci']
|
||||
ports:
|
||||
- "127.0.0.1:3306:3306/tcp"
|
||||
healthcheck:
|
||||
test: mysqladmin ping -h 127.0.0.1 -u ${uval} --password=${pval}
|
||||
test: mariadb-admin ping -h 127.0.0.1 -u ${uval} --password=${pval}
|
||||
start_period: 5s
|
||||
interval: 5s
|
||||
timeout: 5s
|
||||
|
Reference in New Issue
Block a user