1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-06 07:32:38 +02:00

Compare commits

...

6 Commits

Author SHA1 Message Date
Viktor Lofgren
e9d86dca4a (crawler) Add timeout to wrap-up phase of WarcInputBuffer. 2025-05-01 15:57:47 +02:00
Viktor Lofgren
1d693f0efa (build) Upgrade JIB to 3.4.5 2025-04-30 15:26:52 +02:00
Viktor Lofgren
5874a163dc (build) Upgrade gradle to 8.14 2025-04-30 15:26:37 +02:00
Viktor Lofgren
5ec7a1deab (crawler) Fix 80%-ish progress crawler stall
Since the crawl tasks are started in two phases, first when generating them in one loop, and then in a second loop that drains the task list; if the first loop contains a long-running crawl task that is triggered late, the rest of the crawl may halt until that task is finish.

Fixed the problem by draining and re-trying also in the first loop.
2025-04-29 12:23:51 +02:00
Viktor Lofgren
7fea2808ed (search) Fix error view
Fix rendering error when query was null

Fix border on error message.
2025-04-27 12:12:56 +02:00
Viktor Lofgren
8da74484f0 (search) Remove unused count modifier from the footer help 2025-04-27 12:08:34 +02:00
19 changed files with 43 additions and 30 deletions

View File

@@ -5,7 +5,7 @@ plugins {
// This is a workaround for a bug in the Jib plugin that causes it to stall randomly // This is a workaround for a bug in the Jib plugin that causes it to stall randomly
// https://github.com/GoogleContainerTools/jib/issues/3347 // https://github.com/GoogleContainerTools/jib/issues/3347
id 'com.google.cloud.tools.jib' version '3.4.4' apply(false) id 'com.google.cloud.tools.jib' version '3.4.5' apply(false)
} }
group 'marginalia' group 'marginalia'
@@ -47,7 +47,7 @@ ext {
dockerImageBase='container-registry.oracle.com/graalvm/jdk:24' dockerImageBase='container-registry.oracle.com/graalvm/jdk:24'
dockerImageTag='latest' dockerImageTag='latest'
dockerImageRegistry='marginalia' dockerImageRegistry='marginalia'
jibVersion = '3.4.4' jibVersion = '3.4.5'
} }
idea { idea {

View File

@@ -264,17 +264,16 @@ public class CrawlerMain extends ProcessMainClass {
if (workLog.isJobFinished(crawlSpec.domain)) if (workLog.isJobFinished(crawlSpec.domain))
continue; continue;
var task = new CrawlTask( var task = new CrawlTask(crawlSpec, anchorTagsSource, outputDir, warcArchiver, domainStateDb, workLog);
crawlSpec,
anchorTagsSource,
outputDir,
warcArchiver,
domainStateDb,
workLog);
// Try to run immediately, to avoid unnecessarily keeping the entire work set in RAM // Try to run immediately, to avoid unnecessarily keeping the entire work set in RAM
if (!trySubmitDeferredTask(task)) { if (!trySubmitDeferredTask(task)) {
// Otherwise add to the taskList for deferred execution
// Drain the retry queue to the taskList, and try to submit any tasks that are in the retry queue
retryQueue.drainTo(taskList);
taskList.removeIf(this::trySubmitDeferredTask);
// Then add this new task to the retry queue
taskList.add(task); taskList.add(task);
} }
} }

View File

@@ -57,6 +57,7 @@ public abstract class WarcInputBuffer implements AutoCloseable {
return new ErrorBuffer(); return new ErrorBuffer();
} }
Instant start = Instant.now();
InputStream is = null; InputStream is = null;
try { try {
is = entity.getContent(); is = entity.getContent();
@@ -71,8 +72,25 @@ public abstract class WarcInputBuffer implements AutoCloseable {
} }
} }
finally { finally {
// We're required to consume the stream to avoid leaking connections,
// but we also don't want to get stuck on slow or malicious connections
// forever, so we set a time limit on this phase and call abort() if it's exceeded.
try { try {
is.skip(Long.MAX_VALUE); while (is != null) {
// Consume some data
if (is.skip(65536) == 0) {
// Note that skip may return 0 if the stream is empty
// or for other unspecified reasons, so we need to check
// with read() as well to determine if the stream is done
if (is.read() == -1)
is = null;
}
// Check if the time limit has been exceeded
else if (Duration.between(start, Instant.now()).compareTo(timeLimit) > 0) {
request.abort();
is = null;
}
}
} }
catch (IOException e) { catch (IOException e) {
// Ignore the exception // Ignore the exception

View File

@@ -3,7 +3,7 @@ plugins {
id 'application' id 'application'
id 'jvm-test-suite' id 'jvm-test-suite'
id 'com.google.cloud.tools.jib' version '3.4.4' id 'com.google.cloud.tools.jib' version '3.4.5'
} }
java { java {

View File

@@ -3,7 +3,7 @@ plugins {
id 'application' id 'application'
id 'jvm-test-suite' id 'jvm-test-suite'
id 'com.google.cloud.tools.jib' version '3.4.4' id 'com.google.cloud.tools.jib' version '3.4.5'
} }
application { application {

View File

@@ -3,7 +3,7 @@ plugins {
id 'application' id 'application'
id 'jvm-test-suite' id 'jvm-test-suite'
id 'com.google.cloud.tools.jib' version '3.4.4' id 'com.google.cloud.tools.jib' version '3.4.5'
} }
application { application {

View File

@@ -5,7 +5,7 @@ plugins {
id 'application' id 'application'
id 'jvm-test-suite' id 'jvm-test-suite'
id 'com.google.cloud.tools.jib' version '3.4.4' id 'com.google.cloud.tools.jib' version '3.4.5'
} }
application { application {

View File

@@ -3,7 +3,7 @@ plugins {
id 'application' id 'application'
id 'jvm-test-suite' id 'jvm-test-suite'
id 'gg.jte.gradle' version '3.1.15' id 'gg.jte.gradle' version '3.1.15'
id 'com.google.cloud.tools.jib' version '3.4.4' id 'com.google.cloud.tools.jib' version '3.4.5'
} }
application { application {

View File

@@ -26,7 +26,7 @@
<!-- Main content --> <!-- Main content -->
<main class="flex-1 p-4 max-w-2xl space-y-4"> <main class="flex-1 p-4 max-w-2xl space-y-4">
<div class="border dark:border-gray-600 rounded bg-white text-black dark:bg-gray-800 dark:text-white text-m p-4"> <div class="border border-gray-300 dark:border-gray-600 rounded bg-white text-black dark:bg-gray-800 dark:text-white text-m p-4">
<div class="flex space-x-3 place-items-baseline"> <div class="flex space-x-3 place-items-baseline">
<i class="fa fa-circle-exclamation text-red-800"></i> <i class="fa fa-circle-exclamation text-red-800"></i>
<div class="grow">${model.errorTitle()}</div> <div class="grow">${model.errorTitle()}</div>

View File

@@ -80,10 +80,6 @@
<tr><td>rank&gt;50</td><td>The ranking of the website is at least 50 in a span of 1 - 255</td></tr> <tr><td>rank&gt;50</td><td>The ranking of the website is at least 50 in a span of 1 - 255</td></tr>
<tr><td>rank&lt;50</td><td>The ranking of the website is at most 50 in a span of 1 - 255</td></tr> <tr><td>rank&lt;50</td><td>The ranking of the website is at most 50 in a span of 1 - 255</td></tr>
<tr><td>count&gt;10</td><td> The search term must appear in at least 10 results form the domain</td></tr>
<tr><td>count&lt;10</td><td> The search term must appear in at most 10 results from the domain</td></tr>
<tr><td>format:html5</td><td>Filter documents using the HTML5 standard. This is typically modern websites.</td></tr> <tr><td>format:html5</td><td>Filter documents using the HTML5 standard. This is typically modern websites.</td></tr>
<tr><td>format:xhtml</td><td>Filter documents using the XHTML standard</td></tr> <tr><td>format:xhtml</td><td>Filter documents using the XHTML standard</td></tr>
<tr><td>format:html123</td><td>Filter documents using the HTML standards 1, 2, and 3. This is typically very old websites. </td></tr> <tr><td>format:html123</td><td>Filter documents using the HTML standards 1, 2, and 3. This is typically very old websites. </td></tr>

View File

@@ -7,7 +7,7 @@
<form class="flex-1 max-w-2xl" action="/search"> <form class="flex-1 max-w-2xl" action="/search">
<div class="flex"> <div class="flex">
@if (query.isBlank()) @if (query != null && query.isBlank())
<%-- Add autofocus if the query is blank --%> <%-- Add autofocus if the query is blank --%>
<input type="text" <input type="text"
class="shadow-inner flex-1 dark:bg-black dark:text-gray-100 bg-gray-50 border dark:border-gray-600 border-gray-300 text-gray-900 text-sm rounded-sm block w-full p-2.5" class="shadow-inner flex-1 dark:bg-black dark:text-gray-100 bg-gray-50 border dark:border-gray-600 border-gray-300 text-gray-900 text-sm rounded-sm block w-full p-2.5"

View File

@@ -2,7 +2,7 @@ plugins {
id 'java' id 'java'
id 'application' id 'application'
id 'jvm-test-suite' id 'jvm-test-suite'
id 'com.google.cloud.tools.jib' version '3.4.4' id 'com.google.cloud.tools.jib' version '3.4.5'
} }
java { java {

View File

@@ -3,7 +3,7 @@ plugins {
id 'application' id 'application'
id 'jvm-test-suite' id 'jvm-test-suite'
id 'com.google.cloud.tools.jib' version '3.4.4' id 'com.google.cloud.tools.jib' version '3.4.5'
} }
application { application {

View File

@@ -2,7 +2,7 @@ plugins {
id 'java' id 'java'
id 'application' id 'application'
id 'jvm-test-suite' id 'jvm-test-suite'
id 'com.google.cloud.tools.jib' version '3.4.4' id 'com.google.cloud.tools.jib' version '3.4.5'
} }
java { java {

View File

@@ -3,7 +3,7 @@ plugins {
id 'application' id 'application'
id 'jvm-test-suite' id 'jvm-test-suite'
id 'com.google.cloud.tools.jib' version '3.4.4' id 'com.google.cloud.tools.jib' version '3.4.5'
} }
application { application {

View File

@@ -3,7 +3,7 @@ plugins {
id 'application' id 'application'
id 'jvm-test-suite' id 'jvm-test-suite'
id 'com.google.cloud.tools.jib' version '3.4.4' id 'com.google.cloud.tools.jib' version '3.4.5'
} }
application { application {

View File

@@ -3,7 +3,7 @@ plugins {
id 'application' id 'application'
id 'jvm-test-suite' id 'jvm-test-suite'
id 'com.google.cloud.tools.jib' version '3.4.4' id 'com.google.cloud.tools.jib' version '3.4.5'
} }
application { application {

View File

@@ -3,7 +3,7 @@ plugins {
id 'application' id 'application'
id 'jvm-test-suite' id 'jvm-test-suite'
id 'com.google.cloud.tools.jib' version '3.4.4' id 'com.google.cloud.tools.jib' version '3.4.5'
} }
java { java {

View File

@@ -1,5 +1,5 @@
distributionBase=GRADLE_USER_HOME distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.10-bin.zip distributionUrl=https\://services.gradle.org/distributions/gradle-8.14-bin.zip
zipStoreBase=GRADLE_USER_HOME zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists zipStorePath=wrapper/dists