1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-06 07:32:38 +02:00

Compare commits

...

6 Commits

Author SHA1 Message Date
Viktor Lofgren
9f18ced73d (crawler) Improve deferred task behavior 2025-03-18 12:54:18 +01:00
Viktor Lofgren
18e91269ab (crawler) Improve deferred task behavior 2025-03-18 12:25:22 +01:00
Viktor Lofgren
e315ca5758 (search) Change icon for small web filter
The previous icon was of an irregular size and shifted the layout in an unaesthetic way.
2025-03-17 12:07:34 +01:00
Viktor Lofgren
3ceea17c1d (search) Adjustments to devicd detection in CSS
Use pointer:fine media query to better distinguish between mobile devices and PCs with a window in portrait orientation.

With this, we never show mobile filtering functionality on mobile; and never show the touch-inaccessible minimized sidebar on mobile.
2025-03-17 12:04:34 +01:00
Viktor Lofgren
b34527c1a3 (search) Add small web filter for new UI 2025-03-17 11:39:19 +01:00
Viktor Lofgren
185bf28fca (crawler) Correct issue leading to parquet files not being correctly preconverted
Path.endsWith("str") != String.endsWith(".str")
2025-03-10 13:48:12 +01:00
6 changed files with 68 additions and 30 deletions

View File

@@ -41,7 +41,10 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.security.Security;
import java.util.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
@@ -248,44 +251,35 @@ public class CrawlerMain extends ProcessMainClass {
// List of deferred tasks used to ensure beneficial scheduling of domains with regard to DomainLocks,
// merely shuffling the domains tends to lead to a lot of threads being blocked waiting for a semphore,
// this will more aggressively attempt to schedule the jobs to avoid blocking
List<CrawlTask> deferredTasks = new LinkedList<>();
List<CrawlTask> taskList = new ArrayList<>();
// Create crawl tasks and submit them to the pool for execution
// Create crawl tasks
for (CrawlSpecRecord crawlSpec : crawlSpecRecords) {
if (workLog.isJobFinished(crawlSpec.domain()))
if (workLog.isJobFinished(crawlSpec.domain))
continue;
// Add to the end of the deferral list
deferredTasks.addLast(new CrawlTask(
var task = new CrawlTask(
crawlSpec,
anchorTagsSource,
outputDir,
warcArchiver,
domainStateDb,
workLog));
workLog);
// Start every task we currently can from the deferral list
deferredTasks.removeIf(task -> {
if (task.canRun()) {
if (pendingCrawlTasks.putIfAbsent(crawlSpec.domain(), task) != null) {
return true; // task has already run, duplicate in crawl specs
}
// This blocks the caller when the pool is full
pool.submitQuietly(task);
return true;
}
return false;
});
// Try to run immediately, to avoid unnecessarily keeping the entire work set in RAM
if (!trySubmitDeferredTask(task)) {
// Otherwise add to the taskList for deferred execution
taskList.add(task);
}
}
// Schedule any lingering tasks for immediate execution
for (var task : deferredTasks) {
if (pendingCrawlTasks.putIfAbsent(task.domain, task) != null)
continue;
// Schedule viable tasks for execution until list is empty
while (!taskList.isEmpty()) {
taskList.removeIf(this::trySubmitDeferredTask);
pool.submitQuietly(task);
// Add a small pause here to avoid busy looping toward the end of the execution cycle when
// we might have no new viable tasks to run for hours on end
TimeUnit.MILLISECONDS.sleep(50);
}
logger.info("Shutting down the pool, waiting for tasks to complete...");
@@ -312,6 +306,28 @@ public class CrawlerMain extends ProcessMainClass {
}
}
/** Submit a task for execution if it can be run, returns true if it was submitted
* or if it can be discarded */
private boolean trySubmitDeferredTask(CrawlTask task) {
if (!task.canRun()) {
return false;
}
if (pendingCrawlTasks.putIfAbsent(task.domain, task) != null) {
return true; // task has already run, duplicate in crawl specs
}
try {
// This blocks the caller when the pool is full
pool.submitQuietly(task);
return true;
}
catch (RuntimeException ex) {
logger.error("Failed to submit task " + task.domain, ex);
return false;
}
}
public void runForSingleDomain(String targetDomainName, FileStorageId fileStorageId) throws Exception {
runForSingleDomain(targetDomainName, fileStorageService.getStorage(fileStorageId).asPath());
}
@@ -377,6 +393,11 @@ public class CrawlerMain extends ProcessMainClass {
@Override
public void run() throws Exception {
if (workLog.isJobFinished(domain)) { // No-Op
logger.info("Omitting task {}, as it is already run", domain);
return;
}
Path newWarcFile = CrawlerOutputFile.createWarcPath(outputDir, id, domain, CrawlerOutputFile.WarcFileVersion.LIVE);
Path tempFile = CrawlerOutputFile.createWarcPath(outputDir, id, domain, CrawlerOutputFile.WarcFileVersion.TEMP);
Path slopFile = CrawlerOutputFile.createSlopPath(outputDir, id, domain);
@@ -431,7 +452,7 @@ public class CrawlerMain extends ProcessMainClass {
logger.error("Error fetching domain " + domain, e);
}
finally {
// We don't need to double-count these; it's also kept int he workLog
// We don't need to double-count these; it's also kept in the workLog
pendingCrawlTasks.remove(domain);
Thread.currentThread().setName("[idle]");
@@ -522,7 +543,7 @@ public class CrawlerMain extends ProcessMainClass {
//
// This must be synchronized as chewing through parquet files in parallel leads to enormous memory overhead
private synchronized Path migrateParquetData(Path inputPath, String domain, Path crawlDataRoot) throws IOException {
if (!inputPath.endsWith(".parquet")) {
if (!inputPath.toString().endsWith(".parquet")) {
return inputPath;
}

View File

@@ -81,6 +81,7 @@ public class SearchFilters {
),
List.of(
new Filter("Vintage", "fa-clock-rotate-left", SearchProfile.VINTAGE, parameters),
new Filter("Small Web", "fa-minus", SearchProfile.SMALLWEB, parameters),
new Filter("Plain Text", "fa-file", SearchProfile.PLAIN_TEXT, parameters),
new Filter("Tilde", "fa-house", SearchProfile.TILDE, parameters)
),

View File

@@ -9,6 +9,14 @@
nicotine: '#f8f8ee',
margeblue: '#3e5f6f',
liteblue: '#0066cc',
},
screens: {
'coarsepointer': {
'raw': '(pointer: coarse)'
},
'finepointer': {
'raw': '(pointer: fine)'
},
}
},
screens: {

View File

@@ -23,7 +23,7 @@
@template.serp.part.searchform(query = results.getParams().query(), profile = results.getProfile(), filters = results.getFilters())
</div>
<div class="grow"></div>
<button class="fixed bottom-10 right-5 sm:hidden text-sm bg-margeblue text-white p-4 rounded-xl active:text-slate-200" id="filter-button">
<button class="fixed bottom-10 right-5 finepointer:hidden md:hidden text-sm bg-margeblue text-white p-4 rounded-xl active:text-slate-200" id="filter-button">
<i class="fas fa-filter mr-3"></i>
Filters
</button>

View File

@@ -3,7 +3,7 @@
@param SearchFilters filters
<aside class="md:w-64 py-4 shrink-0 hidden sm:block">
<aside class="md:w-64 py-4 shrink-0 hidden md:block finepointer:block">
<div class="space-y-6 sticky top-4">
<div class="bg-white dark:bg-gray-800 p-4 border dark:border-gray-600 border-gray-300">
<h2 class="font-medium mb-3 flex items-center font-serif hidden md:block">

View File

@@ -9,6 +9,14 @@ module.exports = {
nicotine: '#f8f8ee',
margeblue: '#3e5f6f',
liteblue: '#0066cc',
},
screens: {
'coarsepointer': {
'raw': '(pointer: coarse)'
},
'finepointer': {
'raw': '(pointer: fine)'
},
}
},
screens: {