mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-06 17:32:39 +02:00
Compare commits
10 Commits
deploy-009
...
deploy-010
Author | SHA1 | Date | |
---|---|---|---|
|
18e91269ab | ||
|
e315ca5758 | ||
|
3ceea17c1d | ||
|
b34527c1a3 | ||
|
185bf28fca | ||
|
78cc25584a | ||
|
62ba30bacf | ||
|
3bb84eb206 | ||
|
be7d13ccce | ||
|
8c088a7c0b |
@@ -121,6 +121,7 @@ public class ServiceConfigurationModule extends AbstractModule {
|
|||||||
|
|
||||||
while (nets.hasMoreElements()) {
|
while (nets.hasMoreElements()) {
|
||||||
NetworkInterface netif = nets.nextElement();
|
NetworkInterface netif = nets.nextElement();
|
||||||
|
logger.info("Considering network interface {}: Up? {}, Loopback? {}", netif.getDisplayName(), netif.isUp(), netif.isLoopback());
|
||||||
if (!netif.isUp() || netif.isLoopback()) {
|
if (!netif.isUp() || netif.isLoopback()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -128,6 +129,7 @@ public class ServiceConfigurationModule extends AbstractModule {
|
|||||||
Enumeration<InetAddress> inetAddresses = netif.getInetAddresses();
|
Enumeration<InetAddress> inetAddresses = netif.getInetAddresses();
|
||||||
while (inetAddresses.hasMoreElements()) {
|
while (inetAddresses.hasMoreElements()) {
|
||||||
InetAddress addr = inetAddresses.nextElement();
|
InetAddress addr = inetAddresses.nextElement();
|
||||||
|
logger.info("Considering address {}: SiteLocal? {}, Loopback? {}", addr.getHostAddress(), addr.isSiteLocalAddress(), addr.isLoopbackAddress());
|
||||||
if (addr.isSiteLocalAddress() && !addr.isLoopbackAddress()) {
|
if (addr.isSiteLocalAddress() && !addr.isLoopbackAddress()) {
|
||||||
return addr.getHostAddress();
|
return addr.getHostAddress();
|
||||||
}
|
}
|
||||||
|
@@ -13,7 +13,7 @@ import java.net.InetSocketAddress;
|
|||||||
|
|
||||||
public class MetricsServer {
|
public class MetricsServer {
|
||||||
|
|
||||||
private static Logger logger = LoggerFactory.getLogger(MetricsServer.class);
|
private static final Logger logger = LoggerFactory.getLogger(MetricsServer.class);
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public MetricsServer(ServiceConfiguration configuration) {
|
public MetricsServer(ServiceConfiguration configuration) {
|
||||||
@@ -30,6 +30,8 @@ public class MetricsServer {
|
|||||||
|
|
||||||
context.addServlet(new ServletHolder(new MetricsServlet()), "/metrics");
|
context.addServlet(new ServletHolder(new MetricsServlet()), "/metrics");
|
||||||
|
|
||||||
|
logger.info("MetricsServer listening on {}:{}", configuration.bindAddress(), configuration.metricsPort());
|
||||||
|
|
||||||
server.start();
|
server.start();
|
||||||
}
|
}
|
||||||
catch (Exception|NoSuchMethodError ex) {
|
catch (Exception|NoSuchMethodError ex) {
|
||||||
|
@@ -266,11 +266,11 @@ public class CrawlerMain extends ProcessMainClass {
|
|||||||
|
|
||||||
// Start every task we currently can from the deferral list
|
// Start every task we currently can from the deferral list
|
||||||
deferredTasks.removeIf(task -> {
|
deferredTasks.removeIf(task -> {
|
||||||
if (pendingCrawlTasks.putIfAbsent(crawlSpec.domain(), task) != null) {
|
|
||||||
return true; // task has already run, duplicate in crawl specs
|
|
||||||
}
|
|
||||||
|
|
||||||
if (task.canRun()) {
|
if (task.canRun()) {
|
||||||
|
if (pendingCrawlTasks.putIfAbsent(task.domain, task) != null) {
|
||||||
|
return true; // task has already run, duplicate in crawl specs
|
||||||
|
}
|
||||||
|
|
||||||
// This blocks the caller when the pool is full
|
// This blocks the caller when the pool is full
|
||||||
pool.submitQuietly(task);
|
pool.submitQuietly(task);
|
||||||
return true;
|
return true;
|
||||||
@@ -280,12 +280,23 @@ public class CrawlerMain extends ProcessMainClass {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Schedule any lingering tasks
|
|
||||||
for (var task : deferredTasks) {
|
|
||||||
if (pendingCrawlTasks.putIfAbsent(task.domain, task) != null)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
pool.submitQuietly(task);
|
// Schedule any lingering tasks for immediate execution until none exist
|
||||||
|
while (!deferredTasks.isEmpty()) {
|
||||||
|
deferredTasks.removeIf(task -> {
|
||||||
|
if (task.canRun()) {
|
||||||
|
if (pendingCrawlTasks.putIfAbsent(task.domain, task) != null) {
|
||||||
|
return true; // task has already run, duplicate in crawl specs
|
||||||
|
}
|
||||||
|
|
||||||
|
// This blocks the caller when the pool is full
|
||||||
|
pool.submitQuietly(task);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
});
|
||||||
|
TimeUnit.MILLISECONDS.sleep(50);
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.info("Shutting down the pool, waiting for tasks to complete...");
|
logger.info("Shutting down the pool, waiting for tasks to complete...");
|
||||||
@@ -522,7 +533,7 @@ public class CrawlerMain extends ProcessMainClass {
|
|||||||
//
|
//
|
||||||
// This must be synchronized as chewing through parquet files in parallel leads to enormous memory overhead
|
// This must be synchronized as chewing through parquet files in parallel leads to enormous memory overhead
|
||||||
private synchronized Path migrateParquetData(Path inputPath, String domain, Path crawlDataRoot) throws IOException {
|
private synchronized Path migrateParquetData(Path inputPath, String domain, Path crawlDataRoot) throws IOException {
|
||||||
if (!inputPath.endsWith(".parquet")) {
|
if (!inputPath.toString().endsWith(".parquet")) {
|
||||||
return inputPath;
|
return inputPath;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -60,15 +60,7 @@ public class HttpFetcherImpl implements HttpFetcher {
|
|||||||
.cookieHandler(cookies)
|
.cookieHandler(cookies)
|
||||||
.followRedirects(HttpClient.Redirect.NORMAL)
|
.followRedirects(HttpClient.Redirect.NORMAL)
|
||||||
.connectTimeout(Duration.ofSeconds(8))
|
.connectTimeout(Duration.ofSeconds(8))
|
||||||
.executor(Executors.newCachedThreadPool(
|
.executor(Executors.newCachedThreadPool())
|
||||||
r -> Thread.ofPlatform()
|
|
||||||
.name("FetcherClient")
|
|
||||||
.daemon(true)
|
|
||||||
.uncaughtExceptionHandler((t, ex) -> {
|
|
||||||
logger.error("Uncaught Exception in " + t.getName(), ex);
|
|
||||||
})
|
|
||||||
.start(r)
|
|
||||||
))
|
|
||||||
.build();
|
.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -42,18 +42,20 @@ public interface SerializableCrawlDataStream extends AutoCloseable {
|
|||||||
{
|
{
|
||||||
|
|
||||||
String fileName = fullPath.getFileName().toString();
|
String fileName = fullPath.getFileName().toString();
|
||||||
if (fileName.endsWith(".parquet")) {
|
|
||||||
|
if (fileName.endsWith(".slop.zip")) {
|
||||||
try {
|
try {
|
||||||
return new ParquetSerializableCrawlDataStream(fullPath);
|
return new SlopSerializableCrawlDataStream(fullPath);
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
logger.error("Error reading domain data from " + fullPath, ex);
|
logger.error("Error reading domain data from " + fullPath, ex);
|
||||||
return SerializableCrawlDataStream.empty();
|
return SerializableCrawlDataStream.empty();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fileName.endsWith(".slop.zip")) {
|
else if (fileName.endsWith(".parquet")) {
|
||||||
|
logger.error("Opening deprecated parquet-style crawl data stream", new Exception());
|
||||||
try {
|
try {
|
||||||
return new SlopSerializableCrawlDataStream(fullPath);
|
return new ParquetSerializableCrawlDataStream(fullPath);
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
logger.error("Error reading domain data from " + fullPath, ex);
|
logger.error("Error reading domain data from " + fullPath, ex);
|
||||||
return SerializableCrawlDataStream.empty();
|
return SerializableCrawlDataStream.empty();
|
||||||
|
@@ -81,6 +81,7 @@ public class SearchFilters {
|
|||||||
),
|
),
|
||||||
List.of(
|
List.of(
|
||||||
new Filter("Vintage", "fa-clock-rotate-left", SearchProfile.VINTAGE, parameters),
|
new Filter("Vintage", "fa-clock-rotate-left", SearchProfile.VINTAGE, parameters),
|
||||||
|
new Filter("Small Web", "fa-minus", SearchProfile.SMALLWEB, parameters),
|
||||||
new Filter("Plain Text", "fa-file", SearchProfile.PLAIN_TEXT, parameters),
|
new Filter("Plain Text", "fa-file", SearchProfile.PLAIN_TEXT, parameters),
|
||||||
new Filter("Tilde", "fa-house", SearchProfile.TILDE, parameters)
|
new Filter("Tilde", "fa-house", SearchProfile.TILDE, parameters)
|
||||||
),
|
),
|
||||||
|
@@ -9,6 +9,14 @@
|
|||||||
nicotine: '#f8f8ee',
|
nicotine: '#f8f8ee',
|
||||||
margeblue: '#3e5f6f',
|
margeblue: '#3e5f6f',
|
||||||
liteblue: '#0066cc',
|
liteblue: '#0066cc',
|
||||||
|
},
|
||||||
|
screens: {
|
||||||
|
'coarsepointer': {
|
||||||
|
'raw': '(pointer: coarse)'
|
||||||
|
},
|
||||||
|
'finepointer': {
|
||||||
|
'raw': '(pointer: fine)'
|
||||||
|
},
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
screens: {
|
screens: {
|
||||||
|
@@ -23,7 +23,7 @@
|
|||||||
@template.serp.part.searchform(query = results.getParams().query(), profile = results.getProfile(), filters = results.getFilters())
|
@template.serp.part.searchform(query = results.getParams().query(), profile = results.getProfile(), filters = results.getFilters())
|
||||||
</div>
|
</div>
|
||||||
<div class="grow"></div>
|
<div class="grow"></div>
|
||||||
<button class="fixed bottom-10 right-5 sm:hidden text-sm bg-margeblue text-white p-4 rounded-xl active:text-slate-200" id="filter-button">
|
<button class="fixed bottom-10 right-5 finepointer:hidden md:hidden text-sm bg-margeblue text-white p-4 rounded-xl active:text-slate-200" id="filter-button">
|
||||||
<i class="fas fa-filter mr-3"></i>
|
<i class="fas fa-filter mr-3"></i>
|
||||||
Filters
|
Filters
|
||||||
</button>
|
</button>
|
||||||
|
@@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
@param SearchFilters filters
|
@param SearchFilters filters
|
||||||
|
|
||||||
<aside class="md:w-64 py-4 shrink-0 hidden sm:block">
|
<aside class="md:w-64 py-4 shrink-0 hidden md:block finepointer:block">
|
||||||
<div class="space-y-6 sticky top-4">
|
<div class="space-y-6 sticky top-4">
|
||||||
<div class="bg-white dark:bg-gray-800 p-4 border dark:border-gray-600 border-gray-300">
|
<div class="bg-white dark:bg-gray-800 p-4 border dark:border-gray-600 border-gray-300">
|
||||||
<h2 class="font-medium mb-3 flex items-center font-serif hidden md:block">
|
<h2 class="font-medium mb-3 flex items-center font-serif hidden md:block">
|
||||||
|
@@ -9,6 +9,14 @@ module.exports = {
|
|||||||
nicotine: '#f8f8ee',
|
nicotine: '#f8f8ee',
|
||||||
margeblue: '#3e5f6f',
|
margeblue: '#3e5f6f',
|
||||||
liteblue: '#0066cc',
|
liteblue: '#0066cc',
|
||||||
|
},
|
||||||
|
screens: {
|
||||||
|
'coarsepointer': {
|
||||||
|
'raw': '(pointer: coarse)'
|
||||||
|
},
|
||||||
|
'finepointer': {
|
||||||
|
'raw': '(pointer: fine)'
|
||||||
|
},
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
screens: {
|
screens: {
|
||||||
|
Reference in New Issue
Block a user