mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-06 07:32:38 +02:00

Compare commits: deploy-009 ... deploy-009 (6 commits)

SHA1:
185bf28fca
78cc25584a
62ba30bacf
3bb84eb206
be7d13ccce
8c088a7c0b
In ServiceConfigurationModule, two log statements are added so the bind-address discovery can be debugged:

@@ -121,6 +121,7 @@ public class ServiceConfigurationModule extends AbstractModule {
 
         while (nets.hasMoreElements()) {
             NetworkInterface netif = nets.nextElement();
+            logger.info("Considering network interface {}: Up? {}, Loopback? {}", netif.getDisplayName(), netif.isUp(), netif.isLoopback());
             if (!netif.isUp() || netif.isLoopback()) {
                 continue;
             }
@@ -128,6 +129,7 @@ public class ServiceConfigurationModule extends AbstractModule {
         Enumeration<InetAddress> inetAddresses = netif.getInetAddresses();
         while (inetAddresses.hasMoreElements()) {
             InetAddress addr = inetAddresses.nextElement();
+            logger.info("Considering address {}: SiteLocal? {}, Loopback? {}", addr.getHostAddress(), addr.isSiteLocalAddress(), addr.isLoopbackAddress());
             if (addr.isSiteLocalAddress() && !addr.isLoopbackAddress()) {
                 return addr.getHostAddress();
             }
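Pieced together from the context lines, the enclosing method walks the interface list to pick the service's bind address. Below is a minimal standalone sketch of that discovery pattern; the method name findSiteLocalAddress and the null fallback are assumptions for illustration, not the repository's actual code:

import java.net.InetAddress;
import java.net.NetworkInterface;
import java.net.SocketException;
import java.util.Enumeration;

public class SiteLocalAddressDemo {
    // Walk every interface, skip ones that are down or loopback, and return
    // the first site-local (RFC 1918) address found.
    static String findSiteLocalAddress() throws SocketException {
        Enumeration<NetworkInterface> nets = NetworkInterface.getNetworkInterfaces();
        while (nets.hasMoreElements()) {
            NetworkInterface netif = nets.nextElement();
            if (!netif.isUp() || netif.isLoopback()) {
                continue;
            }
            Enumeration<InetAddress> inetAddresses = netif.getInetAddresses();
            while (inetAddresses.hasMoreElements()) {
                InetAddress addr = inetAddresses.nextElement();
                if (addr.isSiteLocalAddress() && !addr.isLoopbackAddress()) {
                    return addr.getHostAddress();
                }
            }
        }
        return null; // assumed fallback; the real method may behave differently
    }

    public static void main(String[] args) throws SocketException {
        System.out.println(findSiteLocalAddress());
    }
}

The two new log lines make it visible which interface or address disqualified a candidate, which is otherwise hard to diagnose on multi-homed hosts.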
In MetricsServer, the logger field becomes final:

@@ -13,7 +13,7 @@ import java.net.InetSocketAddress;
 
 public class MetricsServer {
 
-    private static Logger logger = LoggerFactory.getLogger(MetricsServer.class);
+    private static final Logger logger = LoggerFactory.getLogger(MetricsServer.class);
 
     @Inject
     public MetricsServer(ServiceConfiguration configuration) {
@@ -30,6 +30,8 @@ public class MetricsServer {
 
             context.addServlet(new ServletHolder(new MetricsServlet()), "/metrics");
 
+            logger.info("MetricsServer listening on {}:{}", configuration.bindAddress(), configuration.metricsPort());
+
             server.start();
         }
         catch (Exception|NoSuchMethodError ex) {
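The hunk shows the ingredients of the metrics endpoint: an embedded Jetty server with a Prometheus MetricsServlet mounted at /metrics. A minimal sketch of that wiring, assuming the Prometheus simpleclient and Jetty servlet APIs that the diff's identifiers suggest; the bind address and port literals stand in for whatever ServiceConfiguration provides:

import io.prometheus.client.exporter.MetricsServlet;
import org.eclipse.jetty.server.Server;
import org.eclipse.jetty.servlet.ServletContextHandler;
import org.eclipse.jetty.servlet.ServletHolder;

import java.net.InetSocketAddress;

public class MetricsServerSketch {
    public static void main(String[] args) throws Exception {
        // Placeholder values; the real class reads these from ServiceConfiguration.
        String bindAddress = "127.0.0.1";
        int metricsPort = 7000;

        Server server = new Server(new InetSocketAddress(bindAddress, metricsPort));
        ServletContextHandler context = new ServletContextHandler();
        context.setContextPath("/");
        server.setHandler(context);

        // Prometheus scrapes this endpoint.
        context.addServlet(new ServletHolder(new MetricsServlet()), "/metrics");

        System.out.printf("MetricsServer listening on %s:%d%n", bindAddress, metricsPort);
        server.start();
    }
}

The added log line gives the same startup visibility as the sketch's printf: it confirms which address and port the metrics endpoint actually bound to.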
In CrawlerMain, the duplicate check moves inside the canRun() branch, so a task is only marked pending once it is actually about to be submitted:

@@ -266,11 +266,11 @@ public class CrawlerMain extends ProcessMainClass {
 
                 // Start every task we currently can from the deferral list
                 deferredTasks.removeIf(task -> {
-                    if (pendingCrawlTasks.putIfAbsent(crawlSpec.domain(), task) != null) {
-                        return true; // task has already run, duplicate in crawl specs
-                    }
-
                     if (task.canRun()) {
+                        if (pendingCrawlTasks.putIfAbsent(crawlSpec.domain(), task) != null) {
+                            return true; // task has already run, duplicate in crawl specs
+                        }
+
                         // This blocks the caller when the pool is full
                         pool.submitQuietly(task);
                         return true;
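The reorder matters because putIfAbsent() has a side effect: with the old order, a task that could not run yet was still recorded in pendingCrawlTasks, so a later sweep over the deferral list would find the map entry and drop the task as a "duplicate" without ever running it. A distilled sketch of the corrected pattern; Task, pending, and deferred are hypothetical stand-ins for the crawler's actual types:

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

public class DeferredSubmitDemo {
    // Hypothetical stand-in for the crawler's task type.
    record Task(String domain, boolean canRun) {}

    public static void main(String[] args) {
        ConcurrentMap<String, Task> pending = new ConcurrentHashMap<>();
        List<Task> deferred = new ArrayList<>(List.of(
                new Task("example.com", true),
                new Task("example.org", false))); // not runnable yet: must stay deferred

        deferred.removeIf(task -> {
            if (task.canRun()) {
                // Register only when we are actually going to submit. Doing the
                // putIfAbsent before the canRun() check would mark a still-deferred
                // task as pending, and a later sweep would discard it as a duplicate.
                if (pending.putIfAbsent(task.domain(), task) != null) {
                    return true; // genuine duplicate, drop it
                }
                System.out.println("submitting " + task.domain());
                return true; // submitted, remove from the deferral list
            }
            return false; // keep deferred for the next sweep
        });

        System.out.println("still deferred: " + deferred);
    }
}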
@@ -280,7 +280,7 @@ public class CrawlerMain extends ProcessMainClass {
                 });
             }
 
-            // Schedule any lingering tasks
+            // Schedule any lingering tasks for immediate execution
             for (var task : deferredTasks) {
                 if (pendingCrawlTasks.putIfAbsent(task.domain, task) != null)
                     continue;
A path-suffix bug is also fixed in the parquet migration shim: Path.endsWith() compares trailing path elements, not characters, so the old guard never matched and every input took the early return; the string form must be tested instead:

@@ -522,7 +522,7 @@ public class CrawlerMain extends ProcessMainClass {
     //
     // This must be synchronized as chewing through parquet files in parallel leads to enormous memory overhead
     private synchronized Path migrateParquetData(Path inputPath, String domain, Path crawlDataRoot) throws IOException {
-        if (!inputPath.endsWith(".parquet")) {
+        if (!inputPath.toString().endsWith(".parquet")) {
             return inputPath;
         }
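A quick runnable demonstration of the difference between the two endsWith() methods:

import java.nio.file.Path;

public class PathSuffixDemo {
    public static void main(String[] args) {
        Path p = Path.of("crawl-data", "example.com.parquet");
        // Path.endsWith() compares whole path segments, not string suffixes,
        // so ".parquet" never matches a file name like "example.com.parquet":
        System.out.println(p.endsWith(".parquet"));            // false
        // String.endsWith() is the correct check for a file extension:
        System.out.println(p.toString().endsWith(".parquet")); // true
    }
}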
In HttpFetcherImpl, the HttpClient's custom thread factory is removed; the client now uses a plain cached thread pool:

@@ -60,15 +60,7 @@ public class HttpFetcherImpl implements HttpFetcher {
                 .cookieHandler(cookies)
                 .followRedirects(HttpClient.Redirect.NORMAL)
                 .connectTimeout(Duration.ofSeconds(8))
-                .executor(Executors.newCachedThreadPool(
-                        r -> Thread.ofPlatform()
-                                .name("FetcherClient")
-                                .daemon(true)
-                                .uncaughtExceptionHandler((t, ex) -> {
-                                    logger.error("Uncaught Exception in " + t.getName(), ex);
-                                })
-                                .start(r)
-                ))
+                .executor(Executors.newCachedThreadPool())
                 .build();
     }
 
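A likely reason for the removal: a ThreadFactory must return unstarted threads, but the deleted lambda ended in Thread.Builder.start(r), which starts the thread before handing it to the pool; the pool's own start() call would then fail with IllegalThreadStateException. If named daemon fetcher threads are still wanted, Thread.ofPlatform().factory() (Java 21) builds a correct factory. A sketch, with the name prefix as an assumption:

import java.net.http.HttpClient;
import java.time.Duration;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory;

public class FetcherClientDemo {
    public static void main(String[] args) {
        // factory() returns *unstarted* threads, which is what a pool expects;
        // the removed code's start(r) returned threads that were already running.
        ThreadFactory factory = Thread.ofPlatform()
                .name("FetcherClient-", 0) // assumed prefix; counter appended per thread
                .daemon(true)
                .factory();

        HttpClient client = HttpClient.newBuilder()
                .followRedirects(HttpClient.Redirect.NORMAL)
                .connectTimeout(Duration.ofSeconds(8))
                .executor(Executors.newCachedThreadPool(factory))
                .build();

        System.out.println("HTTP version: " + client.version());
    }
}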
In SerializableCrawlDataStream, the two format branches swap precedence: slop becomes the primary format, and parquet is demoted to a deprecated fallback that logs a stack trace when opened:

@@ -42,18 +42,20 @@ public interface SerializableCrawlDataStream extends AutoCloseable {
     {
         String fileName = fullPath.getFileName().toString();
 
-        if (fileName.endsWith(".parquet")) {
+        if (fileName.endsWith(".slop.zip")) {
             try {
-                return new ParquetSerializableCrawlDataStream(fullPath);
+                return new SlopSerializableCrawlDataStream(fullPath);
             } catch (Exception ex) {
                 logger.error("Error reading domain data from " + fullPath, ex);
                 return SerializableCrawlDataStream.empty();
             }
         }
 
-        if (fileName.endsWith(".slop.zip")) {
+        else if (fileName.endsWith(".parquet")) {
+            logger.error("Opening deprecated parquet-style crawl data stream", new Exception());
             try {
-                return new SlopSerializableCrawlDataStream(fullPath);
+                return new ParquetSerializableCrawlDataStream(fullPath);
             } catch (Exception ex) {
                 logger.error("Error reading domain data from " + fullPath, ex);
                 return SerializableCrawlDataStream.empty();
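One detail worth noting in the deprecated branch: passing a freshly constructed Exception as the second argument makes SLF4J attach the current stack trace to the message, revealing which call path still opens the old format, without throwing anything. A tiny sketch of the idiom:

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class DeprecationTraceDemo {
    private static final Logger logger = LoggerFactory.getLogger(DeprecationTraceDemo.class);

    static void deprecatedPath() {
        // The throwaway Exception is never thrown; it exists solely so the
        // logger prints the stack trace of whoever reached this code path.
        logger.error("Opening deprecated parquet-style crawl data stream", new Exception());
    }

    public static void main(String[] args) {
        deprecatedPath();
    }
}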