mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-06 07:32:38 +02:00
Compare commits
11 Commits
deploy-029
...
deploy-030
Author | SHA1 | Date | |
---|---|---|---|
|
9d3f9adb05 | ||
|
a43a1773f1 | ||
|
1e7a3a3c4f | ||
|
62b696b1c3 | ||
|
f1a900f383 | ||
|
700364b86d | ||
|
7e725ddaed | ||
|
120209e138 | ||
|
a771a5b6ce | ||
|
dac5b54128 | ||
|
6cfb143c15 |
@@ -7,7 +7,6 @@ public enum ServiceId {
|
||||
Search("search-service"),
|
||||
Index("index-service"),
|
||||
Query("query-service"),
|
||||
Executor("executor-service"),
|
||||
|
||||
Control("control-service"),
|
||||
|
||||
|
@@ -189,7 +189,7 @@ public class ExecutorClient {
|
||||
String uriPath = "/transfer/file/" + fileStorage.id();
|
||||
String uriQuery = "path=" + URLEncoder.encode(path, StandardCharsets.UTF_8);
|
||||
|
||||
var endpoints = registry.getEndpoints(ServiceKey.forRest(ServiceId.Executor, fileStorage.node()));
|
||||
var endpoints = registry.getEndpoints(ServiceKey.forRest(ServiceId.Index, fileStorage.node()));
|
||||
if (endpoints.isEmpty()) {
|
||||
throw new RuntimeException("No endpoints for node " + fileStorage.node());
|
||||
}
|
||||
|
@@ -1,4 +1,4 @@
|
||||
package nu.marginalia.executor;
|
||||
package nu.marginalia.svc;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import nu.marginalia.storage.FileStorageService;
|
@@ -1,5 +1,5 @@
|
||||
The execution subsystem is responsible for the execution of long running tasks on each
|
||||
index node. It lives in the [executor-service](../services-core/executor-service) module.
|
||||
index node. It lives in the [index-service](../services-core/index-service) module.
|
||||
|
||||
It accomplishes this using the [message queue and actor library](../libraries/message-queue/),
|
||||
which permits program state to survive crashes and reboots.
|
||||
|
@@ -1,4 +1,4 @@
|
||||
package nu.marginalia.executor;
|
||||
package nu.marginalia.svc;
|
||||
|
||||
import nu.marginalia.storage.FileStorageService;
|
||||
import nu.marginalia.storage.model.FileStorage;
|
@@ -47,6 +47,21 @@ public class DomSampleClient {
|
||||
}
|
||||
}
|
||||
|
||||
public Optional<RpcDomainSampleRequests> getSampleRequests(String domainName) {
|
||||
try {
|
||||
var val = channelPool.call(DomSampleApiGrpc.DomSampleApiBlockingStub::getSampleRequests)
|
||||
.run(RpcDomainName.newBuilder().setDomainName(domainName).build());
|
||||
|
||||
return Optional.of(val);
|
||||
}
|
||||
catch (StatusRuntimeException sre) {
|
||||
if (sre.getStatus() != Status.NOT_FOUND) {
|
||||
logger.error("Failed to fetch DOM sample", sre);
|
||||
}
|
||||
return Optional.empty();
|
||||
}
|
||||
}
|
||||
|
||||
public boolean hasSample(String domainName) {
|
||||
try {
|
||||
return channelPool.call(DomSampleApiGrpc.DomSampleApiBlockingStub::hasSample)
|
||||
|
@@ -7,6 +7,7 @@ option java_multiple_files=true;
|
||||
|
||||
service DomSampleApi {
|
||||
rpc getSample(RpcDomainName) returns (RpcDomainSample) {}
|
||||
rpc getSampleRequests(RpcDomainName) returns (RpcDomainSampleRequests) {}
|
||||
rpc hasSample(RpcDomainName) returns (RpcBooleanRsp) {}
|
||||
rpc getAllSamples(RpcDomainName) returns (stream RpcDomainSample) {}
|
||||
}
|
||||
@@ -19,10 +20,16 @@ message RpcBooleanRsp {
|
||||
bool answer = 1;
|
||||
}
|
||||
|
||||
message RpcDomainSampleRequests {
|
||||
string domainName = 1;
|
||||
string url = 2;
|
||||
repeated RpcOutgoingRequest outgoingRequests = 5;
|
||||
}
|
||||
|
||||
message RpcDomainSample {
|
||||
string domainName = 1;
|
||||
string url = 2;
|
||||
string htmlSample = 3;
|
||||
bytes htmlSampleZstd = 3;
|
||||
bool accepted_popover = 4;
|
||||
repeated RpcOutgoingRequest outgoingRequests = 5;
|
||||
}
|
||||
|
@@ -31,6 +31,7 @@ dependencies {
|
||||
implementation libs.jsoup
|
||||
implementation libs.opencsv
|
||||
implementation libs.slop
|
||||
implementation libs.zstd
|
||||
implementation libs.sqlite
|
||||
implementation libs.bundles.slf4j
|
||||
implementation libs.commons.lang3
|
||||
|
@@ -1,6 +1,8 @@
|
||||
package nu.marginalia.domsample;
|
||||
|
||||
import com.github.luben.zstd.Zstd;
|
||||
import com.google.inject.Inject;
|
||||
import com.google.protobuf.ByteString;
|
||||
import io.grpc.Status;
|
||||
import io.grpc.stub.StreamObserver;
|
||||
import nu.marginalia.api.domsample.*;
|
||||
@@ -9,6 +11,7 @@ import nu.marginalia.service.server.DiscoverableService;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.List;
|
||||
|
||||
public class DomSampleGrpcService
|
||||
@@ -42,7 +45,36 @@ public class DomSampleGrpcService
|
||||
}
|
||||
|
||||
// Grab the first sample
|
||||
RpcDomainSample.Builder response = convert(dbRecords.getFirst());
|
||||
RpcDomainSample.Builder response = convertFullSample(dbRecords.getFirst());
|
||||
|
||||
responseObserver.onNext(response.build());
|
||||
responseObserver.onCompleted();
|
||||
}
|
||||
catch (Exception e) {
|
||||
logger.error("Error in getSample()", e);
|
||||
responseObserver.onError(Status.INTERNAL.withCause(e).asRuntimeException());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void getSampleRequests(RpcDomainName request, StreamObserver<RpcDomainSampleRequests> responseObserver) {
|
||||
String domainName = request.getDomainName();
|
||||
if (domainName.isBlank()) {
|
||||
responseObserver.onError(Status.INVALID_ARGUMENT
|
||||
.withDescription("Invalid domain name")
|
||||
.asRuntimeException());
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
List<DomSampleDb.Sample> dbRecords = domSampleDb.getSamples(domainName);
|
||||
if (dbRecords.isEmpty()) {
|
||||
responseObserver.onError(Status.NOT_FOUND.withDescription("No sample found").asRuntimeException());
|
||||
return;
|
||||
}
|
||||
|
||||
// Grab the first sample
|
||||
RpcDomainSampleRequests.Builder response = convertRequestData(dbRecords.getFirst());
|
||||
|
||||
responseObserver.onNext(response.build());
|
||||
responseObserver.onCompleted();
|
||||
@@ -87,7 +119,7 @@ public class DomSampleGrpcService
|
||||
List<DomSampleDb.Sample> dbRecords = domSampleDb.getSamples(domainName);
|
||||
|
||||
for (var record : dbRecords) {
|
||||
responseObserver.onNext(convert(record).build());
|
||||
responseObserver.onNext(convertFullSample(record).build());
|
||||
}
|
||||
|
||||
responseObserver.onCompleted();
|
||||
@@ -98,12 +130,14 @@ public class DomSampleGrpcService
|
||||
}
|
||||
}
|
||||
|
||||
private RpcDomainSample.Builder convert(DomSampleDb.Sample dbSample) {
|
||||
private RpcDomainSample.Builder convertFullSample(DomSampleDb.Sample dbSample) {
|
||||
|
||||
ByteString htmlZstd = ByteString.copyFrom(Zstd.compress(dbSample.sample().getBytes(StandardCharsets.UTF_8)));
|
||||
|
||||
var sampleBuilder = RpcDomainSample.newBuilder()
|
||||
.setDomainName(dbSample.domain())
|
||||
.setAcceptedPopover(dbSample.acceptedPopover())
|
||||
.setHtmlSample(dbSample.sample());
|
||||
.setHtmlSampleZstd(htmlZstd);
|
||||
|
||||
for (var req : dbSample.parseRequests()) {
|
||||
sampleBuilder.addOutgoingRequestsBuilder()
|
||||
@@ -120,4 +154,23 @@ public class DomSampleGrpcService
|
||||
return sampleBuilder;
|
||||
}
|
||||
|
||||
private RpcDomainSampleRequests.Builder convertRequestData(DomSampleDb.Sample dbSample) {
|
||||
|
||||
var sampleBuilder = RpcDomainSampleRequests.newBuilder()
|
||||
.setDomainName(dbSample.domain());
|
||||
|
||||
for (var req : dbSample.parseRequests()) {
|
||||
sampleBuilder.addOutgoingRequestsBuilder()
|
||||
.setUrl(req.uri().toString())
|
||||
.setMethod(switch (req.method().toUpperCase())
|
||||
{
|
||||
case "GET" -> RpcOutgoingRequest.RequestMethod.GET;
|
||||
case "POST" -> RpcOutgoingRequest.RequestMethod.POST;
|
||||
default -> RpcOutgoingRequest.RequestMethod.OTHER;
|
||||
})
|
||||
.setTimestamp(req.timestamp());
|
||||
}
|
||||
|
||||
return sampleBuilder;
|
||||
}
|
||||
}
|
||||
|
@@ -87,7 +87,7 @@ class FeedFetcherServiceTest extends AbstractModule {
|
||||
bind(DomainCoordinator.class).to(LocalDomainCoordinator.class);
|
||||
bind(HikariDataSource.class).toInstance(dataSource);
|
||||
bind(ServiceRegistryIf.class).toInstance(Mockito.mock(ServiceRegistryIf.class));
|
||||
bind(ServiceConfiguration.class).toInstance(new ServiceConfiguration(ServiceId.Executor, 1, "", "", 0, UUID.randomUUID()));
|
||||
bind(ServiceConfiguration.class).toInstance(new ServiceConfiguration(ServiceId.Index, 1, "", "", 0, UUID.randomUUID()));
|
||||
bind(Integer.class).annotatedWith(Names.named("wmsa-system-node")).toInstance(1);
|
||||
}
|
||||
|
||||
|
@@ -22,6 +22,7 @@ dependencies {
|
||||
|
||||
implementation libs.bundles.slf4j
|
||||
implementation libs.guava
|
||||
implementation libs.zstd
|
||||
implementation dependencies.create(libs.guice.get()) {
|
||||
exclude group: 'com.google.guava'
|
||||
}
|
||||
|
@@ -1,5 +1,6 @@
|
||||
package nu.marginalia.domclassifier;
|
||||
|
||||
import com.github.luben.zstd.ZstdInputStream;
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import nu.marginalia.api.domsample.RpcDomainSample;
|
||||
@@ -19,6 +20,7 @@ import javax.xml.parsers.ParserConfigurationException;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.net.URISyntaxException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.*;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.regex.Pattern;
|
||||
@@ -84,8 +86,9 @@ public class DomSampleClassifier {
|
||||
|
||||
EdgeDomain sampleDomain = new EdgeDomain(sample.getDomainName());
|
||||
|
||||
try {
|
||||
var parsedDoc = Jsoup.parse(sample.getHtmlSample());
|
||||
try (var compressedStream = new ZstdInputStream(sample.getHtmlSampleZstd().newInput())) {
|
||||
String html = new String(compressedStream.readAllBytes(), StandardCharsets.UTF_8);
|
||||
var parsedDoc = Jsoup.parse(html);
|
||||
var fixedElements = parsedDoc.select("*[data-position=fixed]");
|
||||
|
||||
if (sample.getAcceptedPopover()) {
|
||||
@@ -104,7 +107,7 @@ public class DomSampleClassifier {
|
||||
}
|
||||
}
|
||||
catch (Exception ex) {
|
||||
logger.warn("Error when parsing DOM HTML sample");
|
||||
logger.warn("Error when parsing DOM HTML sample", ex);
|
||||
}
|
||||
|
||||
// Classify outgoing requests
|
||||
|
@@ -12,4 +12,5 @@ class DDGTrackerDataTest {
|
||||
data.getDomainInfo("hotjar.com").ifPresent(System.out::println);
|
||||
data.getAllClassifications().forEach(System.out::println);
|
||||
}
|
||||
|
||||
}
|
@@ -102,7 +102,8 @@ public class DomainProcessor {
|
||||
.thenApply(domSampleClassifier::classifySample)
|
||||
.handle((a,b) -> {
|
||||
if (b != null) {
|
||||
if (!(b instanceof StatusRuntimeException sre && sre.getStatus() != Status.NOT_FOUND)) {
|
||||
var cause = b.getCause();
|
||||
if (!(cause instanceof StatusRuntimeException sre && sre.getStatus() != Status.NOT_FOUND)) {
|
||||
logger.warn("Exception when fetching sample data", b);
|
||||
}
|
||||
return EnumSet.of(DomSampleClassification.UNCLASSIFIED);
|
||||
|
@@ -13,13 +13,13 @@ A map of the most important components and how they relate can be found below.
|
||||

|
||||
|
||||
The core part of the search engine is the index service, which is responsible for storing and retrieving
|
||||
the document data. The index serive is partitioned, along with the executor service, which is responsible for executing
|
||||
processes. At least one instance of each service must be run, but more can be run
|
||||
the document data. The index service is partitioned and is responsible for both index lookups and spawning
|
||||
per-partition processing tasks. At least one instance of each service must be run, but more can be run
|
||||
alongside. Multiple partitions is desirable in production to distribute load across multiple physical drives,
|
||||
as well as reducing the impact of downtime.
|
||||
|
||||
Search queries are delegated via the query service, which is a proxy that fans out the query to all
|
||||
eligible index services. The control service is responsible for distributing commands to the executor
|
||||
eligible index services. The control service is responsible for distributing commands to the partitions
|
||||
service, and for monitoring the health of the system. It also offers a web interface for operating the system.
|
||||
|
||||
### Services
|
||||
@@ -32,7 +32,6 @@ service, and for monitoring the health of the system. It also offers a web inte
|
||||
* [index](services-core/index-service)
|
||||
* Exposes the [index](index) subsystem
|
||||
* Exposes the [functions/link-graph](functions/link-graph) subsystem
|
||||
* [executor](services-core/executor-service)
|
||||
* Exposes the [execution](execution) subsystem
|
||||
* [assistant](services-core/assistant-service)
|
||||
* Exposes the [functions/math](functions/math) subsystem
|
||||
@@ -57,7 +56,7 @@ Services that expose HTTP endpoints tend to have more code. They are marked wit
|
||||
### Processes
|
||||
|
||||
Processes are batch jobs that deal with data retrieval, processing and loading. These are spawned and orchestrated by
|
||||
the executor service, which is controlled by the control service.
|
||||
the index service, which is controlled by the control service.
|
||||
|
||||
* [processes](processes/)
|
||||
* [crawling-process](processes/crawling-process)
|
||||
|
@@ -11,7 +11,7 @@ import nu.marginalia.api.domains.DomainInfoClient;
|
||||
import nu.marginalia.api.domains.model.DomainInformation;
|
||||
import nu.marginalia.api.domains.model.SimilarDomain;
|
||||
import nu.marginalia.api.domsample.DomSampleClient;
|
||||
import nu.marginalia.api.domsample.RpcDomainSample;
|
||||
import nu.marginalia.api.domsample.RpcDomainSampleRequests;
|
||||
import nu.marginalia.api.domsample.RpcOutgoingRequest;
|
||||
import nu.marginalia.api.feeds.FeedsClient;
|
||||
import nu.marginalia.api.feeds.RpcFeed;
|
||||
@@ -399,7 +399,7 @@ public class SearchSiteInfoService {
|
||||
return forServiceUnavailable(domainName);
|
||||
}
|
||||
|
||||
Optional<RpcDomainSample> sample = domSampleClient.getSample(domainName.toLowerCase());
|
||||
Optional<RpcDomainSampleRequests> sample = domSampleClient.getSampleRequests(domainName.toLowerCase());
|
||||
if (sample.isEmpty()) {
|
||||
return forNoData(domainName);
|
||||
}
|
||||
|
@@ -38,8 +38,8 @@
|
||||
<a href="https://old-search.marginalia.nu/" class="underline text-liteblue dark:text-blue-200">here</a>.
|
||||
</div>
|
||||
</div>
|
||||
<div class="mx-auto flex flex-col sm:flex-row my-4 sm:space-x-2 space-y-2 sm:space-y-0 w-full md:w-auto px-2 items-center sm:items-stretch">
|
||||
<div class="flex flex-col border border-gray-300 dark:border-gray-600 rounded overflow-hidden dark:bg-gray-800 bg-white p-8 sm:p-4 space-y-3 w-96 sm:w-64">
|
||||
<div class="mx-auto px-8 flex flex-col sm:flex-row my-4 sm:space-x-2 space-y-2 sm:space-y-0 w-full md:w-auto items-center sm:items-stretch">
|
||||
<div class="flex flex-col items-center border border-gray-300 dark:border-gray-600 rounded overflow-hidden dark:bg-gray-800 bg-white p-8 sm:p-4 space-y-3 w-[300px] sm:w-64">
|
||||
<div><i class="fas fa-sailboat mx-2 text-margeblue dark:text-slate-200"></i>Explore the Web</div>
|
||||
<ul class="list-disc ml-8 sm:ml-6 text-slate-700 dark:text-white text-xs leading-5">
|
||||
<li>Prioritizes non-commercial content</li>
|
||||
@@ -48,14 +48,14 @@
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="flex flex-col border border-gray-300 dark:border-gray-600 rounded overflow-hidden dark:bg-gray-800 bg-white p-8 sm:p-4 space-y-3 w-96 sm:w-64">
|
||||
<div class="flex flex-col items-center border border-gray-300 dark:border-gray-600 rounded overflow-hidden dark:bg-gray-800 bg-white p-8 sm:p-4 space-y-3 w-[300px] sm:w-64">
|
||||
<div><i class="fas fa-hand-holding-hand mx-2 text-margeblue dark:text-slate-200"></i>Open Source</div>
|
||||
<ul class="list-disc ml-8 sm:ml-6 text-slate-700 dark:text-white text-xs leading-5">
|
||||
<li>Custom index and crawler software</li>
|
||||
<li>Simple technology, no AI</li>
|
||||
<li>AGPL license</li>
|
||||
</ul>
|
||||
<div class="flex pt-4 gap-2">
|
||||
<div class="flex pt-4 gap-2 flex-col md:flex-row">
|
||||
<div class="text-xs text-liteblue dark:text-blue-200">
|
||||
<i class="fa-brands fa-github"></i>
|
||||
<a href="https://git.marginalia.nu/" class="underline">Git Repository</a>
|
||||
@@ -67,7 +67,7 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="flex flex-col border border-gray-300 dark:border-gray-600 rounded overflow-hidden dark:bg-gray-800 bg-white p-8 sm:p-4 space-y-3 w-96 sm:w-64">
|
||||
<div class="flex flex-col items-center border border-gray-300 dark:border-gray-600 rounded overflow-hidden dark:bg-gray-800 bg-white p-8 sm:p-4 space-y-3 w-[300px] sm:w-64">
|
||||
<div><i class="fas fa-lock mx-2 text-margeblue dark:text-slate-200"></i> Privacy by default</div>
|
||||
<ul class="list-disc ml-8 sm:ml-6 text-slate-700 dark:text-white text-xs leading-5">
|
||||
<li>Filter out tracking </li>
|
||||
|
@@ -2,7 +2,7 @@ package nu.marginalia.control.node.model;
|
||||
|
||||
import nu.marginalia.nodecfg.model.NodeConfiguration;
|
||||
|
||||
public record IndexNodeStatus(NodeConfiguration configuration, boolean indexServiceOnline, boolean executorServiceOnline) {
|
||||
public record IndexNodeStatus(NodeConfiguration configuration, boolean indexServiceOnline) {
|
||||
public int id() {
|
||||
return configuration.node();
|
||||
}
|
||||
|
@@ -338,7 +338,7 @@ public class ControlNodeService {
|
||||
}
|
||||
|
||||
private List<EventLogEntry> getEvents(int nodeId) {
|
||||
List<String> services = List.of(ServiceId.Index.serviceName +":"+nodeId, ServiceId.Executor.serviceName +":"+nodeId);
|
||||
List<String> services = List.of(ServiceId.Index.serviceName +":"+nodeId);
|
||||
List<EventLogEntry> events = new ArrayList<>(20);
|
||||
for (var service :services) {
|
||||
events.addAll(eventLogService.getLastEntriesForService(service, Long.MAX_VALUE, 10));
|
||||
@@ -358,8 +358,7 @@ public class ControlNodeService {
|
||||
|
||||
public IndexNodeStatus getStatus(NodeConfiguration config) {
|
||||
return new IndexNodeStatus(config,
|
||||
monitors.isServiceUp(ServiceId.Index, config.node()),
|
||||
monitors.isServiceUp(ServiceId.Executor, config.node())
|
||||
monitors.isServiceUp(ServiceId.Index, config.node())
|
||||
);
|
||||
}
|
||||
|
||||
|
@@ -2,7 +2,7 @@
|
||||
<h2>Nodes</h2>
|
||||
<table class="table">
|
||||
<tr>
|
||||
<th>Node</th><th>Profile</th><th>Queries</th><th>Enabled</th><th>Index</th><th>Executor</th>
|
||||
<th>Node</th><th>Profile</th><th>Queries</th><th>Enabled</th><th>Index</th>
|
||||
</tr>
|
||||
{{#each .}}
|
||||
<tr>
|
||||
@@ -24,9 +24,6 @@
|
||||
</td>
|
||||
{{#if indexServiceOnline}}<td>Online</td>{{/if}}
|
||||
{{#unless indexServiceOnline}}<td class="table-danger">Offline</td>{{/unless}}
|
||||
|
||||
{{#if executorServiceOnline}}<td>Online</td>{{/if}}
|
||||
{{#unless executorServiceOnline}}<td class="table-warning">Offline</td>{{/unless}}
|
||||
</tr>
|
||||
{{/each}}
|
||||
</table>
|
||||
|
@@ -1,100 +0,0 @@
|
||||
plugins {
|
||||
id 'java'
|
||||
|
||||
id 'application'
|
||||
id 'jvm-test-suite'
|
||||
id 'com.google.cloud.tools.jib' version '3.4.5'
|
||||
}
|
||||
|
||||
application {
|
||||
mainClass = 'nu.marginalia.executor.ExecutorMain'
|
||||
applicationName = 'executor-service'
|
||||
}
|
||||
|
||||
tasks.distZip.enabled = false
|
||||
|
||||
java {
|
||||
toolchain {
|
||||
languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
|
||||
}
|
||||
}
|
||||
|
||||
apply from: "$rootProject.projectDir/srcsets.gradle"
|
||||
apply from: "$rootProject.projectDir/docker.gradle"
|
||||
dependencies {
|
||||
// These look weird but they're needed to be able to spawn the processes
|
||||
// from the executor service
|
||||
|
||||
implementation project(':code:processes:crawling-process')
|
||||
implementation project(':code:processes:loading-process')
|
||||
implementation project(':code:processes:converting-process')
|
||||
implementation project(':code:processes:index-constructor-process')
|
||||
|
||||
implementation project(':code:common:config')
|
||||
implementation project(':code:common:model')
|
||||
implementation project(':code:common:db')
|
||||
implementation project(':code:common:linkdb')
|
||||
|
||||
implementation project(':code:common:service')
|
||||
|
||||
implementation project(':third-party:commons-codec')
|
||||
|
||||
implementation project(':code:libraries:message-queue')
|
||||
|
||||
implementation project(':code:functions:link-graph:api')
|
||||
implementation project(':code:functions:favicon')
|
||||
implementation project(':code:functions:favicon:api')
|
||||
implementation project(':code:functions:nsfw-domain-filter')
|
||||
|
||||
implementation project(':code:processes:crawling-process:model')
|
||||
implementation project(':code:processes:crawling-process:model')
|
||||
implementation project(':code:processes:crawling-process:ft-link-parser')
|
||||
implementation project(':code:index:index-journal')
|
||||
implementation project(':code:index:api')
|
||||
implementation project(':code:processes:process-mq-api')
|
||||
|
||||
implementation project(':code:execution')
|
||||
implementation project(':code:execution:api')
|
||||
|
||||
implementation project(':third-party:encyclopedia-marginalia-nu')
|
||||
|
||||
implementation libs.bundles.slf4j
|
||||
|
||||
implementation dependencies.create(libs.spark.get()) {
|
||||
exclude group: 'org.eclipse.jetty'
|
||||
}
|
||||
implementation libs.bundles.jetty
|
||||
implementation libs.guava
|
||||
libs.bundles.grpc.get().each {
|
||||
implementation dependencies.create(it) {
|
||||
exclude group: 'com.google.guava'
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
implementation libs.gson
|
||||
implementation libs.prometheus
|
||||
implementation libs.notnull
|
||||
implementation libs.guava
|
||||
implementation dependencies.create(libs.guice.get()) {
|
||||
exclude group: 'com.google.guava'
|
||||
}
|
||||
implementation libs.trove
|
||||
implementation libs.zstd
|
||||
implementation libs.jsoup
|
||||
implementation libs.commons.io
|
||||
implementation libs.commons.compress
|
||||
implementation libs.commons.lang3
|
||||
implementation libs.bundles.mariadb
|
||||
|
||||
testImplementation libs.bundles.slf4j.test
|
||||
testImplementation libs.bundles.junit
|
||||
testImplementation libs.mockito
|
||||
|
||||
testImplementation platform('org.testcontainers:testcontainers-bom:1.17.4')
|
||||
testImplementation libs.commons.codec
|
||||
testImplementation 'org.testcontainers:mariadb:1.17.4'
|
||||
testImplementation 'org.testcontainers:junit-jupiter:1.17.4'
|
||||
testImplementation project(':code:libraries:test-helpers')
|
||||
|
||||
}
|
@@ -1,45 +0,0 @@
|
||||
package nu.marginalia.executor;
|
||||
|
||||
import com.google.inject.Guice;
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Injector;
|
||||
import nu.marginalia.nsfw.NsfwFilterModule;
|
||||
import nu.marginalia.service.MainClass;
|
||||
import nu.marginalia.service.ServiceId;
|
||||
import nu.marginalia.service.discovery.ServiceRegistryIf;
|
||||
import nu.marginalia.service.module.DatabaseModule;
|
||||
import nu.marginalia.service.module.ServiceConfiguration;
|
||||
import nu.marginalia.service.module.ServiceConfigurationModule;
|
||||
import nu.marginalia.service.module.ServiceDiscoveryModule;
|
||||
import nu.marginalia.service.server.Initialization;
|
||||
import nu.marginalia.service.server.NodeStatusWatcher;
|
||||
|
||||
public class ExecutorMain extends MainClass {
|
||||
private final ExecutorSvc service;
|
||||
|
||||
@Inject
|
||||
public ExecutorMain(ExecutorSvc service) {
|
||||
this.service = service;
|
||||
}
|
||||
|
||||
public static void main(String... args) {
|
||||
init(ServiceId.Executor, args);
|
||||
|
||||
Injector injector = Guice.createInjector(
|
||||
new ExecutorModule(),
|
||||
new DatabaseModule(false),
|
||||
new NsfwFilterModule(),
|
||||
new ServiceDiscoveryModule(),
|
||||
new ServiceConfigurationModule(ServiceId.Executor)
|
||||
);
|
||||
|
||||
// Orchestrate the boot order for the services
|
||||
var registry = injector.getInstance(ServiceRegistryIf.class);
|
||||
var configuration = injector.getInstance(ServiceConfiguration.class);
|
||||
orchestrateBoot(registry, configuration);
|
||||
|
||||
injector.getInstance(NodeStatusWatcher.class);
|
||||
injector.getInstance(ExecutorMain.class);
|
||||
injector.getInstance(Initialization.class).setReady();
|
||||
}
|
||||
}
|
@@ -1,8 +0,0 @@
|
||||
package nu.marginalia.executor;
|
||||
|
||||
import com.google.inject.AbstractModule;
|
||||
|
||||
public class ExecutorModule extends AbstractModule {
|
||||
public void configure() {
|
||||
}
|
||||
}
|
@@ -1,55 +0,0 @@
|
||||
package nu.marginalia.executor;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import nu.marginalia.execution.*;
|
||||
import nu.marginalia.functions.favicon.FaviconGrpcService;
|
||||
import nu.marginalia.service.discovery.property.ServicePartition;
|
||||
import nu.marginalia.service.server.BaseServiceParams;
|
||||
import nu.marginalia.service.server.SparkService;
|
||||
import nu.marginalia.service.server.mq.MqRequest;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import spark.Spark;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
// Weird name for this one to not have clashes with java.util.concurrent.ExecutorService
|
||||
public class ExecutorSvc extends SparkService {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(ExecutorSvc.class);
|
||||
private final ExecutionInit executionInit;
|
||||
|
||||
@Inject
|
||||
public ExecutorSvc(BaseServiceParams params,
|
||||
ExecutorGrpcService executorGrpcService,
|
||||
ExecutorCrawlGrpcService executorCrawlGrpcService,
|
||||
ExecutorSideloadGrpcService executorSideloadGrpcService,
|
||||
ExecutorExportGrpcService executorExportGrpcService,
|
||||
FaviconGrpcService faviconGrpcService,
|
||||
ExecutionInit executionInit,
|
||||
ExecutorFileTransferService fileTransferService) throws Exception {
|
||||
super(params,
|
||||
ServicePartition.partition(params.configuration.node()),
|
||||
List.of(executorGrpcService,
|
||||
executorCrawlGrpcService,
|
||||
executorSideloadGrpcService,
|
||||
executorExportGrpcService,
|
||||
faviconGrpcService)
|
||||
);
|
||||
|
||||
this.executionInit = executionInit;
|
||||
|
||||
Spark.get("/transfer/file/:fid", fileTransferService::transferFile);
|
||||
Spark.head("/transfer/file/:fid", fileTransferService::transferFile);
|
||||
}
|
||||
|
||||
@MqRequest(endpoint="FIRST-BOOT")
|
||||
public void setUpDefaultActors(String message) throws Exception {
|
||||
logger.info("Initializing default actors");
|
||||
|
||||
executionInit.initDefaultActors();
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
@@ -1,10 +0,0 @@
|
||||
The executor service is a partitioned service responsible for executing and keeping
|
||||
track of long-running maintenance and operational tasks, such as crawling or data
|
||||
processing.
|
||||
|
||||
The executor service is closely linked to the [control-service](../control-service),
|
||||
which provides a user interface for much of the executor's functionality.
|
||||
|
||||
The service it itself relatively bare of code, but imports and exposes the [execution subsystem](../../execution),
|
||||
which is responsible for the actual execution of tasks.
|
||||
|
@@ -30,10 +30,16 @@ dependencies {
|
||||
implementation project(':code:common:db')
|
||||
implementation project(':code:common:linkdb')
|
||||
|
||||
implementation project(':code:execution')
|
||||
implementation project(':code:execution:api')
|
||||
implementation project(':code:functions:favicon')
|
||||
implementation project(':code:functions:favicon:api')
|
||||
|
||||
implementation project(':code:index')
|
||||
implementation project(':code:functions:link-graph:partition')
|
||||
implementation project(':code:functions:link-graph:api')
|
||||
implementation project(':code:functions:search-query:api')
|
||||
implementation project(':code:functions:nsfw-domain-filter')
|
||||
implementation project(':code:index:api')
|
||||
|
||||
testImplementation project(path: ':code:services-core:control-service')
|
||||
|
@@ -3,13 +3,14 @@ package nu.marginalia.index;
|
||||
import com.google.inject.Guice;
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Injector;
|
||||
import nu.marginalia.nsfw.NsfwFilterModule;
|
||||
import nu.marginalia.service.MainClass;
|
||||
import nu.marginalia.service.discovery.ServiceRegistryIf;
|
||||
import nu.marginalia.service.module.ServiceConfiguration;
|
||||
import nu.marginalia.service.module.ServiceDiscoveryModule;
|
||||
import nu.marginalia.service.ServiceId;
|
||||
import nu.marginalia.service.module.ServiceConfigurationModule;
|
||||
import nu.marginalia.service.discovery.ServiceRegistryIf;
|
||||
import nu.marginalia.service.module.DatabaseModule;
|
||||
import nu.marginalia.service.module.ServiceConfiguration;
|
||||
import nu.marginalia.service.module.ServiceConfigurationModule;
|
||||
import nu.marginalia.service.module.ServiceDiscoveryModule;
|
||||
import nu.marginalia.service.server.Initialization;
|
||||
import nu.marginalia.service.server.NodeStatusWatcher;
|
||||
|
||||
@@ -28,6 +29,7 @@ public class IndexMain extends MainClass {
|
||||
new IndexModule(),
|
||||
new DatabaseModule(false),
|
||||
new ServiceDiscoveryModule(),
|
||||
new NsfwFilterModule(),
|
||||
new ServiceConfigurationModule(ServiceId.Index)
|
||||
);
|
||||
|
||||
|
@@ -2,6 +2,8 @@ package nu.marginalia.index;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import nu.marginalia.IndexLocations;
|
||||
import nu.marginalia.execution.*;
|
||||
import nu.marginalia.functions.favicon.FaviconGrpcService;
|
||||
import nu.marginalia.index.api.IndexMqEndpoints;
|
||||
import nu.marginalia.index.index.StatefulIndex;
|
||||
import nu.marginalia.linkdb.docs.DocumentDbReader;
|
||||
@@ -14,9 +16,11 @@ import nu.marginalia.service.server.Initialization;
|
||||
import nu.marginalia.service.server.SparkService;
|
||||
import nu.marginalia.service.server.mq.MqRequest;
|
||||
import nu.marginalia.storage.FileStorageService;
|
||||
import nu.marginalia.svc.ExecutorFileTransferService;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import spark.Spark;
|
||||
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
@@ -38,6 +42,7 @@ public class IndexService extends SparkService {
|
||||
private final DomainLinks domainLinks;
|
||||
private final ServiceEventLog eventLog;
|
||||
|
||||
private final ExecutionInit executionInit;
|
||||
|
||||
@Inject
|
||||
public IndexService(BaseServiceParams params,
|
||||
@@ -48,13 +53,25 @@ public class IndexService extends SparkService {
|
||||
DocumentDbReader documentDbReader,
|
||||
DomainLinks domainLinks,
|
||||
PartitionLinkGraphService partitionLinkGraphService,
|
||||
ExecutorGrpcService executorGrpcService,
|
||||
ExecutorCrawlGrpcService executorCrawlGrpcService,
|
||||
ExecutorSideloadGrpcService executorSideloadGrpcService,
|
||||
ExecutorExportGrpcService executorExportGrpcService,
|
||||
FaviconGrpcService faviconGrpcService,
|
||||
ExecutionInit executionInit,
|
||||
ExecutorFileTransferService fileTransferService,
|
||||
ServiceEventLog eventLog)
|
||||
throws Exception
|
||||
{
|
||||
super(params,
|
||||
ServicePartition.partition(params.configuration.node()),
|
||||
List.of(indexQueryService,
|
||||
partitionLinkGraphService)
|
||||
partitionLinkGraphService,
|
||||
executorGrpcService,
|
||||
executorCrawlGrpcService,
|
||||
executorSideloadGrpcService,
|
||||
executorExportGrpcService,
|
||||
faviconGrpcService)
|
||||
);
|
||||
|
||||
this.opsService = opsService;
|
||||
@@ -62,15 +79,26 @@ public class IndexService extends SparkService {
|
||||
this.fileStorageService = fileStorageService;
|
||||
this.documentDbReader = documentDbReader;
|
||||
this.domainLinks = domainLinks;
|
||||
this.executionInit = executionInit;
|
||||
this.eventLog = eventLog;
|
||||
|
||||
this.init = params.initialization;
|
||||
|
||||
Spark.get("/transfer/file/:fid", fileTransferService::transferFile);
|
||||
Spark.head("/transfer/file/:fid", fileTransferService::transferFile);
|
||||
|
||||
Thread.ofPlatform().name("initialize-index").start(this::initialize);
|
||||
}
|
||||
|
||||
volatile boolean initialized = false;
|
||||
|
||||
@MqRequest(endpoint="FIRST-BOOT")
|
||||
public void setUpDefaultActors(String message) throws Exception {
|
||||
logger.info("Initializing default actors");
|
||||
|
||||
executionInit.initDefaultActors();
|
||||
}
|
||||
|
||||
@MqRequest(endpoint = IndexMqEndpoints.INDEX_RERANK)
|
||||
public String rerank(String message) {
|
||||
if (!opsService.rerank()) {
|
||||
|
@@ -24,7 +24,6 @@ dependencies {
|
||||
implementation project(':code:services-core:query-service')
|
||||
implementation project(':code:services-core:index-service')
|
||||
implementation project(':code:services-core:control-service')
|
||||
implementation project(':code:services-core:executor-service')
|
||||
|
||||
testImplementation libs.bundles.slf4j.test
|
||||
testImplementation libs.bundles.junit
|
||||
|
@@ -12,3 +12,5 @@
|
||||
2025-05-28: Deploy assistant and browserless.
|
||||
2025-06-06: Deploy assistant and browserless.
|
||||
2025-07-21: Deploy executor partition 1.
|
||||
2025-07-21: Deploy search.
|
||||
2025-07-23: Redeploy all.
|
||||
|
File diff suppressed because one or more lines are too long
Before Width: | Height: | Size: 60 KiB After Width: | Height: | Size: 60 KiB |
@@ -40,11 +40,6 @@ services:
|
||||
<<: *partition-1
|
||||
image: "marginalia/index-service"
|
||||
container_name: "index-service-1"
|
||||
executor-service-1:
|
||||
<<: *partition-1
|
||||
restart: always
|
||||
image: "marginalia/executor-service"
|
||||
container_name: "executor-service-1"
|
||||
query-service:
|
||||
<<: *service
|
||||
image: "marginalia/query-service"
|
||||
|
@@ -62,20 +62,10 @@ services:
|
||||
<<: *partition-1
|
||||
image: "marginalia/index-service"
|
||||
container_name: "index-service-1"
|
||||
executor-service-1:
|
||||
<<: *partition-1
|
||||
restart: always
|
||||
image: "marginalia/executor-service"
|
||||
container_name: "executor-service-1"
|
||||
index-service-2:
|
||||
<<: *partition-2
|
||||
image: "marginalia/index-service"
|
||||
container_name: "index-service-2"
|
||||
executor-service-2:
|
||||
<<: *partition-2
|
||||
restart: always
|
||||
image: "marginalia/executor-service"
|
||||
container_name: "executor-service-2"
|
||||
query-service:
|
||||
<<: *service
|
||||
image: "marginalia/query-service"
|
||||
|
@@ -62,20 +62,10 @@ services:
|
||||
<<: *partition-1
|
||||
image: "marginalia/index-service"
|
||||
container_name: "index-service-1"
|
||||
executor-service-1:
|
||||
<<: *partition-1
|
||||
restart: always
|
||||
image: "marginalia/executor-service"
|
||||
container_name: "executor-service-1"
|
||||
index-service-2:
|
||||
<<: *partition-2
|
||||
image: "marginalia/index-service"
|
||||
container_name: "index-service-2"
|
||||
executor-service-2:
|
||||
<<: *partition-2
|
||||
restart: always
|
||||
image: "marginalia/executor-service"
|
||||
container_name: "executor-service-2"
|
||||
query-service:
|
||||
<<: *service
|
||||
image: "marginalia/query-service"
|
||||
|
@@ -31,13 +31,11 @@ A working setup needs at all the services
|
||||
* control [ http port is the control GUI ]
|
||||
* query [ http port is the query GUI ]
|
||||
* index [ http port is internal ]
|
||||
* executor [ http port is internal ]
|
||||
|
||||
Since you will need to manage ports yourself, you must assign distinct ports-pairs to each service.
|
||||
|
||||
* An index and executor services should exist on the same partition e.g. index:1 and executor:1. The partition
|
||||
number is the last digit of the service name, and should be positive. You can have multiple pairs of index
|
||||
and executor partitions, but the pair should run on the same physical machine with the same install directory.
|
||||
* An index service should exist on the same partition e.g. index:1. The partition
|
||||
number is the last digit of the service name, and should be positive.
|
||||
|
||||
* The query service can use any partition number.
|
||||
|
||||
|
@@ -4,7 +4,6 @@ include 'code:services-core:index-service'
|
||||
include 'code:services-core:assistant-service'
|
||||
include 'code:services-core:control-service'
|
||||
include 'code:services-core:query-service'
|
||||
include 'code:services-core:executor-service'
|
||||
include 'code:services-core:single-service-runner'
|
||||
|
||||
include 'code:services-application:search-service'
|
||||
|
@@ -76,13 +76,6 @@ SERVICE_CONFIG = {
|
||||
deploy_tier=3,
|
||||
groups={"all", "index"}
|
||||
),
|
||||
'executor': ServiceConfig(
|
||||
gradle_target=':code:services-core:executor-service:docker',
|
||||
docker_name='executor-service',
|
||||
instances=10,
|
||||
deploy_tier=3,
|
||||
groups={"all", "executor"}
|
||||
),
|
||||
'control': ServiceConfig(
|
||||
gradle_target=':code:services-core:control-service:docker',
|
||||
docker_name='control-service',
|
||||
|
Reference in New Issue
Block a user