mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-05 21:22:39 +02:00
Compare commits
7 Commits
deploy-020
...
deploy-020
Author | SHA1 | Date | |
---|---|---|---|
|
a0fe070fe7 | ||
|
abe9da0fc6 | ||
|
56d0128b0a | ||
|
840b68ac55 | ||
|
c34ff6d6c3 | ||
|
32780967d8 | ||
|
7330bc489d |
@@ -12,6 +12,7 @@ import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.time.Duration;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
@@ -35,15 +36,22 @@ public class DomSampleService {
|
||||
|
||||
if (StringUtils.isEmpty(browserlessAddress) || serviceConfiguration.node() > 1) {
|
||||
logger.warn("Live capture service will not run");
|
||||
browserlessURI = null; // satisfy final
|
||||
browserlessURI = null;
|
||||
}
|
||||
else {
|
||||
browserlessURI = new URI(browserlessAddress);
|
||||
|
||||
Thread.ofPlatform().daemon().start(this::run);
|
||||
}
|
||||
}
|
||||
|
||||
public void start() {
|
||||
if (browserlessURI == null) {
|
||||
logger.warn("DomSampleService is not enabled due to missing browserless URI or multi-node configuration");
|
||||
return;
|
||||
}
|
||||
|
||||
Thread.ofPlatform().daemon().start(this::run);
|
||||
}
|
||||
|
||||
public void syncDomains() {
|
||||
Set<String> dbDomains = new HashSet<>();
|
||||
|
||||
@@ -102,8 +110,7 @@ public class DomSampleService {
|
||||
private void updateDomain(BrowserlessClient client, String domain) {
|
||||
var rootUrl = "https://" + domain + "/";
|
||||
try {
|
||||
var content = client.annotatedContent(rootUrl,
|
||||
BrowserlessClient.GotoOptions.defaultValues());
|
||||
var content = client.annotatedContent(rootUrl, new BrowserlessClient.GotoOptions("load", Duration.ofSeconds(10).toMillis()));
|
||||
|
||||
if (content.isPresent()) {
|
||||
db.saveSample(domain, rootUrl, content.get());
|
||||
|
@@ -26,7 +26,9 @@ public class DomSampleDb implements AutoCloseable {
|
||||
stmt.executeUpdate("CREATE TABLE IF NOT EXISTS samples (url TEXT PRIMARY KEY, domain TEXT, sample BLOB, requests BLOB, accepted_popover BOOLEAN DEFAULT FALSE)");
|
||||
stmt.executeUpdate("CREATE INDEX IF NOT EXISTS domain_index ON samples (domain)");
|
||||
stmt.executeUpdate("CREATE TABLE IF NOT EXISTS schedule (domain TEXT PRIMARY KEY, last_fetch TIMESTAMP DEFAULT NULL)");
|
||||
stmt.execute("PRAGMA journal_mode=WAL");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public void syncDomains(Set<String> domains) {
|
||||
@@ -151,8 +153,6 @@ public class DomSampleDb implements AutoCloseable {
|
||||
|
||||
}
|
||||
|
||||
record Request(String url, String method, String timestamp, boolean acceptedPopover) {}
|
||||
|
||||
public void saveSampleRaw(String domain, String url, String sample, String requests, boolean acceptedPopover) throws SQLException {
|
||||
try (var stmt = connection.prepareStatement("""
|
||||
INSERT OR REPLACE
|
||||
|
@@ -141,7 +141,7 @@ public class BrowserlessClient implements AutoCloseable {
|
||||
|
||||
public record GotoOptions(String waitUntil, long timeout) {
|
||||
public static GotoOptions defaultValues() {
|
||||
return new GotoOptions("load", Duration.ofSeconds(10).toMillis());
|
||||
return new GotoOptions("networkidle2", Duration.ofSeconds(10).toMillis());
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -126,7 +126,6 @@ public class LiveCaptureGrpcService
|
||||
}
|
||||
else {
|
||||
EdgeDomain domain = domainNameOpt.get();
|
||||
String domainNameStr = domain.toString();
|
||||
|
||||
if (!isValidDomainForCapture(domain)) {
|
||||
ScreenshotDbOperations.flagDomainAsFetched(conn, domain);
|
||||
|
@@ -108,7 +108,7 @@ public class BrowserlessClientTest {
|
||||
DomSampleDb dbop = new DomSampleDb(Path.of("/tmp/dom-sample.db"))
|
||||
) {
|
||||
var content = client.annotatedContent("https://marginalia.nu/", BrowserlessClient.GotoOptions.defaultValues()).orElseThrow();
|
||||
dbop.saveSample("marginalia.nu", "https://www.thesodacanstove.com/alcohol-stove/how-to-build/", content);
|
||||
dbop.saveSample("marginalia.nu", "https://marginalia.nu/", content);
|
||||
System.out.println(content);
|
||||
Assertions.assertFalse(content.isBlank(), "Content should not be empty");
|
||||
|
||||
|
@@ -18,6 +18,7 @@ import nu.marginalia.service.server.JoobyService;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.List;
|
||||
import java.util.NoSuchElementException;
|
||||
|
||||
@@ -41,6 +42,8 @@ public class SearchService extends JoobyService {
|
||||
.help("Search service error count")
|
||||
.register();
|
||||
|
||||
private final String openSearchXML;
|
||||
|
||||
@Inject
|
||||
public SearchService(BaseServiceParams params,
|
||||
WebsiteUrl websiteUrl,
|
||||
@@ -69,6 +72,13 @@ public class SearchService extends JoobyService {
|
||||
this.siteSubscriptionService = siteSubscriptionService;
|
||||
this.faviconClient = faviconClient;
|
||||
this.domainQueries = domainQueries;
|
||||
|
||||
try (var is = ClassLoader.getSystemResourceAsStream("static/opensearch.xml")) {
|
||||
openSearchXML = new String(is.readAllBytes(), StandardCharsets.UTF_8);
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new RuntimeException("Failed to load OpenSearch XML", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -82,6 +92,11 @@ public class SearchService extends JoobyService {
|
||||
jooby.get("/site/https://*", this::handleSiteUrlRedirect);
|
||||
jooby.get("/site/http://*", this::handleSiteUrlRedirect);
|
||||
|
||||
jooby.get("/opensearch.xml", ctx -> {
|
||||
ctx.setResponseType(MediaType.valueOf("application/opensearchdescription+xml"));
|
||||
return openSearchXML;
|
||||
});
|
||||
|
||||
String emptySvg = "<svg xmlns=\"http://www.w3.org/2000/svg\"></svg>";
|
||||
jooby.get("/site/{domain}/favicon", ctx -> {
|
||||
String domain = ctx.path("domain").value();
|
||||
|
@@ -5,6 +5,7 @@ import com.google.inject.Inject;
|
||||
import io.jooby.Context;
|
||||
import io.jooby.Jooby;
|
||||
import nu.marginalia.assistant.suggest.Suggestions;
|
||||
import nu.marginalia.domsample.DomSampleService;
|
||||
import nu.marginalia.functions.domains.DomainInfoGrpcService;
|
||||
import nu.marginalia.functions.math.MathGrpcService;
|
||||
import nu.marginalia.livecapture.LiveCaptureGrpcService;
|
||||
@@ -30,6 +31,7 @@ public class AssistantService extends JoobyService {
|
||||
ScreenshotService screenshotService,
|
||||
DomainInfoGrpcService domainInfoGrpcService,
|
||||
LiveCaptureGrpcService liveCaptureGrpcService,
|
||||
DomSampleService domSampleService,
|
||||
FeedsGrpcService feedsGrpcService,
|
||||
MathGrpcService mathGrpcService,
|
||||
Suggestions suggestions)
|
||||
@@ -41,10 +43,11 @@ public class AssistantService extends JoobyService {
|
||||
liveCaptureGrpcService,
|
||||
feedsGrpcService),
|
||||
List.of());
|
||||
this.screenshotService = screenshotService;
|
||||
|
||||
this.screenshotService = screenshotService;
|
||||
this.suggestions = suggestions;
|
||||
|
||||
domSampleService.start();
|
||||
}
|
||||
|
||||
public void startJooby(Jooby jooby) {
|
||||
|
@@ -10,3 +10,4 @@
|
||||
2025-05-08: Deploy assistant.
|
||||
2025-05-17: Redeploy all.
|
||||
2025-05-28: Deploy assistant and browserless.
|
||||
2025-06-06: Deploy assistant and browserless.
|
@@ -1,61 +0,0 @@
|
||||
# This docker-compose file is for the screenshot-capture-tool service.
|
||||
#
|
||||
# It is a standalone daemon that captures screenshots of web pages, based
|
||||
# on the domain database of Marginalia Search.
|
||||
#
|
||||
# It does not start the search engine itself.
|
||||
#
|
||||
|
||||
x-svc: &service
|
||||
env_file:
|
||||
- "run/env/service.env"
|
||||
volumes:
|
||||
- conf:/wmsa/conf:ro
|
||||
- data:/wmsa/data
|
||||
- logs:/var/log/wmsa
|
||||
networks:
|
||||
- wmsa
|
||||
services:
|
||||
screenshot-capture-tool:
|
||||
<<: *service
|
||||
image: "marginalia/screenshot-capture-tool"
|
||||
container_name: "screenshot-capture-tool"
|
||||
networks:
|
||||
- wmsa
|
||||
- headlesschrome
|
||||
depends_on:
|
||||
- browserless
|
||||
browserless:
|
||||
<<: *service
|
||||
image: "browserless/chrome"
|
||||
container_name: "headlesschrome"
|
||||
env_file:
|
||||
- "run/env/browserless.env"
|
||||
ports:
|
||||
- "3000:3000"
|
||||
networks:
|
||||
- wmsa
|
||||
- headlesschrome
|
||||
|
||||
networks:
|
||||
wmsa:
|
||||
headlesschrome:
|
||||
volumes:
|
||||
logs:
|
||||
driver: local
|
||||
driver_opts:
|
||||
type: none
|
||||
o: bind
|
||||
device: run/logs
|
||||
conf:
|
||||
driver: local
|
||||
driver_opts:
|
||||
type: none
|
||||
o: bind
|
||||
device: run/conf
|
||||
data:
|
||||
driver: local
|
||||
driver_opts:
|
||||
type: none
|
||||
o: bind
|
||||
device: run/data
|
@@ -272,9 +272,9 @@ if __name__ == '__main__':
|
||||
deploy_tier=1,
|
||||
groups={"all", "core"}
|
||||
),
|
||||
'headlesschrome': ServiceConfig(
|
||||
'browserless': ServiceConfig(
|
||||
gradle_target=':code:tools:browserless:docker',
|
||||
docker_name='headlesschrome',
|
||||
docker_name='browserless',
|
||||
instances=None,
|
||||
deploy_tier=2,
|
||||
groups={"all", "core"}
|
||||
|
Reference in New Issue
Block a user