mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-05 21:22:39 +02:00
Compare commits
4 Commits
deploy-020
...
deploy-020
Author | SHA1 | Date | |
---|---|---|---|
|
abe9da0fc6 | ||
|
56d0128b0a | ||
|
840b68ac55 | ||
|
c34ff6d6c3 |
@@ -12,6 +12,7 @@ import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.time.Duration;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
@@ -109,8 +110,7 @@ public class DomSampleService {
|
||||
private void updateDomain(BrowserlessClient client, String domain) {
|
||||
var rootUrl = "https://" + domain + "/";
|
||||
try {
|
||||
var content = client.annotatedContent(rootUrl,
|
||||
BrowserlessClient.GotoOptions.defaultValues());
|
||||
var content = client.annotatedContent(rootUrl, new BrowserlessClient.GotoOptions("load", Duration.ofSeconds(10).toMillis()));
|
||||
|
||||
if (content.isPresent()) {
|
||||
db.saveSample(domain, rootUrl, content.get());
|
||||
|
@@ -26,7 +26,9 @@ public class DomSampleDb implements AutoCloseable {
|
||||
stmt.executeUpdate("CREATE TABLE IF NOT EXISTS samples (url TEXT PRIMARY KEY, domain TEXT, sample BLOB, requests BLOB, accepted_popover BOOLEAN DEFAULT FALSE)");
|
||||
stmt.executeUpdate("CREATE INDEX IF NOT EXISTS domain_index ON samples (domain)");
|
||||
stmt.executeUpdate("CREATE TABLE IF NOT EXISTS schedule (domain TEXT PRIMARY KEY, last_fetch TIMESTAMP DEFAULT NULL)");
|
||||
stmt.execute("PRAGMA journal_mode=WAL");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public void syncDomains(Set<String> domains) {
|
||||
@@ -151,8 +153,6 @@ public class DomSampleDb implements AutoCloseable {
|
||||
|
||||
}
|
||||
|
||||
record Request(String url, String method, String timestamp, boolean acceptedPopover) {}
|
||||
|
||||
public void saveSampleRaw(String domain, String url, String sample, String requests, boolean acceptedPopover) throws SQLException {
|
||||
try (var stmt = connection.prepareStatement("""
|
||||
INSERT OR REPLACE
|
||||
|
@@ -141,7 +141,7 @@ public class BrowserlessClient implements AutoCloseable {
|
||||
|
||||
public record GotoOptions(String waitUntil, long timeout) {
|
||||
public static GotoOptions defaultValues() {
|
||||
return new GotoOptions("load", Duration.ofSeconds(10).toMillis());
|
||||
return new GotoOptions("networkidle2", Duration.ofSeconds(10).toMillis());
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -126,7 +126,6 @@ public class LiveCaptureGrpcService
|
||||
}
|
||||
else {
|
||||
EdgeDomain domain = domainNameOpt.get();
|
||||
String domainNameStr = domain.toString();
|
||||
|
||||
if (!isValidDomainForCapture(domain)) {
|
||||
ScreenshotDbOperations.flagDomainAsFetched(conn, domain);
|
||||
|
@@ -108,7 +108,7 @@ public class BrowserlessClientTest {
|
||||
DomSampleDb dbop = new DomSampleDb(Path.of("/tmp/dom-sample.db"))
|
||||
) {
|
||||
var content = client.annotatedContent("https://marginalia.nu/", BrowserlessClient.GotoOptions.defaultValues()).orElseThrow();
|
||||
dbop.saveSample("marginalia.nu", "https://www.thesodacanstove.com/alcohol-stove/how-to-build/", content);
|
||||
dbop.saveSample("marginalia.nu", "https://marginalia.nu/", content);
|
||||
System.out.println(content);
|
||||
Assertions.assertFalse(content.isBlank(), "Content should not be empty");
|
||||
|
||||
|
@@ -18,6 +18,7 @@ import nu.marginalia.service.server.JoobyService;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.List;
|
||||
import java.util.NoSuchElementException;
|
||||
|
||||
@@ -41,6 +42,8 @@ public class SearchService extends JoobyService {
|
||||
.help("Search service error count")
|
||||
.register();
|
||||
|
||||
private final String openSearchXML;
|
||||
|
||||
@Inject
|
||||
public SearchService(BaseServiceParams params,
|
||||
WebsiteUrl websiteUrl,
|
||||
@@ -69,6 +72,13 @@ public class SearchService extends JoobyService {
|
||||
this.siteSubscriptionService = siteSubscriptionService;
|
||||
this.faviconClient = faviconClient;
|
||||
this.domainQueries = domainQueries;
|
||||
|
||||
try (var is = ClassLoader.getSystemResourceAsStream("static/opensearch.xml")) {
|
||||
openSearchXML = new String(is.readAllBytes(), StandardCharsets.UTF_8);
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new RuntimeException("Failed to load OpenSearch XML", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -82,6 +92,11 @@ public class SearchService extends JoobyService {
|
||||
jooby.get("/site/https://*", this::handleSiteUrlRedirect);
|
||||
jooby.get("/site/http://*", this::handleSiteUrlRedirect);
|
||||
|
||||
jooby.get("/opensearch.xml", ctx -> {
|
||||
ctx.setResponseType(MediaType.valueOf("application/opensearchdescription+xml"));
|
||||
return openSearchXML;
|
||||
});
|
||||
|
||||
String emptySvg = "<svg xmlns=\"http://www.w3.org/2000/svg\"></svg>";
|
||||
jooby.get("/site/{domain}/favicon", ctx -> {
|
||||
String domain = ctx.path("domain").value();
|
||||
|
@@ -1,61 +0,0 @@
|
||||
# This docker-compose file is for the screenshot-capture-tool service.
|
||||
#
|
||||
# It is a standalone daemon that captures screenshots of web pages, based
|
||||
# on the domain database of Marginalia Search.
|
||||
#
|
||||
# It does not start the search engine itself.
|
||||
#
|
||||
|
||||
x-svc: &service
|
||||
env_file:
|
||||
- "run/env/service.env"
|
||||
volumes:
|
||||
- conf:/wmsa/conf:ro
|
||||
- data:/wmsa/data
|
||||
- logs:/var/log/wmsa
|
||||
networks:
|
||||
- wmsa
|
||||
services:
|
||||
screenshot-capture-tool:
|
||||
<<: *service
|
||||
image: "marginalia/screenshot-capture-tool"
|
||||
container_name: "screenshot-capture-tool"
|
||||
networks:
|
||||
- wmsa
|
||||
- headlesschrome
|
||||
depends_on:
|
||||
- browserless
|
||||
browserless:
|
||||
<<: *service
|
||||
image: "browserless/chrome"
|
||||
container_name: "headlesschrome"
|
||||
env_file:
|
||||
- "run/env/browserless.env"
|
||||
ports:
|
||||
- "3000:3000"
|
||||
networks:
|
||||
- wmsa
|
||||
- headlesschrome
|
||||
|
||||
networks:
|
||||
wmsa:
|
||||
headlesschrome:
|
||||
volumes:
|
||||
logs:
|
||||
driver: local
|
||||
driver_opts:
|
||||
type: none
|
||||
o: bind
|
||||
device: run/logs
|
||||
conf:
|
||||
driver: local
|
||||
driver_opts:
|
||||
type: none
|
||||
o: bind
|
||||
device: run/conf
|
||||
data:
|
||||
driver: local
|
||||
driver_opts:
|
||||
type: none
|
||||
o: bind
|
||||
device: run/data
|
Reference in New Issue
Block a user