1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-06 07:32:38 +02:00

Compare commits

...

3 Commits

Author SHA1 Message Date
Viktor Lofgren
abe9da0fc6 (search) Ensure the new search UI sets the correct content-type for opensearch.xml 2025-05-29 12:44:55 +02:00
Viktor Lofgren
56d0128b0a (dom-sample) Remove redundant code 2025-05-28 17:43:46 +02:00
Viktor Lofgren
840b68ac55 (dom-sample) Minor cleanups 2025-05-28 16:27:27 +02:00
7 changed files with 19 additions and 68 deletions

View File

@@ -12,6 +12,7 @@ import org.slf4j.LoggerFactory;
import java.net.URI;
import java.net.URISyntaxException;
import java.time.Duration;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.TimeUnit;
@@ -109,8 +110,7 @@ public class DomSampleService {
private void updateDomain(BrowserlessClient client, String domain) {
var rootUrl = "https://" + domain + "/";
try {
var content = client.annotatedContent(rootUrl,
BrowserlessClient.GotoOptions.defaultValues());
var content = client.annotatedContent(rootUrl, new BrowserlessClient.GotoOptions("load", Duration.ofSeconds(10).toMillis()));
if (content.isPresent()) {
db.saveSample(domain, rootUrl, content.get());

View File

@@ -153,8 +153,6 @@ public class DomSampleDb implements AutoCloseable {
}
/**
 * Immutable value carrier for a single request entry: its URL, method,
 * a timestamp kept as a string, and whether a popover had been accepted.
 * NOTE(review): no usage of this record is visible in this excerpt — confirm
 * it is still referenced before relying on it.
 */
record Request(String url, String method, String timestamp, boolean acceptedPopover) {}
public void saveSampleRaw(String domain, String url, String sample, String requests, boolean acceptedPopover) throws SQLException {
try (var stmt = connection.prepareStatement("""
INSERT OR REPLACE

View File

@@ -141,7 +141,7 @@ public class BrowserlessClient implements AutoCloseable {
/**
 * Navigation options handed to the browserless client when loading a page.
 *
 * @param waitUntil name of the page lifecycle event to wait for before the
 *                  navigation is considered finished (values seen in this
 *                  code base: {@code "load"}, {@code "networkidle2"})
 * @param timeout   maximum time to wait, in milliseconds
 */
public record GotoOptions(String waitUntil, long timeout) {
    /**
     * Returns the default navigation options: wait for {@code "networkidle2"}
     * with a 10 second timeout.
     *
     * <p>Callers that need the earlier {@code "load"} behaviour should construct
     * the record explicitly instead of relying on this default.
     *
     * @return a new {@code GotoOptions} carrying the default values
     */
    public static GotoOptions defaultValues() {
        // Exactly one return: a second return statement here would be
        // unreachable and is rejected by the Java compiler.
        return new GotoOptions("networkidle2", Duration.ofSeconds(10).toMillis());
    }
}

View File

@@ -126,7 +126,6 @@ public class LiveCaptureGrpcService
}
else {
EdgeDomain domain = domainNameOpt.get();
String domainNameStr = domain.toString();
if (!isValidDomainForCapture(domain)) {
ScreenshotDbOperations.flagDomainAsFetched(conn, domain);

View File

@@ -108,7 +108,7 @@ public class BrowserlessClientTest {
DomSampleDb dbop = new DomSampleDb(Path.of("/tmp/dom-sample.db"))
) {
var content = client.annotatedContent("https://marginalia.nu/", BrowserlessClient.GotoOptions.defaultValues()).orElseThrow();
dbop.saveSample("marginalia.nu", "https://www.thesodacanstove.com/alcohol-stove/how-to-build/", content);
dbop.saveSample("marginalia.nu", "https://marginalia.nu/", content);
System.out.println(content);
Assertions.assertFalse(content.isBlank(), "Content should not be empty");

View File

@@ -18,6 +18,7 @@ import nu.marginalia.service.server.JoobyService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.NoSuchElementException;
@@ -41,6 +42,8 @@ public class SearchService extends JoobyService {
.help("Search service error count")
.register();
private final String openSearchXML;
@Inject
public SearchService(BaseServiceParams params,
WebsiteUrl websiteUrl,
@@ -69,6 +72,13 @@ public class SearchService extends JoobyService {
this.siteSubscriptionService = siteSubscriptionService;
this.faviconClient = faviconClient;
this.domainQueries = domainQueries;
try (var is = ClassLoader.getSystemResourceAsStream("static/opensearch.xml")) {
openSearchXML = new String(is.readAllBytes(), StandardCharsets.UTF_8);
}
catch (Exception e) {
throw new RuntimeException("Failed to load OpenSearch XML", e);
}
}
@Override
@@ -82,6 +92,11 @@ public class SearchService extends JoobyService {
jooby.get("/site/https://*", this::handleSiteUrlRedirect);
jooby.get("/site/http://*", this::handleSiteUrlRedirect);
jooby.get("/opensearch.xml", ctx -> {
ctx.setResponseType(MediaType.valueOf("application/opensearchdescription+xml"));
return openSearchXML;
});
String emptySvg = "<svg xmlns=\"http://www.w3.org/2000/svg\"></svg>";
jooby.get("/site/{domain}/favicon", ctx -> {
String domain = ctx.path("domain").value();

View File

@@ -1,61 +0,0 @@
# This docker-compose file is for the screenshot-capture-tool service.
#
# It is a standalone daemon that captures screenshots of web pages, based
# on the domain database of Marginalia Search.
#
# It does not start the search engine itself.
#

# Shared service settings, merged into each service below via the YAML anchor.
x-svc: &service
  env_file:
    - "run/env/service.env"
  volumes:
    - conf:/wmsa/conf:ro
    - data:/wmsa/data
    - logs:/var/log/wmsa
  networks:
    - wmsa

services:
  screenshot-capture-tool:
    <<: *service
    image: "marginalia/screenshot-capture-tool"
    container_name: "screenshot-capture-tool"
    networks:
      - wmsa
      - headlesschrome
    depends_on:
      - browserless
  browserless:
    <<: *service
    image: "browserless/chrome"
    container_name: "headlesschrome"
    # Overrides the env_file inherited from the shared settings.
    env_file:
      - "run/env/browserless.env"
    ports:
      - "3000:3000"
    networks:
      - wmsa
      - headlesschrome

networks:
  wmsa:
  headlesschrome:

# Named volumes bound to directories under run/.
volumes:
  logs:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: run/logs
  conf:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: run/conf
  data:
    driver: local
    driver_opts:
      type: none
      o: bind
      device: run/data