mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-06 17:32:39 +02:00
Compare commits
23 Commits
deploy-008
...
deploy-009
Author | SHA1 | Date | |
---|---|---|---|
|
3ceea17c1d | ||
|
b34527c1a3 | ||
|
185bf28fca | ||
|
78cc25584a | ||
|
62ba30bacf | ||
|
3bb84eb206 | ||
|
be7d13ccce | ||
|
8c088a7c0b | ||
|
ea9a642b9b | ||
|
27f528af6a | ||
|
20ca41ec95 | ||
|
7671f0d9e4 | ||
|
44d6bc71b7 | ||
|
9d302e2973 | ||
|
f553701224 | ||
|
f076d05595 | ||
|
b513809710 | ||
|
7519b28e21 | ||
|
3eac4dd57f | ||
|
4c2810720a | ||
|
8480ba8daa | ||
|
fbba392491 | ||
|
530eb35949 |
@@ -6,6 +6,7 @@ import nu.marginalia.service.ServiceId;
|
|||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
import java.net.InetAddress;
|
import java.net.InetAddress;
|
||||||
import java.net.NetworkInterface;
|
import java.net.NetworkInterface;
|
||||||
import java.util.Enumeration;
|
import java.util.Enumeration;
|
||||||
@@ -115,11 +116,12 @@ public class ServiceConfigurationModule extends AbstractModule {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String getLocalNetworkIP() throws Exception {
|
public static String getLocalNetworkIP() throws IOException {
|
||||||
Enumeration<NetworkInterface> nets = NetworkInterface.getNetworkInterfaces();
|
Enumeration<NetworkInterface> nets = NetworkInterface.getNetworkInterfaces();
|
||||||
|
|
||||||
while (nets.hasMoreElements()) {
|
while (nets.hasMoreElements()) {
|
||||||
NetworkInterface netif = nets.nextElement();
|
NetworkInterface netif = nets.nextElement();
|
||||||
|
logger.info("Considering network interface {}: Up? {}, Loopback? {}", netif.getDisplayName(), netif.isUp(), netif.isLoopback());
|
||||||
if (!netif.isUp() || netif.isLoopback()) {
|
if (!netif.isUp() || netif.isLoopback()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -127,6 +129,7 @@ public class ServiceConfigurationModule extends AbstractModule {
|
|||||||
Enumeration<InetAddress> inetAddresses = netif.getInetAddresses();
|
Enumeration<InetAddress> inetAddresses = netif.getInetAddresses();
|
||||||
while (inetAddresses.hasMoreElements()) {
|
while (inetAddresses.hasMoreElements()) {
|
||||||
InetAddress addr = inetAddresses.nextElement();
|
InetAddress addr = inetAddresses.nextElement();
|
||||||
|
logger.info("Considering address {}: SiteLocal? {}, Loopback? {}", addr.getHostAddress(), addr.isSiteLocalAddress(), addr.isLoopbackAddress());
|
||||||
if (addr.isSiteLocalAddress() && !addr.isLoopbackAddress()) {
|
if (addr.isSiteLocalAddress() && !addr.isLoopbackAddress()) {
|
||||||
return addr.getHostAddress();
|
return addr.getHostAddress();
|
||||||
}
|
}
|
||||||
|
@@ -15,6 +15,7 @@ import org.slf4j.LoggerFactory;
|
|||||||
import org.slf4j.Marker;
|
import org.slf4j.Marker;
|
||||||
import org.slf4j.MarkerFactory;
|
import org.slf4j.MarkerFactory;
|
||||||
|
|
||||||
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.nio.file.Paths;
|
import java.nio.file.Paths;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
@@ -106,9 +107,12 @@ public class JoobyService {
|
|||||||
config.externalAddress());
|
config.externalAddress());
|
||||||
|
|
||||||
// FIXME: This won't work outside of docker, may need to submit a PR to jooby to allow classpaths here
|
// FIXME: This won't work outside of docker, may need to submit a PR to jooby to allow classpaths here
|
||||||
|
if (Files.exists(Path.of("/app/resources/jte")) || Files.exists(Path.of("/app/classes/jte-precompiled"))) {
|
||||||
jooby.install(new JteModule(Path.of("/app/resources/jte"), Path.of("/app/classes/jte-precompiled")));
|
jooby.install(new JteModule(Path.of("/app/resources/jte"), Path.of("/app/classes/jte-precompiled")));
|
||||||
|
}
|
||||||
|
if (Files.exists(Path.of("/app/resources/static"))) {
|
||||||
jooby.assets("/*", Paths.get("/app/resources/static"));
|
jooby.assets("/*", Paths.get("/app/resources/static"));
|
||||||
|
}
|
||||||
var options = new ServerOptions();
|
var options = new ServerOptions();
|
||||||
options.setHost(config.bindAddress());
|
options.setHost(config.bindAddress());
|
||||||
options.setPort(restEndpoint.port());
|
options.setPort(restEndpoint.port());
|
||||||
|
@@ -6,17 +6,22 @@ import nu.marginalia.service.module.ServiceConfiguration;
|
|||||||
import org.eclipse.jetty.server.Server;
|
import org.eclipse.jetty.server.Server;
|
||||||
import org.eclipse.jetty.servlet.ServletContextHandler;
|
import org.eclipse.jetty.servlet.ServletContextHandler;
|
||||||
import org.eclipse.jetty.servlet.ServletHolder;
|
import org.eclipse.jetty.servlet.ServletHolder;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.net.InetSocketAddress;
|
import java.net.InetSocketAddress;
|
||||||
|
|
||||||
public class MetricsServer {
|
public class MetricsServer {
|
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(MetricsServer.class);
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public MetricsServer(ServiceConfiguration configuration) throws Exception {
|
public MetricsServer(ServiceConfiguration configuration) {
|
||||||
// If less than zero, we forego setting up a metrics server
|
// If less than zero, we forego setting up a metrics server
|
||||||
if (configuration.metricsPort() < 0)
|
if (configuration.metricsPort() < 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
try {
|
||||||
Server server = new Server(new InetSocketAddress(configuration.bindAddress(), configuration.metricsPort()));
|
Server server = new Server(new InetSocketAddress(configuration.bindAddress(), configuration.metricsPort()));
|
||||||
|
|
||||||
ServletContextHandler context = new ServletContextHandler();
|
ServletContextHandler context = new ServletContextHandler();
|
||||||
@@ -25,6 +30,12 @@ public class MetricsServer {
|
|||||||
|
|
||||||
context.addServlet(new ServletHolder(new MetricsServlet()), "/metrics");
|
context.addServlet(new ServletHolder(new MetricsServlet()), "/metrics");
|
||||||
|
|
||||||
|
logger.info("MetricsServer listening on {}:{}", configuration.bindAddress(), configuration.metricsPort());
|
||||||
|
|
||||||
server.start();
|
server.start();
|
||||||
}
|
}
|
||||||
|
catch (Exception|NoSuchMethodError ex) {
|
||||||
|
logger.error("Failed to set up metrics server", ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@@ -14,6 +14,8 @@ import nu.marginalia.mq.persistence.MqPersistence;
|
|||||||
import nu.marginalia.nodecfg.NodeConfigurationService;
|
import nu.marginalia.nodecfg.NodeConfigurationService;
|
||||||
import nu.marginalia.nodecfg.model.NodeProfile;
|
import nu.marginalia.nodecfg.model.NodeProfile;
|
||||||
import nu.marginalia.service.module.ServiceConfiguration;
|
import nu.marginalia.service.module.ServiceConfiguration;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.time.Duration;
|
import java.time.Duration;
|
||||||
import java.time.LocalDateTime;
|
import java.time.LocalDateTime;
|
||||||
@@ -29,6 +31,7 @@ public class UpdateRssActor extends RecordActorPrototype {
|
|||||||
|
|
||||||
private final NodeConfigurationService nodeConfigurationService;
|
private final NodeConfigurationService nodeConfigurationService;
|
||||||
private final MqPersistence persistence;
|
private final MqPersistence persistence;
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(UpdateRssActor.class);
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public UpdateRssActor(Gson gson,
|
public UpdateRssActor(Gson gson,
|
||||||
@@ -101,8 +104,8 @@ public class UpdateRssActor extends RecordActorPrototype {
|
|||||||
case UpdateRefresh(int count, long msgId) -> {
|
case UpdateRefresh(int count, long msgId) -> {
|
||||||
MqMessage msg = persistence.waitForMessageTerminalState(msgId, Duration.ofSeconds(10), Duration.ofHours(12));
|
MqMessage msg = persistence.waitForMessageTerminalState(msgId, Duration.ofSeconds(10), Duration.ofHours(12));
|
||||||
if (msg == null) {
|
if (msg == null) {
|
||||||
// Retry the update
|
logger.warn("UpdateRefresh is taking a very long time");
|
||||||
yield new Error("Failed to update feeds: message not found");
|
yield new UpdateRefresh(count, msgId);
|
||||||
} else if (msg.state() != MqMessageState.OK) {
|
} else if (msg.state() != MqMessageState.OK) {
|
||||||
// Retry the update
|
// Retry the update
|
||||||
yield new Error("Failed to update feeds: " + msg.state());
|
yield new Error("Failed to update feeds: " + msg.state());
|
||||||
@@ -119,8 +122,8 @@ public class UpdateRssActor extends RecordActorPrototype {
|
|||||||
case UpdateClean(long msgId) -> {
|
case UpdateClean(long msgId) -> {
|
||||||
MqMessage msg = persistence.waitForMessageTerminalState(msgId, Duration.ofSeconds(10), Duration.ofHours(12));
|
MqMessage msg = persistence.waitForMessageTerminalState(msgId, Duration.ofSeconds(10), Duration.ofHours(12));
|
||||||
if (msg == null) {
|
if (msg == null) {
|
||||||
// Retry the update
|
logger.warn("UpdateClean is taking a very long time");
|
||||||
yield new Error("Failed to update feeds: message not found");
|
yield new UpdateClean(msgId);
|
||||||
} else if (msg.state() != MqMessageState.OK) {
|
} else if (msg.state() != MqMessageState.OK) {
|
||||||
// Retry the update
|
// Retry the update
|
||||||
yield new Error("Failed to update feeds: " + msg.state());
|
yield new Error("Failed to update feeds: " + msg.state());
|
||||||
|
@@ -34,6 +34,7 @@ dependencies {
|
|||||||
implementation libs.bundles.slf4j
|
implementation libs.bundles.slf4j
|
||||||
implementation libs.commons.lang3
|
implementation libs.commons.lang3
|
||||||
implementation libs.commons.io
|
implementation libs.commons.io
|
||||||
|
implementation libs.wiremock
|
||||||
|
|
||||||
implementation libs.prometheus
|
implementation libs.prometheus
|
||||||
implementation libs.guava
|
implementation libs.guava
|
||||||
|
@@ -1,6 +1,7 @@
|
|||||||
package nu.marginalia.livecapture;
|
package nu.marginalia.livecapture;
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
import com.google.gson.Gson;
|
||||||
|
import nu.marginalia.WmsaHome;
|
||||||
import nu.marginalia.model.gson.GsonFactory;
|
import nu.marginalia.model.gson.GsonFactory;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
@@ -12,6 +13,7 @@ import java.net.http.HttpRequest;
|
|||||||
import java.net.http.HttpResponse;
|
import java.net.http.HttpResponse;
|
||||||
import java.time.Duration;
|
import java.time.Duration;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
/** Client for local browserless.io API */
|
/** Client for local browserless.io API */
|
||||||
public class BrowserlessClient implements AutoCloseable {
|
public class BrowserlessClient implements AutoCloseable {
|
||||||
@@ -27,13 +29,16 @@ public class BrowserlessClient implements AutoCloseable {
|
|||||||
private final URI browserlessURI;
|
private final URI browserlessURI;
|
||||||
private final Gson gson = GsonFactory.get();
|
private final Gson gson = GsonFactory.get();
|
||||||
|
|
||||||
|
private final String userAgent = WmsaHome.getUserAgent().uaString();
|
||||||
|
|
||||||
public BrowserlessClient(URI browserlessURI) {
|
public BrowserlessClient(URI browserlessURI) {
|
||||||
this.browserlessURI = browserlessURI;
|
this.browserlessURI = browserlessURI;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String content(String url, GotoOptions gotoOptions) throws IOException, InterruptedException {
|
public Optional<String> content(String url, GotoOptions gotoOptions) throws IOException, InterruptedException {
|
||||||
Map<String, Object> requestData = Map.of(
|
Map<String, Object> requestData = Map.of(
|
||||||
"url", url,
|
"url", url,
|
||||||
|
"userAgent", userAgent,
|
||||||
"gotoOptions", gotoOptions
|
"gotoOptions", gotoOptions
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -49,10 +54,10 @@ public class BrowserlessClient implements AutoCloseable {
|
|||||||
|
|
||||||
if (rsp.statusCode() >= 300) {
|
if (rsp.statusCode() >= 300) {
|
||||||
logger.info("Failed to fetch content for {}, status {}", url, rsp.statusCode());
|
logger.info("Failed to fetch content for {}, status {}", url, rsp.statusCode());
|
||||||
return null;
|
return Optional.empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
return rsp.body();
|
return Optional.of(rsp.body());
|
||||||
}
|
}
|
||||||
|
|
||||||
public byte[] screenshot(String url, GotoOptions gotoOptions, ScreenshotOptions screenshotOptions)
|
public byte[] screenshot(String url, GotoOptions gotoOptions, ScreenshotOptions screenshotOptions)
|
||||||
@@ -60,6 +65,7 @@ public class BrowserlessClient implements AutoCloseable {
|
|||||||
|
|
||||||
Map<String, Object> requestData = Map.of(
|
Map<String, Object> requestData = Map.of(
|
||||||
"url", url,
|
"url", url,
|
||||||
|
"userAgent", userAgent,
|
||||||
"options", screenshotOptions,
|
"options", screenshotOptions,
|
||||||
"gotoOptions", gotoOptions
|
"gotoOptions", gotoOptions
|
||||||
);
|
);
|
||||||
@@ -84,7 +90,7 @@ public class BrowserlessClient implements AutoCloseable {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void close() throws Exception {
|
public void close() {
|
||||||
httpClient.shutdownNow();
|
httpClient.shutdownNow();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1,5 +1,9 @@
|
|||||||
package nu.marginalia.livecapture;
|
package nu.marginalia.livecapture;
|
||||||
|
|
||||||
|
import com.github.tomakehurst.wiremock.WireMockServer;
|
||||||
|
import com.github.tomakehurst.wiremock.core.WireMockConfiguration;
|
||||||
|
import nu.marginalia.WmsaHome;
|
||||||
|
import nu.marginalia.service.module.ServiceConfigurationModule;
|
||||||
import org.junit.jupiter.api.Assertions;
|
import org.junit.jupiter.api.Assertions;
|
||||||
import org.junit.jupiter.api.BeforeAll;
|
import org.junit.jupiter.api.BeforeAll;
|
||||||
import org.junit.jupiter.api.Tag;
|
import org.junit.jupiter.api.Tag;
|
||||||
@@ -8,34 +12,86 @@ import org.testcontainers.containers.GenericContainer;
|
|||||||
import org.testcontainers.junit.jupiter.Testcontainers;
|
import org.testcontainers.junit.jupiter.Testcontainers;
|
||||||
import org.testcontainers.utility.DockerImageName;
|
import org.testcontainers.utility.DockerImageName;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
import static com.github.tomakehurst.wiremock.client.WireMock.*;
|
||||||
|
|
||||||
|
|
||||||
@Testcontainers
|
@Testcontainers
|
||||||
@Tag("slow")
|
@Tag("slow")
|
||||||
public class BrowserlessClientTest {
|
public class BrowserlessClientTest {
|
||||||
static GenericContainer<?> container = new GenericContainer<>(DockerImageName.parse("browserless/chrome"))
|
static GenericContainer<?> container = new GenericContainer<>(DockerImageName.parse("browserless/chrome"))
|
||||||
.withEnv(Map.of("TOKEN", "BROWSERLESS_TOKEN"))
|
.withEnv(Map.of("TOKEN", "BROWSERLESS_TOKEN"))
|
||||||
|
.withNetworkMode("bridge")
|
||||||
.withExposedPorts(3000);
|
.withExposedPorts(3000);
|
||||||
|
|
||||||
|
static WireMockServer wireMockServer =
|
||||||
|
new WireMockServer(WireMockConfiguration.wireMockConfig()
|
||||||
|
.port(18089));
|
||||||
|
|
||||||
|
static String localIp;
|
||||||
|
|
||||||
|
static URI browserlessURI;
|
||||||
|
|
||||||
@BeforeAll
|
@BeforeAll
|
||||||
public static void setup() {
|
public static void setup() throws IOException {
|
||||||
container.start();
|
container.start();
|
||||||
|
|
||||||
|
browserlessURI = URI.create(String.format("http://%s:%d/",
|
||||||
|
container.getHost(),
|
||||||
|
container.getMappedPort(3000))
|
||||||
|
);
|
||||||
|
|
||||||
|
wireMockServer.start();
|
||||||
|
wireMockServer.stubFor(get("/").willReturn(aResponse().withStatus(200).withBody("Ok")));
|
||||||
|
|
||||||
|
localIp = ServiceConfigurationModule.getLocalNetworkIP();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Tag("flaky")
|
||||||
|
@Test
|
||||||
|
public void testInspectContentUA__Flaky() throws Exception {
|
||||||
|
try (var client = new BrowserlessClient(browserlessURI)) {
|
||||||
|
client.content("http://" + localIp + ":18089/",
|
||||||
|
BrowserlessClient.GotoOptions.defaultValues()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
wireMockServer.verify(getRequestedFor(urlEqualTo("/")).withHeader("User-Agent", equalTo(WmsaHome.getUserAgent().uaString())));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Tag("flaky")
|
||||||
|
@Test
|
||||||
|
public void testInspectScreenshotUA__Flaky() throws Exception {
|
||||||
|
try (var client = new BrowserlessClient(browserlessURI)) {
|
||||||
|
client.screenshot("http://" + localIp + ":18089/",
|
||||||
|
BrowserlessClient.GotoOptions.defaultValues(),
|
||||||
|
BrowserlessClient.ScreenshotOptions.defaultValues()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
wireMockServer.verify(getRequestedFor(urlEqualTo("/")).withHeader("User-Agent", equalTo(WmsaHome.getUserAgent().uaString())));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testContent() throws Exception {
|
public void testContent() throws Exception {
|
||||||
try (var client = new BrowserlessClient(URI.create("http://" + container.getHost() + ":" + container.getMappedPort(3000)))) {
|
try (var client = new BrowserlessClient(browserlessURI)) {
|
||||||
var content = client.content("https://www.marginalia.nu/", BrowserlessClient.GotoOptions.defaultValues());
|
var content = client.content("https://www.marginalia.nu/", BrowserlessClient.GotoOptions.defaultValues()).orElseThrow();
|
||||||
Assertions.assertNotNull(content, "Content should not be null");
|
|
||||||
Assertions.assertFalse(content.isBlank(), "Content should not be empty");
|
Assertions.assertFalse(content.isBlank(), "Content should not be empty");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testScreenshot() throws Exception {
|
public void testScreenshot() throws Exception {
|
||||||
try (var client = new BrowserlessClient(URI.create("http://" + container.getHost() + ":" + container.getMappedPort(3000)))) {
|
try (var client = new BrowserlessClient(browserlessURI)) {
|
||||||
var screenshot = client.screenshot("https://www.marginalia.nu/", BrowserlessClient.GotoOptions.defaultValues(), BrowserlessClient.ScreenshotOptions.defaultValues());
|
var screenshot = client.screenshot("https://www.marginalia.nu/",
|
||||||
|
BrowserlessClient.GotoOptions.defaultValues(),
|
||||||
|
BrowserlessClient.ScreenshotOptions.defaultValues());
|
||||||
|
|
||||||
Assertions.assertNotNull(screenshot, "Screenshot should not be null");
|
Assertions.assertNotNull(screenshot, "Screenshot should not be null");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -11,7 +11,6 @@ import nu.marginalia.slop.column.primitive.IntColumn;
|
|||||||
import nu.marginalia.slop.column.primitive.LongColumn;
|
import nu.marginalia.slop.column.primitive.LongColumn;
|
||||||
import nu.marginalia.slop.column.string.EnumColumn;
|
import nu.marginalia.slop.column.string.EnumColumn;
|
||||||
import nu.marginalia.slop.column.string.StringColumn;
|
import nu.marginalia.slop.column.string.StringColumn;
|
||||||
import nu.marginalia.slop.column.string.TxtStringColumn;
|
|
||||||
import nu.marginalia.slop.desc.StorageType;
|
import nu.marginalia.slop.desc.StorageType;
|
||||||
import org.jetbrains.annotations.Nullable;
|
import org.jetbrains.annotations.Nullable;
|
||||||
|
|
||||||
@@ -182,8 +181,8 @@ public record SlopDocumentRecord(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Basic information
|
// Basic information
|
||||||
private static final TxtStringColumn domainsColumn = new TxtStringColumn("domain", StandardCharsets.UTF_8, StorageType.GZIP);
|
private static final StringColumn domainsColumn = new StringColumn("domain", StandardCharsets.UTF_8, StorageType.GZIP);
|
||||||
private static final TxtStringColumn urlsColumn = new TxtStringColumn("url", StandardCharsets.UTF_8, StorageType.GZIP);
|
private static final StringColumn urlsColumn = new StringColumn("url", StandardCharsets.UTF_8, StorageType.GZIP);
|
||||||
private static final VarintColumn ordinalsColumn = new VarintColumn("ordinal", StorageType.PLAIN);
|
private static final VarintColumn ordinalsColumn = new VarintColumn("ordinal", StorageType.PLAIN);
|
||||||
private static final EnumColumn statesColumn = new EnumColumn("state", StandardCharsets.US_ASCII, StorageType.PLAIN);
|
private static final EnumColumn statesColumn = new EnumColumn("state", StandardCharsets.US_ASCII, StorageType.PLAIN);
|
||||||
private static final StringColumn stateReasonsColumn = new StringColumn("stateReason", StandardCharsets.US_ASCII, StorageType.GZIP);
|
private static final StringColumn stateReasonsColumn = new StringColumn("stateReason", StandardCharsets.US_ASCII, StorageType.GZIP);
|
||||||
@@ -211,7 +210,7 @@ public record SlopDocumentRecord(
|
|||||||
private static final VarintCodedSequenceArrayColumn spansColumn = new VarintCodedSequenceArrayColumn("spans", StorageType.ZSTD);
|
private static final VarintCodedSequenceArrayColumn spansColumn = new VarintCodedSequenceArrayColumn("spans", StorageType.ZSTD);
|
||||||
|
|
||||||
public static class KeywordsProjectionReader extends SlopTable {
|
public static class KeywordsProjectionReader extends SlopTable {
|
||||||
private final TxtStringColumn.Reader domainsReader;
|
private final StringColumn.Reader domainsReader;
|
||||||
private final VarintColumn.Reader ordinalsReader;
|
private final VarintColumn.Reader ordinalsReader;
|
||||||
private final IntColumn.Reader htmlFeaturesReader;
|
private final IntColumn.Reader htmlFeaturesReader;
|
||||||
private final LongColumn.Reader domainMetadataReader;
|
private final LongColumn.Reader domainMetadataReader;
|
||||||
@@ -275,8 +274,8 @@ public record SlopDocumentRecord(
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static class MetadataReader extends SlopTable {
|
public static class MetadataReader extends SlopTable {
|
||||||
private final TxtStringColumn.Reader domainsReader;
|
private final StringColumn.Reader domainsReader;
|
||||||
private final TxtStringColumn.Reader urlsReader;
|
private final StringColumn.Reader urlsReader;
|
||||||
private final VarintColumn.Reader ordinalsReader;
|
private final VarintColumn.Reader ordinalsReader;
|
||||||
private final StringColumn.Reader titlesReader;
|
private final StringColumn.Reader titlesReader;
|
||||||
private final StringColumn.Reader descriptionsReader;
|
private final StringColumn.Reader descriptionsReader;
|
||||||
@@ -332,8 +331,8 @@ public record SlopDocumentRecord(
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static class Writer extends SlopTable {
|
public static class Writer extends SlopTable {
|
||||||
private final TxtStringColumn.Writer domainsWriter;
|
private final StringColumn.Writer domainsWriter;
|
||||||
private final TxtStringColumn.Writer urlsWriter;
|
private final StringColumn.Writer urlsWriter;
|
||||||
private final VarintColumn.Writer ordinalsWriter;
|
private final VarintColumn.Writer ordinalsWriter;
|
||||||
private final EnumColumn.Writer statesWriter;
|
private final EnumColumn.Writer statesWriter;
|
||||||
private final StringColumn.Writer stateReasonsWriter;
|
private final StringColumn.Writer stateReasonsWriter;
|
||||||
|
@@ -41,10 +41,7 @@ import java.nio.file.Files;
|
|||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.nio.file.StandardCopyOption;
|
import java.nio.file.StandardCopyOption;
|
||||||
import java.security.Security;
|
import java.security.Security;
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
@@ -248,22 +245,47 @@ public class CrawlerMain extends ProcessMainClass {
|
|||||||
// (this happens when the process is restarted after a crash or a shutdown)
|
// (this happens when the process is restarted after a crash or a shutdown)
|
||||||
tasksDone.set(workLog.countFinishedJobs());
|
tasksDone.set(workLog.countFinishedJobs());
|
||||||
|
|
||||||
|
// List of deferred tasks used to ensure beneficial scheduling of domains with regard to DomainLocks,
|
||||||
|
// merely shuffling the domains tends to lead to a lot of threads being blocked waiting for a semaphore,
|
||||||
|
// this will more aggressively attempt to schedule the jobs to avoid blocking
|
||||||
|
List<CrawlTask> deferredTasks = new LinkedList<>();
|
||||||
|
|
||||||
// Create crawl tasks and submit them to the pool for execution
|
// Create crawl tasks and submit them to the pool for execution
|
||||||
for (CrawlSpecRecord crawlSpec : crawlSpecRecords) {
|
for (CrawlSpecRecord crawlSpec : crawlSpecRecords) {
|
||||||
if (workLog.isJobFinished(crawlSpec.domain()))
|
if (workLog.isJobFinished(crawlSpec.domain()))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
var task = new CrawlTask(
|
// Add to the end of the deferral list
|
||||||
|
deferredTasks.addLast(new CrawlTask(
|
||||||
crawlSpec,
|
crawlSpec,
|
||||||
anchorTagsSource,
|
anchorTagsSource,
|
||||||
outputDir,
|
outputDir,
|
||||||
warcArchiver,
|
warcArchiver,
|
||||||
domainStateDb,
|
domainStateDb,
|
||||||
workLog);
|
workLog));
|
||||||
|
|
||||||
if (pendingCrawlTasks.putIfAbsent(crawlSpec.domain(), task) == null) {
|
// Start every task we currently can from the deferral list
|
||||||
pool.submitQuietly(task);
|
deferredTasks.removeIf(task -> {
|
||||||
|
if (task.canRun()) {
|
||||||
|
if (pendingCrawlTasks.putIfAbsent(crawlSpec.domain(), task) != null) {
|
||||||
|
return true; // task has already run, duplicate in crawl specs
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This blocks the caller when the pool is full
|
||||||
|
pool.submitQuietly(task);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Schedule any lingering tasks for immediate execution
|
||||||
|
for (var task : deferredTasks) {
|
||||||
|
if (pendingCrawlTasks.putIfAbsent(task.domain, task) != null)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
pool.submitQuietly(task);
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.info("Shutting down the pool, waiting for tasks to complete...");
|
logger.info("Shutting down the pool, waiting for tasks to complete...");
|
||||||
@@ -346,6 +368,12 @@ public class CrawlerMain extends ProcessMainClass {
|
|||||||
this.id = Integer.toHexString(domain.hashCode());
|
this.id = Integer.toHexString(domain.hashCode());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Best effort indicator whether we could start this now without getting stuck in
|
||||||
|
* DomainLocks purgatory */
|
||||||
|
public boolean canRun() {
|
||||||
|
return domainLocks.canLock(new EdgeDomain(domain));
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void run() throws Exception {
|
public void run() throws Exception {
|
||||||
|
|
||||||
@@ -494,7 +522,7 @@ public class CrawlerMain extends ProcessMainClass {
|
|||||||
//
|
//
|
||||||
// This must be synchronized as chewing through parquet files in parallel leads to enormous memory overhead
|
// This must be synchronized as chewing through parquet files in parallel leads to enormous memory overhead
|
||||||
private synchronized Path migrateParquetData(Path inputPath, String domain, Path crawlDataRoot) throws IOException {
|
private synchronized Path migrateParquetData(Path inputPath, String domain, Path crawlDataRoot) throws IOException {
|
||||||
if (!inputPath.endsWith(".parquet")) {
|
if (!inputPath.toString().endsWith(".parquet")) {
|
||||||
return inputPath;
|
return inputPath;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -251,6 +251,7 @@ public class HttpFetcherImpl implements HttpFetcher {
|
|||||||
return new SitemapRetriever();
|
return new SitemapRetriever();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Recursively fetch sitemaps */
|
||||||
@Override
|
@Override
|
||||||
public List<EdgeUrl> fetchSitemapUrls(String root, CrawlDelayTimer delayTimer) {
|
public List<EdgeUrl> fetchSitemapUrls(String root, CrawlDelayTimer delayTimer) {
|
||||||
try {
|
try {
|
||||||
@@ -270,7 +271,7 @@ public class HttpFetcherImpl implements HttpFetcher {
|
|||||||
while (!sitemapQueue.isEmpty() && ret.size() < 20_000 && ++fetchedSitemaps < 10) {
|
while (!sitemapQueue.isEmpty() && ret.size() < 20_000 && ++fetchedSitemaps < 10) {
|
||||||
var head = sitemapQueue.removeFirst();
|
var head = sitemapQueue.removeFirst();
|
||||||
|
|
||||||
switch (fetchSitemap(head)) {
|
switch (fetchSingleSitemap(head)) {
|
||||||
case SitemapResult.SitemapUrls(List<String> urls) -> {
|
case SitemapResult.SitemapUrls(List<String> urls) -> {
|
||||||
|
|
||||||
for (var url : urls) {
|
for (var url : urls) {
|
||||||
@@ -306,7 +307,7 @@ public class HttpFetcherImpl implements HttpFetcher {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private SitemapResult fetchSitemap(EdgeUrl sitemapUrl) throws URISyntaxException, IOException, InterruptedException {
|
private SitemapResult fetchSingleSitemap(EdgeUrl sitemapUrl) throws URISyntaxException, IOException, InterruptedException {
|
||||||
HttpRequest getRequest = HttpRequest.newBuilder()
|
HttpRequest getRequest = HttpRequest.newBuilder()
|
||||||
.GET()
|
.GET()
|
||||||
.uri(sitemapUrl.asURI())
|
.uri(sitemapUrl.asURI())
|
||||||
|
@@ -44,6 +44,14 @@ public class DomainLocks {
|
|||||||
return new Semaphore(2);
|
return new Semaphore(2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean canLock(EdgeDomain domain) {
|
||||||
|
Semaphore sem = locks.get(domain.topDomain.toLowerCase());
|
||||||
|
if (null == sem)
|
||||||
|
return true;
|
||||||
|
else
|
||||||
|
return sem.availablePermits() > 0;
|
||||||
|
}
|
||||||
|
|
||||||
public static class DomainLock implements AutoCloseable {
|
public static class DomainLock implements AutoCloseable {
|
||||||
private final String domainName;
|
private final String domainName;
|
||||||
private final Semaphore semaphore;
|
private final Semaphore semaphore;
|
||||||
|
@@ -42,18 +42,20 @@ public interface SerializableCrawlDataStream extends AutoCloseable {
|
|||||||
{
|
{
|
||||||
|
|
||||||
String fileName = fullPath.getFileName().toString();
|
String fileName = fullPath.getFileName().toString();
|
||||||
if (fileName.endsWith(".parquet")) {
|
|
||||||
|
if (fileName.endsWith(".slop.zip")) {
|
||||||
try {
|
try {
|
||||||
return new ParquetSerializableCrawlDataStream(fullPath);
|
return new SlopSerializableCrawlDataStream(fullPath);
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
logger.error("Error reading domain data from " + fullPath, ex);
|
logger.error("Error reading domain data from " + fullPath, ex);
|
||||||
return SerializableCrawlDataStream.empty();
|
return SerializableCrawlDataStream.empty();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fileName.endsWith(".slop.zip")) {
|
else if (fileName.endsWith(".parquet")) {
|
||||||
|
logger.error("Opening deprecated parquet-style crawl data stream", new Exception());
|
||||||
try {
|
try {
|
||||||
return new SlopSerializableCrawlDataStream(fullPath);
|
return new ParquetSerializableCrawlDataStream(fullPath);
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
logger.error("Error reading domain data from " + fullPath, ex);
|
logger.error("Error reading domain data from " + fullPath, ex);
|
||||||
return SerializableCrawlDataStream.empty();
|
return SerializableCrawlDataStream.empty();
|
||||||
|
@@ -7,8 +7,7 @@ import java.util.Arrays;
|
|||||||
|
|
||||||
public enum SearchJsParameter {
|
public enum SearchJsParameter {
|
||||||
DEFAULT("default"),
|
DEFAULT("default"),
|
||||||
DENY_JS("no-js", "js:true"),
|
DENY_JS("no-js", "special:scripts");
|
||||||
REQUIRE_JS("yes-js", "js:false");
|
|
||||||
|
|
||||||
public final String value;
|
public final String value;
|
||||||
public final String[] implictExcludeSearchTerms;
|
public final String[] implictExcludeSearchTerms;
|
||||||
@@ -20,7 +19,6 @@ public enum SearchJsParameter {
|
|||||||
|
|
||||||
public static SearchJsParameter parse(@Nullable String value) {
|
public static SearchJsParameter parse(@Nullable String value) {
|
||||||
if (DENY_JS.value.equals(value)) return DENY_JS;
|
if (DENY_JS.value.equals(value)) return DENY_JS;
|
||||||
if (REQUIRE_JS.value.equals(value)) return REQUIRE_JS;
|
|
||||||
|
|
||||||
return DEFAULT;
|
return DEFAULT;
|
||||||
}
|
}
|
||||||
|
@@ -3,8 +3,10 @@ package nu.marginalia.search;
|
|||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import io.jooby.Context;
|
import io.jooby.Context;
|
||||||
import io.jooby.Jooby;
|
import io.jooby.Jooby;
|
||||||
|
import io.jooby.StatusCode;
|
||||||
import io.prometheus.client.Counter;
|
import io.prometheus.client.Counter;
|
||||||
import io.prometheus.client.Histogram;
|
import io.prometheus.client.Histogram;
|
||||||
|
import nu.marginalia.WebsiteUrl;
|
||||||
import nu.marginalia.search.svc.*;
|
import nu.marginalia.search.svc.*;
|
||||||
import nu.marginalia.service.discovery.property.ServicePartition;
|
import nu.marginalia.service.discovery.property.ServicePartition;
|
||||||
import nu.marginalia.service.server.BaseServiceParams;
|
import nu.marginalia.service.server.BaseServiceParams;
|
||||||
@@ -16,6 +18,7 @@ import java.util.List;
|
|||||||
|
|
||||||
public class SearchService extends JoobyService {
|
public class SearchService extends JoobyService {
|
||||||
|
|
||||||
|
private final WebsiteUrl websiteUrl;
|
||||||
private final SearchSiteSubscriptionService siteSubscriptionService;
|
private final SearchSiteSubscriptionService siteSubscriptionService;
|
||||||
|
|
||||||
private static final Logger logger = LoggerFactory.getLogger(SearchService.class);
|
private static final Logger logger = LoggerFactory.getLogger(SearchService.class);
|
||||||
@@ -33,6 +36,7 @@ public class SearchService extends JoobyService {
|
|||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public SearchService(BaseServiceParams params,
|
public SearchService(BaseServiceParams params,
|
||||||
|
WebsiteUrl websiteUrl,
|
||||||
SearchFrontPageService frontPageService,
|
SearchFrontPageService frontPageService,
|
||||||
SearchAddToCrawlQueueService addToCrawlQueueService,
|
SearchAddToCrawlQueueService addToCrawlQueueService,
|
||||||
SearchSiteSubscriptionService siteSubscriptionService,
|
SearchSiteSubscriptionService siteSubscriptionService,
|
||||||
@@ -51,6 +55,7 @@ public class SearchService extends JoobyService {
|
|||||||
new SearchAddToCrawlQueueService_(addToCrawlQueueService),
|
new SearchAddToCrawlQueueService_(addToCrawlQueueService),
|
||||||
new SearchBrowseService_(searchBrowseService)
|
new SearchBrowseService_(searchBrowseService)
|
||||||
));
|
));
|
||||||
|
this.websiteUrl = websiteUrl;
|
||||||
|
|
||||||
this.siteSubscriptionService = siteSubscriptionService;
|
this.siteSubscriptionService = siteSubscriptionService;
|
||||||
}
|
}
|
||||||
@@ -62,6 +67,10 @@ public class SearchService extends JoobyService {
|
|||||||
final String startTimeAttribute = "start-time";
|
final String startTimeAttribute = "start-time";
|
||||||
|
|
||||||
jooby.get("/export-opml", siteSubscriptionService::exportOpml);
|
jooby.get("/export-opml", siteSubscriptionService::exportOpml);
|
||||||
|
|
||||||
|
jooby.get("/site/https://*", this::handleSiteUrlRedirect);
|
||||||
|
jooby.get("/site/http://*", this::handleSiteUrlRedirect);
|
||||||
|
|
||||||
jooby.before((Context ctx) -> {
|
jooby.before((Context ctx) -> {
|
||||||
ctx.setAttribute(startTimeAttribute, System.nanoTime());
|
ctx.setAttribute(startTimeAttribute, System.nanoTime());
|
||||||
});
|
});
|
||||||
@@ -80,5 +89,19 @@ public class SearchService extends JoobyService {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Redirect handler for the case when the user passes
|
||||||
|
* an url like /site/https://example.com/, in this
|
||||||
|
* scenario we want to extract the domain name and redirect
|
||||||
|
* to /site/example.com/
|
||||||
|
*/
|
||||||
|
private Context handleSiteUrlRedirect(Context ctx) {
|
||||||
|
var pv = ctx.path("*").value();
|
||||||
|
int trailSlash = pv.indexOf('/');
|
||||||
|
if (trailSlash > 0) {
|
||||||
|
pv = pv.substring(0, trailSlash);
|
||||||
|
}
|
||||||
|
ctx.sendRedirect(StatusCode.TEMPORARY_REDIRECT, websiteUrl.withPath("site/" + pv));
|
||||||
|
return ctx;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@@ -7,9 +7,7 @@ import java.util.Arrays;
|
|||||||
|
|
||||||
public enum SearchJsParameter {
|
public enum SearchJsParameter {
|
||||||
DEFAULT("default"),
|
DEFAULT("default"),
|
||||||
DENY_JS("no-js", "js:true"),
|
DENY_JS("no-js", "special:scripts");
|
||||||
REQUIRE_JS("yes-js", "js:false");
|
|
||||||
|
|
||||||
public final String value;
|
public final String value;
|
||||||
public final String[] implictExcludeSearchTerms;
|
public final String[] implictExcludeSearchTerms;
|
||||||
|
|
||||||
@@ -20,7 +18,6 @@ public enum SearchJsParameter {
|
|||||||
|
|
||||||
public static SearchJsParameter parse(@Nullable String value) {
|
public static SearchJsParameter parse(@Nullable String value) {
|
||||||
if (DENY_JS.value.equals(value)) return DENY_JS;
|
if (DENY_JS.value.equals(value)) return DENY_JS;
|
||||||
if (REQUIRE_JS.value.equals(value)) return REQUIRE_JS;
|
|
||||||
|
|
||||||
return DEFAULT;
|
return DEFAULT;
|
||||||
}
|
}
|
||||||
|
@@ -86,8 +86,10 @@ public record SearchParameters(WebsiteUrl url,
|
|||||||
public String renderUrl() {
|
public String renderUrl() {
|
||||||
|
|
||||||
StringBuilder pathBuilder = new StringBuilder("/search?");
|
StringBuilder pathBuilder = new StringBuilder("/search?");
|
||||||
pathBuilder.append("query=").append(URLEncoder.encode(query, StandardCharsets.UTF_8));
|
|
||||||
|
|
||||||
|
if (query != null) {
|
||||||
|
pathBuilder.append("query=").append(URLEncoder.encode(query, StandardCharsets.UTF_8));
|
||||||
|
}
|
||||||
if (profile != SearchProfile.NO_FILTER) {
|
if (profile != SearchProfile.NO_FILTER) {
|
||||||
pathBuilder.append("&profile=").append(URLEncoder.encode(profile.filterId, StandardCharsets.UTF_8));
|
pathBuilder.append("&profile=").append(URLEncoder.encode(profile.filterId, StandardCharsets.UTF_8));
|
||||||
}
|
}
|
||||||
|
@@ -67,6 +67,10 @@ public class DecoratedSearchResults {
|
|||||||
return focusDomainId >= 0;
|
return focusDomainId >= 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean isEmpty() {
|
||||||
|
return results.isEmpty();
|
||||||
|
}
|
||||||
|
|
||||||
public SearchFilters getFilters() {
|
public SearchFilters getFilters() {
|
||||||
return filters;
|
return filters;
|
||||||
}
|
}
|
||||||
|
@@ -81,6 +81,7 @@ public class SearchFilters {
|
|||||||
),
|
),
|
||||||
List.of(
|
List.of(
|
||||||
new Filter("Vintage", "fa-clock-rotate-left", SearchProfile.VINTAGE, parameters),
|
new Filter("Vintage", "fa-clock-rotate-left", SearchProfile.VINTAGE, parameters),
|
||||||
|
new Filter("Small Web", "fa-user-minus", SearchProfile.SMALLWEB, parameters),
|
||||||
new Filter("Plain Text", "fa-file", SearchProfile.PLAIN_TEXT, parameters),
|
new Filter("Plain Text", "fa-file", SearchProfile.PLAIN_TEXT, parameters),
|
||||||
new Filter("Tilde", "fa-house", SearchProfile.TILDE, parameters)
|
new Filter("Tilde", "fa-house", SearchProfile.TILDE, parameters)
|
||||||
),
|
),
|
||||||
|
@@ -56,7 +56,9 @@ public class SearchQueryService {
|
|||||||
}
|
}
|
||||||
catch (Exception ex) {
|
catch (Exception ex) {
|
||||||
logger.error("Error", ex);
|
logger.error("Error", ex);
|
||||||
return errorPageService.serveError(SearchParameters.defaultsForQuery(websiteUrl, query, page));
|
return errorPageService.serveError(
|
||||||
|
SearchParameters.defaultsForQuery(websiteUrl, query, Objects.requireNonNullElse(page, 1))
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -9,6 +9,14 @@
|
|||||||
nicotine: '#f8f8ee',
|
nicotine: '#f8f8ee',
|
||||||
margeblue: '#3e5f6f',
|
margeblue: '#3e5f6f',
|
||||||
liteblue: '#0066cc',
|
liteblue: '#0066cc',
|
||||||
|
},
|
||||||
|
screens: {
|
||||||
|
'coarsepointer': {
|
||||||
|
'raw': '(pointer: coarse)'
|
||||||
|
},
|
||||||
|
'finepointer': {
|
||||||
|
'raw': '(pointer: fine)'
|
||||||
|
},
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
screens: {
|
screens: {
|
||||||
|
@@ -23,7 +23,7 @@
|
|||||||
@template.serp.part.searchform(query = results.getParams().query(), profile = results.getProfile(), filters = results.getFilters())
|
@template.serp.part.searchform(query = results.getParams().query(), profile = results.getProfile(), filters = results.getFilters())
|
||||||
</div>
|
</div>
|
||||||
<div class="grow"></div>
|
<div class="grow"></div>
|
||||||
<button class="fixed bottom-10 right-5 sm:hidden text-sm bg-margeblue text-white p-4 rounded-xl active:text-slate-200" id="filter-button">
|
<button class="fixed bottom-10 right-5 finepointer:hidden md:hidden text-sm bg-margeblue text-white p-4 rounded-xl active:text-slate-200" id="filter-button">
|
||||||
<i class="fas fa-filter mr-3"></i>
|
<i class="fas fa-filter mr-3"></i>
|
||||||
Filters
|
Filters
|
||||||
</button>
|
</button>
|
||||||
@@ -44,6 +44,11 @@
|
|||||||
<div class="grow"></div>
|
<div class="grow"></div>
|
||||||
<a href="${results.getParams().renderUrlWithoutSiteFocus()}" class="fa fa-remove"></a>
|
<a href="${results.getParams().renderUrlWithoutSiteFocus()}" class="fa fa-remove"></a>
|
||||||
</div>
|
</div>
|
||||||
|
@elseif (results.isEmpty())
|
||||||
|
<div class="border dark:border-gray-600 rounded flex space-x-4 bg-white dark:bg-gray-800 text-gray-600 dark:text-gray-100 text-sm p-4 items-center">
|
||||||
|
No search results found. Try different search terms, or spelling variations. The search engine currently
|
||||||
|
only supports queries in the English language.
|
||||||
|
</div>
|
||||||
@endif
|
@endif
|
||||||
|
|
||||||
<div class="space-y-4 sm:space-y-6">
|
<div class="space-y-4 sm:space-y-6">
|
||||||
|
@@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
@param SearchFilters filters
|
@param SearchFilters filters
|
||||||
|
|
||||||
<aside class="md:w-64 py-4 shrink-0 hidden sm:block">
|
<aside class="md:w-64 py-4 shrink-0 hidden md:block finepointer:block">
|
||||||
<div class="space-y-6 sticky top-4">
|
<div class="space-y-6 sticky top-4">
|
||||||
<div class="bg-white dark:bg-gray-800 p-4 border dark:border-gray-600 border-gray-300">
|
<div class="bg-white dark:bg-gray-800 p-4 border dark:border-gray-600 border-gray-300">
|
||||||
<h2 class="font-medium mb-3 flex items-center font-serif hidden md:block">
|
<h2 class="font-medium mb-3 flex items-center font-serif hidden md:block">
|
||||||
|
@@ -9,6 +9,14 @@ module.exports = {
|
|||||||
nicotine: '#f8f8ee',
|
nicotine: '#f8f8ee',
|
||||||
margeblue: '#3e5f6f',
|
margeblue: '#3e5f6f',
|
||||||
liteblue: '#0066cc',
|
liteblue: '#0066cc',
|
||||||
|
},
|
||||||
|
screens: {
|
||||||
|
'coarsepointer': {
|
||||||
|
'raw': '(pointer: coarse)'
|
||||||
|
},
|
||||||
|
'finepointer': {
|
||||||
|
'raw': '(pointer: fine)'
|
||||||
|
},
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
screens: {
|
screens: {
|
||||||
|
@@ -23,7 +23,12 @@ apply from: "$rootProject.projectDir/srcsets.gradle"
|
|||||||
apply from: "$rootProject.projectDir/docker.gradle"
|
apply from: "$rootProject.projectDir/docker.gradle"
|
||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
implementation project(':third-party:symspell')
|
|
||||||
|
|
||||||
|
implementation project(':code:common:db')
|
||||||
|
implementation project(':code:common:model')
|
||||||
|
implementation project(':code:common:service')
|
||||||
|
implementation project(':code:common:config')
|
||||||
|
|
||||||
implementation project(':code:functions:live-capture')
|
implementation project(':code:functions:live-capture')
|
||||||
implementation project(':code:functions:live-capture:api')
|
implementation project(':code:functions:live-capture:api')
|
||||||
@@ -32,20 +37,16 @@ dependencies {
|
|||||||
implementation project(':code:functions:domain-info')
|
implementation project(':code:functions:domain-info')
|
||||||
implementation project(':code:functions:domain-info:api')
|
implementation project(':code:functions:domain-info:api')
|
||||||
|
|
||||||
implementation project(':code:common:config')
|
|
||||||
implementation project(':code:common:service')
|
|
||||||
implementation project(':code:common:model')
|
|
||||||
implementation project(':code:common:db')
|
|
||||||
|
|
||||||
implementation project(':code:features-search:screenshots')
|
|
||||||
|
|
||||||
implementation project(':code:libraries:geo-ip')
|
implementation project(':code:libraries:geo-ip')
|
||||||
implementation project(':code:libraries:language-processing')
|
implementation project(':code:libraries:language-processing')
|
||||||
implementation project(':code:libraries:term-frequency-dict')
|
implementation project(':code:libraries:term-frequency-dict')
|
||||||
|
|
||||||
implementation libs.bundles.slf4j
|
implementation project(':third-party:symspell')
|
||||||
|
|
||||||
|
|
||||||
|
implementation libs.bundles.slf4j
|
||||||
implementation libs.prometheus
|
implementation libs.prometheus
|
||||||
|
implementation libs.commons.io
|
||||||
implementation libs.guava
|
implementation libs.guava
|
||||||
libs.bundles.grpc.get().each {
|
libs.bundles.grpc.get().each {
|
||||||
implementation dependencies.create(it) {
|
implementation dependencies.create(it) {
|
||||||
@@ -59,9 +60,7 @@ dependencies {
|
|||||||
implementation dependencies.create(libs.guice.get()) {
|
implementation dependencies.create(libs.guice.get()) {
|
||||||
exclude group: 'com.google.guava'
|
exclude group: 'com.google.guava'
|
||||||
}
|
}
|
||||||
implementation dependencies.create(libs.spark.get()) {
|
implementation libs.bundles.jooby
|
||||||
exclude group: 'org.eclipse.jetty'
|
|
||||||
}
|
|
||||||
implementation libs.bundles.jetty
|
implementation libs.bundles.jetty
|
||||||
implementation libs.opencsv
|
implementation libs.opencsv
|
||||||
implementation libs.trove
|
implementation libs.trove
|
||||||
|
@@ -3,6 +3,8 @@ package nu.marginalia.assistant;
|
|||||||
import com.google.inject.Guice;
|
import com.google.inject.Guice;
|
||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import com.google.inject.Injector;
|
import com.google.inject.Injector;
|
||||||
|
import io.jooby.ExecutionMode;
|
||||||
|
import io.jooby.Jooby;
|
||||||
import nu.marginalia.livecapture.LivecaptureModule;
|
import nu.marginalia.livecapture.LivecaptureModule;
|
||||||
import nu.marginalia.service.MainClass;
|
import nu.marginalia.service.MainClass;
|
||||||
import nu.marginalia.service.ServiceId;
|
import nu.marginalia.service.ServiceId;
|
||||||
@@ -38,8 +40,17 @@ public class AssistantMain extends MainClass {
|
|||||||
var configuration = injector.getInstance(ServiceConfiguration.class);
|
var configuration = injector.getInstance(ServiceConfiguration.class);
|
||||||
orchestrateBoot(registry, configuration);
|
orchestrateBoot(registry, configuration);
|
||||||
|
|
||||||
injector.getInstance(AssistantMain.class);
|
var main = injector.getInstance(AssistantMain.class);
|
||||||
injector.getInstance(Initialization.class).setReady();
|
injector.getInstance(Initialization.class).setReady();
|
||||||
|
|
||||||
|
Jooby.runApp(new String[] { "application.env=prod" }, ExecutionMode.WORKER, () -> new Jooby() {
|
||||||
|
{
|
||||||
|
main.start(this);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
public void start(Jooby jooby) {
|
||||||
|
service.startJooby(jooby);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -2,27 +2,27 @@ package nu.marginalia.assistant;
|
|||||||
|
|
||||||
import com.google.gson.Gson;
|
import com.google.gson.Gson;
|
||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
|
import io.jooby.Context;
|
||||||
|
import io.jooby.Jooby;
|
||||||
import nu.marginalia.assistant.suggest.Suggestions;
|
import nu.marginalia.assistant.suggest.Suggestions;
|
||||||
import nu.marginalia.functions.domains.DomainInfoGrpcService;
|
import nu.marginalia.functions.domains.DomainInfoGrpcService;
|
||||||
import nu.marginalia.functions.math.MathGrpcService;
|
import nu.marginalia.functions.math.MathGrpcService;
|
||||||
import nu.marginalia.livecapture.LiveCaptureGrpcService;
|
import nu.marginalia.livecapture.LiveCaptureGrpcService;
|
||||||
import nu.marginalia.model.gson.GsonFactory;
|
import nu.marginalia.model.gson.GsonFactory;
|
||||||
import nu.marginalia.rss.svc.FeedsGrpcService;
|
import nu.marginalia.rss.svc.FeedsGrpcService;
|
||||||
import nu.marginalia.screenshot.ScreenshotService;
|
|
||||||
import nu.marginalia.service.discovery.property.ServicePartition;
|
import nu.marginalia.service.discovery.property.ServicePartition;
|
||||||
import nu.marginalia.service.server.BaseServiceParams;
|
import nu.marginalia.service.server.BaseServiceParams;
|
||||||
import nu.marginalia.service.server.SparkService;
|
import nu.marginalia.service.server.JoobyService;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import spark.Request;
|
|
||||||
import spark.Response;
|
|
||||||
import spark.Spark;
|
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
public class AssistantService extends SparkService {
|
public class AssistantService extends JoobyService {
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
private final Gson gson = GsonFactory.get();
|
private final Gson gson = GsonFactory.get();
|
||||||
|
@org.jetbrains.annotations.NotNull
|
||||||
|
private final ScreenshotService screenshotService;
|
||||||
private final Suggestions suggestions;
|
private final Suggestions suggestions;
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
@@ -39,30 +39,30 @@ public class AssistantService extends SparkService {
|
|||||||
List.of(domainInfoGrpcService,
|
List.of(domainInfoGrpcService,
|
||||||
mathGrpcService,
|
mathGrpcService,
|
||||||
liveCaptureGrpcService,
|
liveCaptureGrpcService,
|
||||||
feedsGrpcService));
|
feedsGrpcService),
|
||||||
|
List.of());
|
||||||
|
this.screenshotService = screenshotService;
|
||||||
|
|
||||||
this.suggestions = suggestions;
|
this.suggestions = suggestions;
|
||||||
|
|
||||||
Spark.staticFiles.expireTime(600);
|
|
||||||
|
|
||||||
Spark.get("/screenshot/:id", screenshotService::serveScreenshotRequest);
|
|
||||||
Spark.get("/suggest/", this::getSuggestions, this::convertToJson);
|
|
||||||
|
|
||||||
Spark.awaitInitialization();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private Object getSuggestions(Request request, Response response) {
|
public void startJooby(Jooby jooby) {
|
||||||
response.type("application/json");
|
super.startJooby(jooby);
|
||||||
var param = request.queryParams("partial");
|
|
||||||
if (param == null) {
|
jooby.get("/suggest/", this::getSuggestions);
|
||||||
|
jooby.get("/screenshot/{id}", screenshotService::serveScreenshotRequest);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getSuggestions(Context context) {
|
||||||
|
context.setResponseType("application/json");
|
||||||
|
var param = context.query("partial");
|
||||||
|
if (param.isMissing()) {
|
||||||
logger.warn("Bad parameter, partial is null");
|
logger.warn("Bad parameter, partial is null");
|
||||||
Spark.halt(500);
|
context.setResponseCode(500);
|
||||||
|
return "{}";
|
||||||
}
|
}
|
||||||
return suggestions.getSuggestions(10, param);
|
return gson.toJson(suggestions.getSuggestions(10, param.value()));
|
||||||
}
|
|
||||||
|
|
||||||
private String convertToJson(Object o) {
|
|
||||||
return gson.toJson(o);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@@ -0,0 +1,118 @@
|
|||||||
|
package nu.marginalia.assistant;
|
||||||
|
|
||||||
|
import com.google.common.base.Strings;
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
|
import io.jooby.Context;
|
||||||
|
import nu.marginalia.db.DbDomainQueries;
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
|
||||||
|
public class ScreenshotService {
|
||||||
|
|
||||||
|
private final DbDomainQueries domainQueries;
|
||||||
|
private final HikariDataSource dataSource;
|
||||||
|
|
||||||
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public ScreenshotService(DbDomainQueries dbDomainQueries, HikariDataSource dataSource) {
|
||||||
|
this.domainQueries = dbDomainQueries;
|
||||||
|
this.dataSource = dataSource;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasScreenshot(int domainId) {
|
||||||
|
try (var conn = dataSource.getConnection();
|
||||||
|
var ps = conn.prepareStatement("""
|
||||||
|
SELECT TRUE
|
||||||
|
FROM DATA_DOMAIN_SCREENSHOT
|
||||||
|
INNER JOIN EC_DOMAIN ON EC_DOMAIN.DOMAIN_NAME=DATA_DOMAIN_SCREENSHOT.DOMAIN_NAME
|
||||||
|
WHERE EC_DOMAIN.ID=?
|
||||||
|
""")) {
|
||||||
|
ps.setInt(1, domainId);
|
||||||
|
var rs = ps.executeQuery();
|
||||||
|
if (rs.next()) {
|
||||||
|
return rs.getBoolean(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (SQLException ex) {
|
||||||
|
logger.warn("SQL error", ex);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object serveScreenshotRequest(Context context) {
|
||||||
|
if (Strings.isNullOrEmpty(context.path("id").value(""))) {
|
||||||
|
context.setResponseCode(404);
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
int id = context.path("id").intValue();
|
||||||
|
|
||||||
|
try (var conn = dataSource.getConnection();
|
||||||
|
var ps = conn.prepareStatement("""
|
||||||
|
SELECT CONTENT_TYPE, DATA
|
||||||
|
FROM DATA_DOMAIN_SCREENSHOT
|
||||||
|
INNER JOIN EC_DOMAIN ON EC_DOMAIN.DOMAIN_NAME=DATA_DOMAIN_SCREENSHOT.DOMAIN_NAME
|
||||||
|
WHERE EC_DOMAIN.ID=?
|
||||||
|
""")) {
|
||||||
|
ps.setInt(1, id);
|
||||||
|
var rsp = ps.executeQuery();
|
||||||
|
if (rsp.next()) {
|
||||||
|
context.setResponseType(rsp.getString(1));
|
||||||
|
context.setResponseCode(200);
|
||||||
|
context.setResponseHeader("Cache-control", "public,max-age=3600");
|
||||||
|
|
||||||
|
try (var rs = context.responseStream()) {
|
||||||
|
IOUtils.copy(rsp.getBlob(2).getBinaryStream(), rs);
|
||||||
|
}
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (IOException ex) {
|
||||||
|
logger.warn("IO error", ex);
|
||||||
|
}
|
||||||
|
catch (SQLException ex) {
|
||||||
|
logger.warn("SQL error", ex);
|
||||||
|
}
|
||||||
|
|
||||||
|
context.setResponseType("image/svg+xml");
|
||||||
|
|
||||||
|
var name = domainQueries.getDomain(id).map(Object::toString)
|
||||||
|
.orElse("[Screenshot Not Yet Captured]");
|
||||||
|
|
||||||
|
return """
|
||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||||
|
<svg
|
||||||
|
xmlns="http://www.w3.org/2000/svg"
|
||||||
|
width="640px"
|
||||||
|
height="480px"
|
||||||
|
viewBox="0 0 640 480"
|
||||||
|
version="1.1">
|
||||||
|
<g>
|
||||||
|
<rect
|
||||||
|
style="fill:#808080"
|
||||||
|
id="rect288"
|
||||||
|
width="595.41992"
|
||||||
|
height="430.01825"
|
||||||
|
x="23.034981"
|
||||||
|
y="27.850344" />
|
||||||
|
<text
|
||||||
|
xml:space="preserve"
|
||||||
|
style="font-size:100px;fill:#909090;font-family:sans-serif;"
|
||||||
|
x="20"
|
||||||
|
y="120">Placeholder</text>
|
||||||
|
<text
|
||||||
|
xml:space="preserve"
|
||||||
|
style="font-size:32px;fill:#000000;font-family:monospace;"
|
||||||
|
x="320" y="240" dominant-baseline="middle" text-anchor="middle">%s</text>
|
||||||
|
</g>
|
||||||
|
</svg>
|
||||||
|
""".formatted(name);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@@ -160,12 +160,12 @@ dependencyResolutionManagement {
|
|||||||
library('prometheus-server', 'io.prometheus', 'simpleclient_httpserver').version('0.16.0')
|
library('prometheus-server', 'io.prometheus', 'simpleclient_httpserver').version('0.16.0')
|
||||||
library('prometheus-hotspot', 'io.prometheus', 'simpleclient_hotspot').version('0.16.0')
|
library('prometheus-hotspot', 'io.prometheus', 'simpleclient_hotspot').version('0.16.0')
|
||||||
|
|
||||||
library('slf4j.api', 'org.slf4j', 'slf4j-api').version('1.7.36')
|
library('slf4j.api', 'org.slf4j', 'slf4j-api').version('2.0.3')
|
||||||
library('slf4j.jdk14', 'org.slf4j', 'slf4j-jdk14').version('2.0.3')
|
library('slf4j.jdk14', 'org.slf4j', 'slf4j-jdk14').version('2.0.3')
|
||||||
|
|
||||||
library('log4j.api', 'org.apache.logging.log4j', 'log4j-api').version('2.17.2')
|
library('log4j.api', 'org.apache.logging.log4j', 'log4j-api').version('2.24.3')
|
||||||
library('log4j.core', 'org.apache.logging.log4j', 'log4j-core').version('2.17.2')
|
library('log4j.core', 'org.apache.logging.log4j', 'log4j-core').version('2.24.3')
|
||||||
library('log4j.slf4j', 'org.apache.logging.log4j', 'log4j-slf4j-impl').version('2.17.2')
|
library('log4j.slf4j', 'org.apache.logging.log4j', 'log4j-slf4j2-impl').version('2.24.3')
|
||||||
|
|
||||||
library('notnull','org.jetbrains','annotations').version('24.0.0')
|
library('notnull','org.jetbrains','annotations').version('24.0.0')
|
||||||
|
|
||||||
@@ -239,6 +239,7 @@ dependencyResolutionManagement {
|
|||||||
library('jooby-jte','io.jooby','jooby-jte').version(joobyVersion)
|
library('jooby-jte','io.jooby','jooby-jte').version(joobyVersion)
|
||||||
library('jooby-apt','io.jooby','jooby-apt').version(joobyVersion)
|
library('jooby-apt','io.jooby','jooby-apt').version(joobyVersion)
|
||||||
|
|
||||||
|
library('wiremock', 'org.wiremock','wiremock').version('3.11.0')
|
||||||
library('jte','gg.jte','jte').version('3.1.15')
|
library('jte','gg.jte','jte').version('3.1.15')
|
||||||
|
|
||||||
bundle('jetty', ['jetty-server', 'jetty-util', 'jetty-servlet'])
|
bundle('jetty', ['jetty-server', 'jetty-util', 'jetty-servlet'])
|
||||||
|
Reference in New Issue
Block a user