1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-05 21:22:39 +02:00

Compare commits

...

16 Commits

Author SHA1 Message Date
Viktor Lofgren
20ca41ec95 (processed model) Use String columns instead of Txt columns for SlopDocumentRecord
It's very likely TxtStringColumn is the culprit of the bug seen in https://github.com/MarginaliaSearch/MarginaliaSearch/issues/154 where the wrong URL was shown for a search result.
2025-02-24 11:41:51 +01:00
Viktor Lofgren
7671f0d9e4 (search) Display message when no search results are found 2025-02-24 11:15:55 +01:00
Viktor Lofgren
44d6bc71b7 (assistant) Migrate to Jooby framework 2025-02-15 13:28:12 +01:00
Viktor Lofgren
9d302e2973 (assistant) Migrate to Jooby framework 2025-02-15 13:26:04 +01:00
Viktor Lofgren
f553701224 (assistant) Migrate to Jooby framework 2025-02-15 13:21:48 +01:00
Viktor Lofgren
f076d05595 (deps) Upgrade slf4j to latest 2025-02-15 12:50:16 +01:00
Viktor Lofgren
b513809710 (*) Stopgap fix for metrics server initialization errors bringing down services 2025-02-14 17:09:48 +01:00
Viktor Lofgren
7519b28e21 (search) Correct exception from misbehaving bots feeding invalid urls 2025-02-14 17:05:24 +01:00
Viktor Lofgren
3eac4dd57f (search) Correct exception in error handler when page is missing 2025-02-14 17:00:21 +01:00
Viktor Lofgren
4c2810720a (search) Add redirect handler for full URLs in the /site endpoint 2025-02-14 16:31:11 +01:00
Viktor Lofgren
8480ba8daa (live-capture) Code cleanup 2025-02-04 14:05:36 +01:00
Viktor Lofgren
fbba392491 (live-capture) Send a UA-string from the browserless fetcher as well
The change also introduces a somewhat convoluted wiremock test to intercept and verify that these headers are in fact sent
2025-02-04 13:36:49 +01:00
Viktor Lofgren
530eb35949 (update-rss) Do not fail the feed fetcher control actor if it takes a long time to complete. 2025-02-03 11:35:32 +01:00
Viktor Lofgren
c2dd2175a2 (search) Add new query expansion rule contracting WORD NUM pairs into WORD-NUM and WORDNUM 2025-02-01 13:13:30 +01:00
Viktor Lofgren
b8581b0f56 (crawler) Safe sanitization of headers during warc->slop conversion
The warc->slop converter was rejecting some items because they had headers that were representable in the Warc code's MessageHeader map implementation, but illegal in the HttpHeaders' implementation.

Fixing this by manually filtering these out.  Ostensibly the constructor has a filtering predicate, but this annoyingly runs too late and fails to prevent the problem.
2025-01-31 12:47:42 +01:00
Viktor Lofgren
2ea34767d8 (crawler) Use the response URL when resolving relative links
The crawler was incorrectly using the request URL as the base URL when resolving relative links.  This caused problems when encountering redirects.

 For example if we fetch /log, redirecting to  /log/ and find links to foo/, and bar/; these would resolve to /foo and /bar, and not /log/foo and /log/bar.
2025-01-31 12:40:13 +01:00
22 changed files with 355 additions and 80 deletions

View File

@@ -6,6 +6,7 @@ import nu.marginalia.service.ServiceId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.InetAddress;
import java.net.NetworkInterface;
import java.util.Enumeration;
@@ -115,7 +116,7 @@ public class ServiceConfigurationModule extends AbstractModule {
}
}
public static String getLocalNetworkIP() throws Exception {
public static String getLocalNetworkIP() throws IOException {
Enumeration<NetworkInterface> nets = NetworkInterface.getNetworkInterfaces();
while (nets.hasMoreElements()) {

View File

@@ -15,6 +15,7 @@ import org.slf4j.LoggerFactory;
import org.slf4j.Marker;
import org.slf4j.MarkerFactory;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
@@ -106,9 +107,12 @@ public class JoobyService {
config.externalAddress());
// FIXME: This won't work outside of docker, may need to submit a PR to jooby to allow classpaths here
jooby.install(new JteModule(Path.of("/app/resources/jte"), Path.of("/app/classes/jte-precompiled")));
jooby.assets("/*", Paths.get("/app/resources/static"));
if (Files.exists(Path.of("/app/resources/jte")) || Files.exists(Path.of("/app/classes/jte-precompiled"))) {
jooby.install(new JteModule(Path.of("/app/resources/jte"), Path.of("/app/classes/jte-precompiled")));
}
if (Files.exists(Path.of("/app/resources/static"))) {
jooby.assets("/*", Paths.get("/app/resources/static"));
}
var options = new ServerOptions();
options.setHost(config.bindAddress());
options.setPort(restEndpoint.port());

View File

@@ -6,25 +6,34 @@ import nu.marginalia.service.module.ServiceConfiguration;
import org.eclipse.jetty.server.Server;
import org.eclipse.jetty.servlet.ServletContextHandler;
import org.eclipse.jetty.servlet.ServletHolder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.InetSocketAddress;
public class MetricsServer {
private static Logger logger = LoggerFactory.getLogger(MetricsServer.class);
@Inject
public MetricsServer(ServiceConfiguration configuration) throws Exception {
public MetricsServer(ServiceConfiguration configuration) {
// If less than zero, we forego setting up a metrics server
if (configuration.metricsPort() < 0)
return;
Server server = new Server(new InetSocketAddress(configuration.bindAddress(), configuration.metricsPort()));
try {
Server server = new Server(new InetSocketAddress(configuration.bindAddress(), configuration.metricsPort()));
ServletContextHandler context = new ServletContextHandler();
context.setContextPath("/");
server.setHandler(context);
ServletContextHandler context = new ServletContextHandler();
context.setContextPath("/");
server.setHandler(context);
context.addServlet(new ServletHolder(new MetricsServlet()), "/metrics");
context.addServlet(new ServletHolder(new MetricsServlet()), "/metrics");
server.start();
server.start();
}
catch (Exception|NoSuchMethodError ex) {
logger.error("Failed to set up metrics server", ex);
}
}
}

View File

@@ -14,6 +14,8 @@ import nu.marginalia.mq.persistence.MqPersistence;
import nu.marginalia.nodecfg.NodeConfigurationService;
import nu.marginalia.nodecfg.model.NodeProfile;
import nu.marginalia.service.module.ServiceConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.time.Duration;
import java.time.LocalDateTime;
@@ -29,6 +31,7 @@ public class UpdateRssActor extends RecordActorPrototype {
private final NodeConfigurationService nodeConfigurationService;
private final MqPersistence persistence;
private static final Logger logger = LoggerFactory.getLogger(UpdateRssActor.class);
@Inject
public UpdateRssActor(Gson gson,
@@ -101,8 +104,8 @@ public class UpdateRssActor extends RecordActorPrototype {
case UpdateRefresh(int count, long msgId) -> {
MqMessage msg = persistence.waitForMessageTerminalState(msgId, Duration.ofSeconds(10), Duration.ofHours(12));
if (msg == null) {
// Retry the update
yield new Error("Failed to update feeds: message not found");
logger.warn("UpdateRefresh is taking a very long time");
yield new UpdateRefresh(count, msgId);
} else if (msg.state() != MqMessageState.OK) {
// Retry the update
yield new Error("Failed to update feeds: " + msg.state());
@@ -119,8 +122,8 @@ public class UpdateRssActor extends RecordActorPrototype {
case UpdateClean(long msgId) -> {
MqMessage msg = persistence.waitForMessageTerminalState(msgId, Duration.ofSeconds(10), Duration.ofHours(12));
if (msg == null) {
// Retry the update
yield new Error("Failed to update feeds: message not found");
logger.warn("UpdateClean is taking a very long time");
yield new UpdateClean(msgId);
} else if (msg.state() != MqMessageState.OK) {
// Retry the update
yield new Error("Failed to update feeds: " + msg.state());

View File

@@ -34,6 +34,7 @@ dependencies {
implementation libs.bundles.slf4j
implementation libs.commons.lang3
implementation libs.commons.io
implementation libs.wiremock
implementation libs.prometheus
implementation libs.guava

View File

@@ -1,6 +1,7 @@
package nu.marginalia.livecapture;
import com.google.gson.Gson;
import nu.marginalia.WmsaHome;
import nu.marginalia.model.gson.GsonFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -12,6 +13,7 @@ import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.time.Duration;
import java.util.Map;
import java.util.Optional;
/** Client for local browserless.io API */
public class BrowserlessClient implements AutoCloseable {
@@ -27,13 +29,16 @@ public class BrowserlessClient implements AutoCloseable {
private final URI browserlessURI;
private final Gson gson = GsonFactory.get();
private final String userAgent = WmsaHome.getUserAgent().uaString();
public BrowserlessClient(URI browserlessURI) {
this.browserlessURI = browserlessURI;
}
public String content(String url, GotoOptions gotoOptions) throws IOException, InterruptedException {
public Optional<String> content(String url, GotoOptions gotoOptions) throws IOException, InterruptedException {
Map<String, Object> requestData = Map.of(
"url", url,
"userAgent", userAgent,
"gotoOptions", gotoOptions
);
@@ -49,10 +54,10 @@ public class BrowserlessClient implements AutoCloseable {
if (rsp.statusCode() >= 300) {
logger.info("Failed to fetch content for {}, status {}", url, rsp.statusCode());
return null;
return Optional.empty();
}
return rsp.body();
return Optional.of(rsp.body());
}
public byte[] screenshot(String url, GotoOptions gotoOptions, ScreenshotOptions screenshotOptions)
@@ -60,6 +65,7 @@ public class BrowserlessClient implements AutoCloseable {
Map<String, Object> requestData = Map.of(
"url", url,
"userAgent", userAgent,
"options", screenshotOptions,
"gotoOptions", gotoOptions
);
@@ -84,7 +90,7 @@ public class BrowserlessClient implements AutoCloseable {
}
@Override
public void close() throws Exception {
public void close() {
httpClient.shutdownNow();
}

View File

@@ -1,5 +1,9 @@
package nu.marginalia.livecapture;
import com.github.tomakehurst.wiremock.WireMockServer;
import com.github.tomakehurst.wiremock.core.WireMockConfiguration;
import nu.marginalia.WmsaHome;
import nu.marginalia.service.module.ServiceConfigurationModule;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Tag;
@@ -8,34 +12,86 @@ import org.testcontainers.containers.GenericContainer;
import org.testcontainers.junit.jupiter.Testcontainers;
import org.testcontainers.utility.DockerImageName;
import java.io.IOException;
import java.net.URI;
import java.util.Map;
import static com.github.tomakehurst.wiremock.client.WireMock.*;
@Testcontainers
@Tag("slow")
public class BrowserlessClientTest {
static GenericContainer<?> container = new GenericContainer<>(DockerImageName.parse("browserless/chrome"))
.withEnv(Map.of("TOKEN", "BROWSERLESS_TOKEN"))
.withNetworkMode("bridge")
.withExposedPorts(3000);
static WireMockServer wireMockServer =
new WireMockServer(WireMockConfiguration.wireMockConfig()
.port(18089));
static String localIp;
static URI browserlessURI;
@BeforeAll
public static void setup() {
public static void setup() throws IOException {
container.start();
browserlessURI = URI.create(String.format("http://%s:%d/",
container.getHost(),
container.getMappedPort(3000))
);
wireMockServer.start();
wireMockServer.stubFor(get("/").willReturn(aResponse().withStatus(200).withBody("Ok")));
localIp = ServiceConfigurationModule.getLocalNetworkIP();
}
@Tag("flaky")
@Test
public void testInspectContentUA__Flaky() throws Exception {
try (var client = new BrowserlessClient(browserlessURI)) {
client.content("http://" + localIp + ":18089/",
BrowserlessClient.GotoOptions.defaultValues()
);
}
wireMockServer.verify(getRequestedFor(urlEqualTo("/")).withHeader("User-Agent", equalTo(WmsaHome.getUserAgent().uaString())));
}
@Tag("flaky")
@Test
public void testInspectScreenshotUA__Flaky() throws Exception {
try (var client = new BrowserlessClient(browserlessURI)) {
client.screenshot("http://" + localIp + ":18089/",
BrowserlessClient.GotoOptions.defaultValues(),
BrowserlessClient.ScreenshotOptions.defaultValues()
);
}
wireMockServer.verify(getRequestedFor(urlEqualTo("/")).withHeader("User-Agent", equalTo(WmsaHome.getUserAgent().uaString())));
}
@Test
public void testContent() throws Exception {
try (var client = new BrowserlessClient(URI.create("http://" + container.getHost() + ":" + container.getMappedPort(3000)))) {
var content = client.content("https://www.marginalia.nu/", BrowserlessClient.GotoOptions.defaultValues());
Assertions.assertNotNull(content, "Content should not be null");
try (var client = new BrowserlessClient(browserlessURI)) {
var content = client.content("https://www.marginalia.nu/", BrowserlessClient.GotoOptions.defaultValues()).orElseThrow();
Assertions.assertFalse(content.isBlank(), "Content should not be empty");
}
}
@Test
public void testScreenshot() throws Exception {
try (var client = new BrowserlessClient(URI.create("http://" + container.getHost() + ":" + container.getMappedPort(3000)))) {
var screenshot = client.screenshot("https://www.marginalia.nu/", BrowserlessClient.GotoOptions.defaultValues(), BrowserlessClient.ScreenshotOptions.defaultValues());
try (var client = new BrowserlessClient(browserlessURI)) {
var screenshot = client.screenshot("https://www.marginalia.nu/",
BrowserlessClient.GotoOptions.defaultValues(),
BrowserlessClient.ScreenshotOptions.defaultValues());
Assertions.assertNotNull(screenshot, "Screenshot should not be null");
}
}

View File

@@ -134,6 +134,10 @@ public class QueryExpansion {
if (scoreCombo > scoreA + scoreB || scoreCombo > 1000) {
graph.addVariantForSpan(prev, qw, joinedWord);
}
else if (StringUtils.isAlpha(prev.word()) && StringUtils.isNumeric(qw.word())) { // join e.g. trs 80 to trs80 and trs-80
graph.addVariantForSpan(prev, qw, prev.word() + qw.word());
graph.addVariantForSpan(prev, qw, prev.word() + "-" + qw.word());
}
}
prev = qw;

View File

@@ -213,6 +213,18 @@ public class QueryFactoryTest {
System.out.println(subquery);
}
@Test
public void testContractionWordNum() {
var subquery = parseAndGetSpecs("glove 80");
Assertions.assertTrue(subquery.query.compiledQuery.contains(" glove "));
Assertions.assertTrue(subquery.query.compiledQuery.contains(" 80 "));
Assertions.assertTrue(subquery.query.compiledQuery.contains(" glove-80 "));
Assertions.assertTrue(subquery.query.compiledQuery.contains(" glove80 "));
}
@Test
public void testCplusPlus() {
var subquery = parseAndGetSpecs("std::vector::push_back vector");

View File

@@ -11,7 +11,6 @@ import nu.marginalia.slop.column.primitive.IntColumn;
import nu.marginalia.slop.column.primitive.LongColumn;
import nu.marginalia.slop.column.string.EnumColumn;
import nu.marginalia.slop.column.string.StringColumn;
import nu.marginalia.slop.column.string.TxtStringColumn;
import nu.marginalia.slop.desc.StorageType;
import org.jetbrains.annotations.Nullable;
@@ -182,8 +181,8 @@ public record SlopDocumentRecord(
}
// Basic information
private static final TxtStringColumn domainsColumn = new TxtStringColumn("domain", StandardCharsets.UTF_8, StorageType.GZIP);
private static final TxtStringColumn urlsColumn = new TxtStringColumn("url", StandardCharsets.UTF_8, StorageType.GZIP);
private static final StringColumn domainsColumn = new StringColumn("domain", StandardCharsets.UTF_8, StorageType.GZIP);
private static final StringColumn urlsColumn = new StringColumn("url", StandardCharsets.UTF_8, StorageType.GZIP);
private static final VarintColumn ordinalsColumn = new VarintColumn("ordinal", StorageType.PLAIN);
private static final EnumColumn statesColumn = new EnumColumn("state", StandardCharsets.US_ASCII, StorageType.PLAIN);
private static final StringColumn stateReasonsColumn = new StringColumn("stateReason", StandardCharsets.US_ASCII, StorageType.GZIP);
@@ -211,7 +210,7 @@ public record SlopDocumentRecord(
private static final VarintCodedSequenceArrayColumn spansColumn = new VarintCodedSequenceArrayColumn("spans", StorageType.ZSTD);
public static class KeywordsProjectionReader extends SlopTable {
private final TxtStringColumn.Reader domainsReader;
private final StringColumn.Reader domainsReader;
private final VarintColumn.Reader ordinalsReader;
private final IntColumn.Reader htmlFeaturesReader;
private final LongColumn.Reader domainMetadataReader;
@@ -275,8 +274,8 @@ public record SlopDocumentRecord(
}
public static class MetadataReader extends SlopTable {
private final TxtStringColumn.Reader domainsReader;
private final TxtStringColumn.Reader urlsReader;
private final StringColumn.Reader domainsReader;
private final StringColumn.Reader urlsReader;
private final VarintColumn.Reader ordinalsReader;
private final StringColumn.Reader titlesReader;
private final StringColumn.Reader descriptionsReader;
@@ -332,8 +331,8 @@ public record SlopDocumentRecord(
}
public static class Writer extends SlopTable {
private final TxtStringColumn.Writer domainsWriter;
private final TxtStringColumn.Writer urlsWriter;
private final StringColumn.Writer domainsWriter;
private final StringColumn.Writer urlsWriter;
private final VarintColumn.Writer ordinalsWriter;
private final EnumColumn.Writer statesWriter;
private final StringColumn.Writer stateReasonsWriter;

View File

@@ -381,8 +381,10 @@ public class CrawlerRetreiver implements AutoCloseable {
if (docOpt.isPresent()) {
var doc = docOpt.get();
crawlFrontier.enqueueLinksFromDocument(top, doc);
crawlFrontier.addVisited(new EdgeUrl(ok.uri()));
var responseUrl = new EdgeUrl(ok.uri());
crawlFrontier.enqueueLinksFromDocument(responseUrl, doc);
crawlFrontier.addVisited(responseUrl);
}
}
else if (fetchedDoc instanceof HttpFetchResult.Result304Raw && reference.doc() != null) {

View File

@@ -12,8 +12,7 @@ import java.io.InputStream;
import java.net.InetAddress;
import java.net.URI;
import java.net.http.HttpHeaders;
import java.util.Arrays;
import java.util.Optional;
import java.util.*;
/* FIXME: This interface has a very unfortunate name that is not very descriptive.
*/
@@ -65,7 +64,21 @@ public sealed interface HttpFetchResult {
) implements HttpFetchResult {
public ResultOk(URI uri, int status, MessageHeaders headers, String ipAddress, byte[] bytes, int bytesStart, int length) {
this(uri, status, HttpHeaders.of(headers.map(), (k,v) -> true), ipAddress, bytes, bytesStart, length);
this(uri, status, convertHeaders(headers), ipAddress, bytes, bytesStart, length);
}
private static HttpHeaders convertHeaders(MessageHeaders messageHeaders) {
Map<String, List<String>> inputMap = messageHeaders.map();
Map<String, List<String>> filteredMap = new HashMap<>(Math.max(4, inputMap.size()));
inputMap.forEach((k, v) -> {
if (k.isBlank()) return;
if (!Character.isAlphabetic(k.charAt(0))) return;
filteredMap.put(k, v);
});
return HttpHeaders.of(filteredMap, (k,v) -> true);
}
public boolean isOk() {

View File

@@ -3,8 +3,10 @@ package nu.marginalia.search;
import com.google.inject.Inject;
import io.jooby.Context;
import io.jooby.Jooby;
import io.jooby.StatusCode;
import io.prometheus.client.Counter;
import io.prometheus.client.Histogram;
import nu.marginalia.WebsiteUrl;
import nu.marginalia.search.svc.*;
import nu.marginalia.service.discovery.property.ServicePartition;
import nu.marginalia.service.server.BaseServiceParams;
@@ -16,6 +18,7 @@ import java.util.List;
public class SearchService extends JoobyService {
private final WebsiteUrl websiteUrl;
private final SearchSiteSubscriptionService siteSubscriptionService;
private static final Logger logger = LoggerFactory.getLogger(SearchService.class);
@@ -33,6 +36,7 @@ public class SearchService extends JoobyService {
@Inject
public SearchService(BaseServiceParams params,
WebsiteUrl websiteUrl,
SearchFrontPageService frontPageService,
SearchAddToCrawlQueueService addToCrawlQueueService,
SearchSiteSubscriptionService siteSubscriptionService,
@@ -51,6 +55,7 @@ public class SearchService extends JoobyService {
new SearchAddToCrawlQueueService_(addToCrawlQueueService),
new SearchBrowseService_(searchBrowseService)
));
this.websiteUrl = websiteUrl;
this.siteSubscriptionService = siteSubscriptionService;
}
@@ -62,6 +67,10 @@ public class SearchService extends JoobyService {
final String startTimeAttribute = "start-time";
jooby.get("/export-opml", siteSubscriptionService::exportOpml);
jooby.get("/site/https://*", this::handleSiteUrlRedirect);
jooby.get("/site/http://*", this::handleSiteUrlRedirect);
jooby.before((Context ctx) -> {
ctx.setAttribute(startTimeAttribute, System.nanoTime());
});
@@ -80,5 +89,19 @@ public class SearchService extends JoobyService {
});
}
/** Redirect handler for the case when the user passes
* an url like /site/https://example.com/, in this
* scenario we want to extract the domain name and redirect
* to /site/example.com/
*/
private Context handleSiteUrlRedirect(Context ctx) {
var pv = ctx.path("*").value();
int trailSlash = pv.indexOf('/');
if (trailSlash > 0) {
pv = pv.substring(0, trailSlash);
}
ctx.sendRedirect(StatusCode.TEMPORARY_REDIRECT, websiteUrl.withPath("site/" + pv));
return ctx;
}
}

View File

@@ -86,8 +86,10 @@ public record SearchParameters(WebsiteUrl url,
public String renderUrl() {
StringBuilder pathBuilder = new StringBuilder("/search?");
pathBuilder.append("query=").append(URLEncoder.encode(query, StandardCharsets.UTF_8));
if (query != null) {
pathBuilder.append("query=").append(URLEncoder.encode(query, StandardCharsets.UTF_8));
}
if (profile != SearchProfile.NO_FILTER) {
pathBuilder.append("&profile=").append(URLEncoder.encode(profile.filterId, StandardCharsets.UTF_8));
}

View File

@@ -67,6 +67,10 @@ public class DecoratedSearchResults {
return focusDomainId >= 0;
}
public boolean isEmpty() {
return results.isEmpty();
}
public SearchFilters getFilters() {
return filters;
}

View File

@@ -56,7 +56,9 @@ public class SearchQueryService {
}
catch (Exception ex) {
logger.error("Error", ex);
return errorPageService.serveError(SearchParameters.defaultsForQuery(websiteUrl, query, page));
return errorPageService.serveError(
SearchParameters.defaultsForQuery(websiteUrl, query, Objects.requireNonNullElse(page, 1))
);
}
}

View File

@@ -44,6 +44,11 @@
<div class="grow"></div>
<a href="${results.getParams().renderUrlWithoutSiteFocus()}" class="fa fa-remove"></a>
</div>
@elseif (results.isEmpty())
<div class="border dark:border-gray-600 rounded flex space-x-4 bg-white dark:bg-gray-800 text-gray-600 dark:text-gray-100 text-sm p-4 items-center">
No search results found. Try different search terms, or spelling variations. The search engine currently
only supports queries in the English language.
</div>
@endif
<div class="space-y-4 sm:space-y-6">

View File

@@ -23,7 +23,12 @@ apply from: "$rootProject.projectDir/srcsets.gradle"
apply from: "$rootProject.projectDir/docker.gradle"
dependencies {
implementation project(':third-party:symspell')
implementation project(':code:common:db')
implementation project(':code:common:model')
implementation project(':code:common:service')
implementation project(':code:common:config')
implementation project(':code:functions:live-capture')
implementation project(':code:functions:live-capture:api')
@@ -32,20 +37,16 @@ dependencies {
implementation project(':code:functions:domain-info')
implementation project(':code:functions:domain-info:api')
implementation project(':code:common:config')
implementation project(':code:common:service')
implementation project(':code:common:model')
implementation project(':code:common:db')
implementation project(':code:features-search:screenshots')
implementation project(':code:libraries:geo-ip')
implementation project(':code:libraries:language-processing')
implementation project(':code:libraries:term-frequency-dict')
implementation libs.bundles.slf4j
implementation project(':third-party:symspell')
implementation libs.bundles.slf4j
implementation libs.prometheus
implementation libs.commons.io
implementation libs.guava
libs.bundles.grpc.get().each {
implementation dependencies.create(it) {
@@ -59,9 +60,7 @@ dependencies {
implementation dependencies.create(libs.guice.get()) {
exclude group: 'com.google.guava'
}
implementation dependencies.create(libs.spark.get()) {
exclude group: 'org.eclipse.jetty'
}
implementation libs.bundles.jooby
implementation libs.bundles.jetty
implementation libs.opencsv
implementation libs.trove

View File

@@ -3,6 +3,8 @@ package nu.marginalia.assistant;
import com.google.inject.Guice;
import com.google.inject.Inject;
import com.google.inject.Injector;
import io.jooby.ExecutionMode;
import io.jooby.Jooby;
import nu.marginalia.livecapture.LivecaptureModule;
import nu.marginalia.service.MainClass;
import nu.marginalia.service.ServiceId;
@@ -38,8 +40,17 @@ public class AssistantMain extends MainClass {
var configuration = injector.getInstance(ServiceConfiguration.class);
orchestrateBoot(registry, configuration);
injector.getInstance(AssistantMain.class);
var main = injector.getInstance(AssistantMain.class);
injector.getInstance(Initialization.class).setReady();
Jooby.runApp(new String[] { "application.env=prod" }, ExecutionMode.WORKER, () -> new Jooby() {
{
main.start(this);
}
});
}
public void start(Jooby jooby) {
service.startJooby(jooby);
}
}

View File

@@ -2,27 +2,27 @@ package nu.marginalia.assistant;
import com.google.gson.Gson;
import com.google.inject.Inject;
import io.jooby.Context;
import io.jooby.Jooby;
import nu.marginalia.assistant.suggest.Suggestions;
import nu.marginalia.functions.domains.DomainInfoGrpcService;
import nu.marginalia.functions.math.MathGrpcService;
import nu.marginalia.livecapture.LiveCaptureGrpcService;
import nu.marginalia.model.gson.GsonFactory;
import nu.marginalia.rss.svc.FeedsGrpcService;
import nu.marginalia.screenshot.ScreenshotService;
import nu.marginalia.service.discovery.property.ServicePartition;
import nu.marginalia.service.server.BaseServiceParams;
import nu.marginalia.service.server.SparkService;
import nu.marginalia.service.server.JoobyService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import spark.Request;
import spark.Response;
import spark.Spark;
import java.util.List;
public class AssistantService extends SparkService {
public class AssistantService extends JoobyService {
private final Logger logger = LoggerFactory.getLogger(getClass());
private final Gson gson = GsonFactory.get();
@org.jetbrains.annotations.NotNull
private final ScreenshotService screenshotService;
private final Suggestions suggestions;
@Inject
@@ -39,30 +39,30 @@ public class AssistantService extends SparkService {
List.of(domainInfoGrpcService,
mathGrpcService,
liveCaptureGrpcService,
feedsGrpcService));
feedsGrpcService),
List.of());
this.screenshotService = screenshotService;
this.suggestions = suggestions;
Spark.staticFiles.expireTime(600);
Spark.get("/screenshot/:id", screenshotService::serveScreenshotRequest);
Spark.get("/suggest/", this::getSuggestions, this::convertToJson);
Spark.awaitInitialization();
}
private Object getSuggestions(Request request, Response response) {
response.type("application/json");
var param = request.queryParams("partial");
if (param == null) {
public void startJooby(Jooby jooby) {
super.startJooby(jooby);
jooby.get("/suggest/", this::getSuggestions);
jooby.get("/screenshot/{id}", screenshotService::serveScreenshotRequest);
}
private String getSuggestions(Context context) {
context.setResponseType("application/json");
var param = context.query("partial");
if (param.isMissing()) {
logger.warn("Bad parameter, partial is null");
Spark.halt(500);
context.setResponseCode(500);
return "{}";
}
return suggestions.getSuggestions(10, param);
}
private String convertToJson(Object o) {
return gson.toJson(o);
return gson.toJson(suggestions.getSuggestions(10, param.value()));
}
}

View File

@@ -0,0 +1,118 @@
package nu.marginalia.assistant;
import com.google.common.base.Strings;
import com.google.inject.Inject;
import com.zaxxer.hikari.HikariDataSource;
import io.jooby.Context;
import nu.marginalia.db.DbDomainQueries;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.sql.SQLException;
public class ScreenshotService {
private final DbDomainQueries domainQueries;
private final HikariDataSource dataSource;
private final Logger logger = LoggerFactory.getLogger(getClass());
@Inject
public ScreenshotService(DbDomainQueries dbDomainQueries, HikariDataSource dataSource) {
this.domainQueries = dbDomainQueries;
this.dataSource = dataSource;
}
public boolean hasScreenshot(int domainId) {
try (var conn = dataSource.getConnection();
var ps = conn.prepareStatement("""
SELECT TRUE
FROM DATA_DOMAIN_SCREENSHOT
INNER JOIN EC_DOMAIN ON EC_DOMAIN.DOMAIN_NAME=DATA_DOMAIN_SCREENSHOT.DOMAIN_NAME
WHERE EC_DOMAIN.ID=?
""")) {
ps.setInt(1, domainId);
var rs = ps.executeQuery();
if (rs.next()) {
return rs.getBoolean(1);
}
}
catch (SQLException ex) {
logger.warn("SQL error", ex);
}
return false;
}
public Object serveScreenshotRequest(Context context) {
if (Strings.isNullOrEmpty(context.path("id").value(""))) {
context.setResponseCode(404);
return "";
}
int id = context.path("id").intValue();
try (var conn = dataSource.getConnection();
var ps = conn.prepareStatement("""
SELECT CONTENT_TYPE, DATA
FROM DATA_DOMAIN_SCREENSHOT
INNER JOIN EC_DOMAIN ON EC_DOMAIN.DOMAIN_NAME=DATA_DOMAIN_SCREENSHOT.DOMAIN_NAME
WHERE EC_DOMAIN.ID=?
""")) {
ps.setInt(1, id);
var rsp = ps.executeQuery();
if (rsp.next()) {
context.setResponseType(rsp.getString(1));
context.setResponseCode(200);
context.setResponseHeader("Cache-control", "public,max-age=3600");
try (var rs = context.responseStream()) {
IOUtils.copy(rsp.getBlob(2).getBinaryStream(), rs);
}
return "";
}
}
catch (IOException ex) {
logger.warn("IO error", ex);
}
catch (SQLException ex) {
logger.warn("SQL error", ex);
}
context.setResponseType("image/svg+xml");
var name = domainQueries.getDomain(id).map(Object::toString)
.orElse("[Screenshot Not Yet Captured]");
return """
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<svg
xmlns="http://www.w3.org/2000/svg"
width="640px"
height="480px"
viewBox="0 0 640 480"
version="1.1">
<g>
<rect
style="fill:#808080"
id="rect288"
width="595.41992"
height="430.01825"
x="23.034981"
y="27.850344" />
<text
xml:space="preserve"
style="font-size:100px;fill:#909090;font-family:sans-serif;"
x="20"
y="120">Placeholder</text>
<text
xml:space="preserve"
style="font-size:32px;fill:#000000;font-family:monospace;"
x="320" y="240" dominant-baseline="middle" text-anchor="middle">%s</text>
</g>
</svg>
""".formatted(name);
}
}

View File

@@ -160,12 +160,12 @@ dependencyResolutionManagement {
library('prometheus-server', 'io.prometheus', 'simpleclient_httpserver').version('0.16.0')
library('prometheus-hotspot', 'io.prometheus', 'simpleclient_hotspot').version('0.16.0')
library('slf4j.api', 'org.slf4j', 'slf4j-api').version('1.7.36')
library('slf4j.api', 'org.slf4j', 'slf4j-api').version('2.0.3')
library('slf4j.jdk14', 'org.slf4j', 'slf4j-jdk14').version('2.0.3')
library('log4j.api', 'org.apache.logging.log4j', 'log4j-api').version('2.17.2')
library('log4j.core', 'org.apache.logging.log4j', 'log4j-core').version('2.17.2')
library('log4j.slf4j', 'org.apache.logging.log4j', 'log4j-slf4j-impl').version('2.17.2')
library('log4j.api', 'org.apache.logging.log4j', 'log4j-api').version('2.24.3')
library('log4j.core', 'org.apache.logging.log4j', 'log4j-core').version('2.24.3')
library('log4j.slf4j', 'org.apache.logging.log4j', 'log4j-slf4j2-impl').version('2.24.3')
library('notnull','org.jetbrains','annotations').version('24.0.0')
@@ -239,6 +239,7 @@ dependencyResolutionManagement {
library('jooby-jte','io.jooby','jooby-jte').version(joobyVersion)
library('jooby-apt','io.jooby','jooby-apt').version(joobyVersion)
library('wiremock', 'org.wiremock','wiremock').version('3.11.0')
library('jte','gg.jte','jte').version('3.1.15')
bundle('jetty', ['jetty-server', 'jetty-util', 'jetty-servlet'])