1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-06 17:32:39 +02:00

Compare commits

...

4 Commits

Author SHA1 Message Date
Viktor Lofgren
b1130d7a04 (domainstatedb) Allow creation of disconnected db
This is required for executor services that do not have crawl data to still be able to initialize.
2025-03-21 14:59:36 +01:00
Viktor Lofgren
8364bcdc97 (favicon) Add favicons to the matchograms 2025-03-21 14:30:40 +01:00
Viktor Lofgren
626cab5fab (favicon) Add favicon to site overview 2025-03-21 14:15:23 +01:00
Viktor Lofgren
cfd4712191 (favicon) Add capability for fetching favicons 2025-03-21 13:38:58 +01:00
16 changed files with 336 additions and 17 deletions

View File

@@ -22,6 +22,7 @@ public class DbDomainQueries {
private static final Logger logger = LoggerFactory.getLogger(DbDomainQueries.class);
private final Cache<EdgeDomain, Integer> domainIdCache = CacheBuilder.newBuilder().maximumSize(10_000).build();
private final Cache<EdgeDomain, DomainIdWithNode> domainWithNodeCache = CacheBuilder.newBuilder().maximumSize(10_000).build();
private final Cache<Integer, EdgeDomain> domainNameCache = CacheBuilder.newBuilder().maximumSize(10_000).build();
private final Cache<String, List<DomainWithNode>> siblingsCache = CacheBuilder.newBuilder().maximumSize(10_000).build();
@@ -59,6 +60,34 @@ public class DbDomainQueries {
}
}
/**
 * Looks up the database id and node affinity of a domain, caching successful lookups
 * in {@code domainWithNodeCache}.
 *
 * @param domain the domain to look up; matched against {@code EC_DOMAIN.DOMAIN_NAME}
 * @return the {@code ID} and {@code NODE_AFFINITY} columns of the matching row
 * @throws NoSuchElementException if no row matches, or if the lookup fails with an
 *         unchecked exception (e.g. a wrapped {@link SQLException}); in the latter case
 *         the underlying failure is attached as the cause
 */
public DomainIdWithNode getDomainIdWithNode(EdgeDomain domain) throws NoSuchElementException {
    try {
        return domainWithNodeCache.get(domain, () -> {
            try (var connection = dataSource.getConnection();
                 var stmt = connection.prepareStatement("SELECT ID, NODE_AFFINITY FROM EC_DOMAIN WHERE DOMAIN_NAME=?")) {
                stmt.setString(1, domain.toString());
                try (var rsp = stmt.executeQuery()) {
                    if (rsp.next()) {
                        return new DomainIdWithNode(rsp.getInt(1), rsp.getInt(2));
                    }
                }
            }
            catch (SQLException ex) {
                throw new RuntimeException(ex);
            }
            throw new NoSuchElementException("No such domain: " + domain);
        });
    }
    catch (UncheckedExecutionException ex) {
        // The cache wraps every unchecked exception thrown by the loader, so this branch
        // sees both the "not found" case and wrapped SQL failures.  Callers rely on
        // NoSuchElementException, so the type is kept, but the cause is no longer dropped.
        if (ex.getCause() instanceof NoSuchElementException notFound) {
            throw notFound;
        }
        throw new NoSuchElementException("Failed to look up domain " + domain, ex.getCause());
    }
    catch (ExecutionException ex) {
        throw new RuntimeException(ex.getCause());
    }
}
public OptionalInt tryGetDomainId(EdgeDomain domain) {
Integer maybeId = domainIdCache.getIfPresent(domain);
@@ -145,4 +174,6 @@ public class DbDomainQueries {
return nodeAffinity > 0;
}
}
/**
 * A domain's database id paired with its node affinity.
 *
 * @param domainId     value of the {@code ID} column in {@code EC_DOMAIN}
 * @param nodeAffinity value of the {@code NODE_AFFINITY} column in {@code EC_DOMAIN}
 */
public record DomainIdWithNode(
        int domainId,
        int nodeAffinity
) {
}
}

View File

@@ -0,0 +1,47 @@
// Build script for the favicon API module: the protobuf/gRPC definitions and client.
plugins {
    id 'java'
    id "com.google.protobuf" version "0.9.4"
    id 'jvm-test-suite'
}

java {
    toolchain {
        languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
    }
}

jar.archiveBaseName = 'favicon-api'

apply from: "$rootProject.projectDir/protobuf.gradle"
apply from: "$rootProject.projectDir/srcsets.gradle"

dependencies {
    implementation project(':code:common:model')
    implementation project(':code:common:config')
    implementation project(':code:common:service')

    implementation libs.bundles.slf4j

    implementation libs.prometheus
    implementation libs.notnull
    // Guava is declared once here; the transitive copies pulled in by guice and grpc
    // are excluded below.
    implementation libs.guava
    implementation dependencies.create(libs.guice.get()) {
        exclude group: 'com.google.guava'
    }
    implementation libs.gson
    implementation libs.bundles.protobuf
    libs.bundles.grpc.get().each {
        implementation dependencies.create(it) {
            exclude group: 'com.google.guava'
        }
    }

    testImplementation libs.bundles.slf4j.test
    testImplementation libs.bundles.junit
    testImplementation libs.mockito
}

View File

@@ -0,0 +1,39 @@
package nu.marginalia.api.favicon;
import com.google.inject.Inject;
import nu.marginalia.service.client.GrpcChannelPoolFactory;
import nu.marginalia.service.client.GrpcMultiNodeChannelPool;
import nu.marginalia.service.discovery.property.ServiceKey;
import nu.marginalia.service.discovery.property.ServicePartition;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Optional;
/**
 * Client for the favicon gRPC API, addressing a specific node per request.
 */
public class FaviconClient {
    private static final Logger logger = LoggerFactory.getLogger(FaviconClient.class);

    private final GrpcMultiNodeChannelPool<FaviconAPIGrpc.FaviconAPIBlockingStub> channelPool;

    /** Creates a multi-node channel pool of blocking stubs for the favicon API. */
    @Inject
    public FaviconClient(GrpcChannelPoolFactory factory) {
        channelPool = factory.createMulti(
                ServiceKey.forGrpcApi(FaviconAPIGrpc.class, ServicePartition.multi()),
                FaviconAPIGrpc::newBlockingStub
        );
    }

    /** Raw favicon bytes together with the content type reported by the service. */
    public record FaviconData(byte[] bytes, String contentType) {}

    /**
     * Requests the favicon for a domain from the given node.
     *
     * @param domain the domain name sent in the request
     * @param node   the node the request is routed to
     * @return the favicon, or empty when the response carries no data
     */
    public Optional<FaviconData> getFavicon(String domain, int node) {
        var request = RpcFaviconRequest.newBuilder()
                .setDomain(domain)
                .build();

        RpcFaviconResponse rsp = channelPool
                .call(FaviconAPIGrpc.FaviconAPIBlockingStub::getFavicon)
                .forNode(node)
                .run(request);

        var payload = rsp.getData();
        if (payload.isEmpty()) {
            return Optional.empty();
        }

        return Optional.of(new FaviconData(payload.toByteArray(), rsp.getContentType()));
    }
}

View File

@@ -0,0 +1,20 @@
syntax="proto3";
package marginalia.api.favicon;
option java_package="nu.marginalia.api.favicon";
option java_multiple_files=true;
// gRPC service exposing stored favicons.
service FaviconAPI {
/** Fetches the favicon for a domain; the response carries no data when none is available. */
rpc getFavicon(RpcFaviconRequest) returns (RpcFaviconResponse) {}
}
// Request for the favicon of a single domain.
message RpcFaviconRequest {
// Name of the domain whose favicon is requested.
string domain = 1;
}
// Favicon lookup result. All fields are left at their defaults when no icon was found.
message RpcFaviconResponse {
// The domain from the request, set when an icon was found.
string domain = 1;
// Raw image bytes; empty means "no favicon".
bytes data = 2;
// Content type stored alongside the image bytes.
string contentType = 3;
}

View File

@@ -0,0 +1,49 @@
// Build script for the favicon function module (the gRPC service implementation).
plugins {
    id 'java'
    id 'application'
    id 'jvm-test-suite'
}

java {
    toolchain {
        languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
    }
}

apply from: "$rootProject.projectDir/srcsets.gradle"

dependencies {
    implementation project(':code:common:config')
    implementation project(':code:common:service')
    implementation project(':code:common:model')
    implementation project(':code:common:db')
    implementation project(':code:functions:favicon:api')
    implementation project(':code:processes:crawling-process')

    implementation libs.bundles.slf4j

    implementation libs.prometheus
    // Guava is declared once here; the transitive copies pulled in by grpc and guice
    // are excluded below.
    implementation libs.guava
    libs.bundles.grpc.get().each {
        implementation dependencies.create(it) {
            exclude group: 'com.google.guava'
        }
    }

    implementation libs.notnull
    implementation dependencies.create(libs.guice.get()) {
        exclude group: 'com.google.guava'
    }
    implementation dependencies.create(libs.spark.get()) {
        exclude group: 'org.eclipse.jetty'
    }

    testImplementation libs.bundles.slf4j.test
    testImplementation libs.bundles.junit
    testImplementation libs.mockito
}

View File

@@ -0,0 +1,48 @@
package nu.marginalia.functions.favicon;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.google.protobuf.ByteString;
import io.grpc.stub.StreamObserver;
import nu.marginalia.api.favicon.FaviconAPIGrpc;
import nu.marginalia.api.favicon.RpcFaviconRequest;
import nu.marginalia.api.favicon.RpcFaviconResponse;
import nu.marginalia.crawl.DomainStateDb;
import nu.marginalia.service.server.DiscoverableService;
import java.util.Optional;
/**
 * gRPC endpoint that answers favicon requests from the local {@link DomainStateDb}.
 */
@Singleton
public class FaviconGrpcService extends FaviconAPIGrpc.FaviconAPIImplBase implements DiscoverableService {
    private final DomainStateDb domainStateDb;

    @Inject
    public FaviconGrpcService(DomainStateDb domainStateDb) {
        this.domainStateDb = domainStateDb;
    }

    /**
     * @return {@code true} only when the domain state db reports itself as available
     */
    public boolean shouldRegisterService() {
        return domainStateDb.isAvailable();
    }

    /**
     * Replies with the stored icon for the requested domain, or with an empty
     * (all-defaults) response when no icon is stored, then completes the call.
     */
    @Override
    public void getFavicon(RpcFaviconRequest request, StreamObserver<RpcFaviconResponse> responseObserver) {
        final String domain = request.getDomain();

        RpcFaviconResponse response = domainStateDb.getIcon(domain)
                .map(iconRecord -> RpcFaviconResponse.newBuilder()
                        .setContentType(iconRecord.contentType())
                        .setDomain(domain)
                        .setData(ByteString.copyFrom(iconRecord.imageData()))
                        .build())
                .orElseGet(() -> RpcFaviconResponse.newBuilder().build());

        responseObserver.onNext(response);
        responseObserver.onCompleted();
    }
}

View File

@@ -1,5 +1,8 @@
package nu.marginalia.crawl;
import com.google.inject.Inject;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -63,7 +66,29 @@ public class DomainStateDb implements AutoCloseable {
public record FaviconRecord(String contentType, byte[] imageData) {}
public DomainStateDb(Path filename) throws SQLException {
/**
 * Injection constructor: resolves the database path with {@code findFilename} and
 * delegates to the {@code Path} constructor.  When no path can be resolved, a null
 * path is passed on.
 *
 * @param fileStorageService used to locate the active crawl data storage
 * @throws SQLException declared by {@code findFilename} and the delegated constructor
 */
@Inject
public DomainStateDb(FileStorageService fileStorageService) throws SQLException {
this(findFilename(fileStorageService));
}
/**
 * Resolves the location of {@code domainstate.db} inside the only active
 * {@code CRAWL_DATA} file storage.
 *
 * @return the database path, or {@code null} when there is no such active storage
 */
private static Path findFilename(FileStorageService fileStorageService) throws SQLException {
    var activeStorageId = fileStorageService.getOnlyActiveFileStorage(FileStorageType.CRAWL_DATA);

    // No active crawl data storage: signal "no database" to the caller
    if (!activeStorageId.isPresent()) {
        return null;
    }

    var storage = fileStorageService.getStorage(activeStorageId.get());
    var storageRoot = storage.asPath();
    return storageRoot.resolve("domainstate.db");
}
public DomainStateDb(@Nullable Path filename) throws SQLException {
if (null == filename) {
connection = null;
return;
}
String sqliteDbString = "jdbc:sqlite:" + filename.toString();
connection = DriverManager.getConnection(sqliteDbString);
@@ -90,11 +115,18 @@ public class DomainStateDb implements AutoCloseable {
@Override
public void close() throws SQLException {
connection.close();
if (connection != null) {
connection.close();
}
}
/**
 * @return {@code true} when this instance holds a database connection,
 *         {@code false} when it was created without one
 */
public boolean isAvailable() {
    return null != connection;
}
public void saveIcon(String domain, FaviconRecord faviconRecord) {
if (connection == null) throw new IllegalStateException("No connection to domainstate db");
try (var stmt = connection.prepareStatement("""
INSERT OR REPLACE INTO favicon (domain, contentType, icon)
VALUES(?, ?, ?)
@@ -110,6 +142,9 @@ public class DomainStateDb implements AutoCloseable {
}
public Optional<FaviconRecord> getIcon(String domain) {
if (connection == null)
return Optional.empty();
try (var stmt = connection.prepareStatement("SELECT contentType, icon FROM favicon WHERE DOMAIN = ?")) {
stmt.setString(1, domain);
var rs = stmt.executeQuery();
@@ -130,6 +165,8 @@ public class DomainStateDb implements AutoCloseable {
}
public void save(SummaryRecord record) {
if (connection == null) throw new IllegalStateException("No connection to domainstate db");
try (var stmt = connection.prepareStatement("""
INSERT OR REPLACE INTO summary (domain, lastUpdatedEpochMs, state, stateDesc, feedUrl)
VALUES (?, ?, ?, ?, ?)
@@ -146,6 +183,9 @@ public class DomainStateDb implements AutoCloseable {
}
public Optional<SummaryRecord> get(String domainName) {
if (connection == null)
return Optional.empty();
try (var stmt = connection.prepareStatement("""
SELECT domain, lastUpdatedEpochMs, state, stateDesc, feedUrl
FROM summary

View File

@@ -41,6 +41,7 @@ dependencies {
implementation project(':code:functions:live-capture:api')
implementation project(':code:functions:math:api')
implementation project(':code:functions:favicon:api')
implementation project(':code:functions:domain-info:api')
implementation project(':code:functions:search-query:api')

View File

@@ -3,10 +3,14 @@ package nu.marginalia.search;
import com.google.inject.Inject;
import io.jooby.Context;
import io.jooby.Jooby;
import io.jooby.MediaType;
import io.jooby.StatusCode;
import io.prometheus.client.Counter;
import io.prometheus.client.Histogram;
import nu.marginalia.WebsiteUrl;
import nu.marginalia.api.favicon.FaviconClient;
import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.search.svc.*;
import nu.marginalia.service.discovery.property.ServicePartition;
import nu.marginalia.service.server.BaseServiceParams;
@@ -15,11 +19,14 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;
import java.util.NoSuchElementException;
public class SearchService extends JoobyService {
private final WebsiteUrl websiteUrl;
private final SearchSiteSubscriptionService siteSubscriptionService;
private final FaviconClient faviconClient;
private final DbDomainQueries domainQueries;
private static final Logger logger = LoggerFactory.getLogger(SearchService.class);
private static final Histogram wmsa_search_service_request_time = Histogram.build()
@@ -43,6 +50,8 @@ public class SearchService extends JoobyService {
SearchSiteInfoService siteInfoService,
SearchCrosstalkService crosstalkService,
SearchBrowseService searchBrowseService,
FaviconClient faviconClient,
DbDomainQueries domainQueries,
SearchQueryService searchQueryService)
throws Exception {
super(params,
@@ -58,6 +67,8 @@ public class SearchService extends JoobyService {
this.websiteUrl = websiteUrl;
this.siteSubscriptionService = siteSubscriptionService;
this.faviconClient = faviconClient;
this.domainQueries = domainQueries;
}
@Override
@@ -71,6 +82,31 @@ public class SearchService extends JoobyService {
jooby.get("/site/https://*", this::handleSiteUrlRedirect);
jooby.get("/site/http://*", this::handleSiteUrlRedirect);
// Serves the favicon for a domain.  Responds 404 when the domain is unknown or when
// the favicon service has no icon for it.
jooby.get("/site/{domain}/favicon", ctx -> {
    String domain = ctx.path("domain").value();
    logger.info("Finding icon for domain {}", domain);
    try {
        // A single lookup gives the node to ask.  getDomainIdWithNode throws
        // NoSuchElementException for unknown domains, handled below.  The extra
        // getDomainId() call that used to precede this block discarded its result and
        // ran outside the try, so any exception from it bypassed the 404 handling.
        DbDomainQueries.DomainIdWithNode domainIdWithNode = domainQueries.getDomainIdWithNode(new EdgeDomain(domain));
        var faviconMaybe = faviconClient.getFavicon(domain, domainIdWithNode.nodeAffinity());

        if (faviconMaybe.isEmpty()) {
            ctx.setResponseCode(404);
        } else {
            var favicon = faviconMaybe.get();
            ctx.responseStream(MediaType.valueOf(favicon.contentType()), consumer -> {
                consumer.write(favicon.bytes());
            });
        }
    }
    catch (NoSuchElementException ex) {
        ctx.setResponseCode(404);
    }
    return "";
});
jooby.before((Context ctx) -> {
ctx.setAttribute(startTimeAttribute, System.nanoTime());
});

View File

@@ -26,15 +26,15 @@
It operates a bit like a clock, starting at the top and working its way around clockwise.</p>
<div class="flex gap-4 place-items-middle">
@template.serp.part.matchogram(mask = 90)
@template.serp.part.matchogram(mask = 90, domain = "example.com")
<div>This is by the beginning</div>
</div>
<div class="flex gap-4 place-items-middle">
@template.serp.part.matchogram(mask = 90L<<26)
@template.serp.part.matchogram(mask = 90L<<26, domain = "example.com")
<div>This is in the middle</div>
</div>
<div class="flex gap-4 place-items-middle">
@template.serp.part.matchogram(mask = 5L<<48)
@template.serp.part.matchogram(mask = 5L<<48, domain = "example.com")
<div>This is toward the end</div>
</div>

View File

@@ -1,11 +1,13 @@
@import java.util.stream.IntStream
@param long mask
@param String domain
<svg width="40" height="40">
<svg width="40" height="40"
style="background-image: url('/site/${domain}/favicon'); background-repeat: no-repeat; background-size: 16px 16px; background-position: center; ">
<circle
cx="18"
cy="18"
cx="20"
cy="20"
r="16"
fill="none"
stroke="#eee"
@@ -13,10 +15,10 @@
/>
@for (int bit : IntStream.range(0, 56).filter(bit -> (mask & (1L << bit)) != 0).toArray())
<line
x1="${18 + 15*Math.sin(2 * Math.PI * bit / 56.)}"
y1="${18 - 15*Math.cos(2 * Math.PI * bit / 56.)}"
x2="${18 + 17*Math.sin(2 * Math.PI * bit / 56.)}"
y2="${18 - 17*Math.cos(2 * Math.PI * bit / 56.)}"
x1="${20 + 15*Math.sin(2 * Math.PI * bit / 56.)}"
y1="${20 - 15*Math.cos(2 * Math.PI * bit / 56.)}"
x2="${20 + 17*Math.sin(2 * Math.PI * bit / 56.)}"
y2="${20 - 17*Math.cos(2 * Math.PI * bit / 56.)}"
stroke="#444"
stroke-width="2"
/>

View File

@@ -12,7 +12,7 @@
<div class="flex flex-col grow" >
<div class="flex flex-row space-x-2 place-items-center">
<div class="flex-0" title="Match density">
@template.serp.part.matchogram(mask = result.first.positionsMask)
@template.serp.part.matchogram(mask = result.first.positionsMask, domain=result.getFirst().url.domain.toString())
</div>
<div class="flex grow justify-between items-start">
<div class="flex-1">

View File

@@ -9,8 +9,8 @@
<div class="flex-1 p-4 space-y-4 mx-auto w-full md:w-auto">
<div class="flex border dark:border-gray-600 rounded bg-white dark:bg-gray-800 flex-col space-y-4 pb-4 overflow-hidden md:max-w-lg" >
<div class="flex place-items-baseline space-x-2 p-2 text-md border-b dark:border-gray-600 bg-margeblue text-white">
<i class="fa fa-globe"></i>
<div class="flex place-items-center space-x-2 p-2 text-md border-b dark:border-gray-600 bg-margeblue text-white">
<%-- Favicon fetched from the /site/{domain}/favicon route; purely decorative, hence the empty alt text --%>
<img src="/site/${siteInfo.domain()}/favicon" alt="" style="width: 16px; height: 16px; vertical-align: middle">
<span>${siteInfo.domain()}</span>
<div class="grow">
</div>

View File

@@ -42,6 +42,8 @@ dependencies {
implementation project(':code:libraries:message-queue')
implementation project(':code:functions:link-graph:api')
implementation project(':code:functions:favicon')
implementation project(':code:functions:favicon:api')
implementation project(':code:processes:crawling-process:model')
implementation project(':code:processes:crawling-process:model')

View File

@@ -2,6 +2,7 @@ package nu.marginalia.executor;
import com.google.inject.Inject;
import nu.marginalia.execution.*;
import nu.marginalia.functions.favicon.FaviconGrpcService;
import nu.marginalia.service.discovery.property.ServicePartition;
import nu.marginalia.service.server.BaseServiceParams;
import nu.marginalia.service.server.SparkService;
@@ -24,6 +25,7 @@ public class ExecutorSvc extends SparkService {
ExecutorCrawlGrpcService executorCrawlGrpcService,
ExecutorSideloadGrpcService executorSideloadGrpcService,
ExecutorExportGrpcService executorExportGrpcService,
FaviconGrpcService faviconGrpcService,
ExecutionInit executionInit,
ExecutorFileTransferService fileTransferService) throws Exception {
super(params,
@@ -31,7 +33,8 @@ public class ExecutorSvc extends SparkService {
List.of(executorGrpcService,
executorCrawlGrpcService,
executorSideloadGrpcService,
executorExportGrpcService)
executorExportGrpcService,
faviconGrpcService)
);
this.executionInit = executionInit;

View File

@@ -16,7 +16,8 @@ include 'code:services-application:status-service'
include 'code:functions:math'
include 'code:functions:math:api'
include 'code:functions:favicon'
include 'code:functions:favicon:api'
include 'code:functions:domain-info'
include 'code:functions:domain-info:api'