mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-06 07:32:38 +02:00
Compare commits
6 Commits
deploy-009
...
deploy-010
Author | SHA1 | Date | |
---|---|---|---|
|
8364bcdc97 | ||
|
626cab5fab | ||
|
cfd4712191 | ||
|
9f18ced73d | ||
|
18e91269ab | ||
|
e315ca5758 |
@@ -22,6 +22,7 @@ public class DbDomainQueries {
|
|||||||
private static final Logger logger = LoggerFactory.getLogger(DbDomainQueries.class);
|
private static final Logger logger = LoggerFactory.getLogger(DbDomainQueries.class);
|
||||||
|
|
||||||
private final Cache<EdgeDomain, Integer> domainIdCache = CacheBuilder.newBuilder().maximumSize(10_000).build();
|
private final Cache<EdgeDomain, Integer> domainIdCache = CacheBuilder.newBuilder().maximumSize(10_000).build();
|
||||||
|
private final Cache<EdgeDomain, DomainIdWithNode> domainWithNodeCache = CacheBuilder.newBuilder().maximumSize(10_000).build();
|
||||||
private final Cache<Integer, EdgeDomain> domainNameCache = CacheBuilder.newBuilder().maximumSize(10_000).build();
|
private final Cache<Integer, EdgeDomain> domainNameCache = CacheBuilder.newBuilder().maximumSize(10_000).build();
|
||||||
private final Cache<String, List<DomainWithNode>> siblingsCache = CacheBuilder.newBuilder().maximumSize(10_000).build();
|
private final Cache<String, List<DomainWithNode>> siblingsCache = CacheBuilder.newBuilder().maximumSize(10_000).build();
|
||||||
|
|
||||||
@@ -59,6 +60,34 @@ public class DbDomainQueries {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public DomainIdWithNode getDomainIdWithNode(EdgeDomain domain) throws NoSuchElementException {
|
||||||
|
try {
|
||||||
|
return domainWithNodeCache.get(domain, () -> {
|
||||||
|
try (var connection = dataSource.getConnection();
|
||||||
|
var stmt = connection.prepareStatement("SELECT ID, NODE_AFFINITY FROM EC_DOMAIN WHERE DOMAIN_NAME=?")) {
|
||||||
|
|
||||||
|
stmt.setString(1, domain.toString());
|
||||||
|
var rsp = stmt.executeQuery();
|
||||||
|
if (rsp.next()) {
|
||||||
|
return new DomainIdWithNode(rsp.getInt(1), rsp.getInt(2));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (SQLException ex) {
|
||||||
|
throw new RuntimeException(ex);
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new NoSuchElementException();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
catch (UncheckedExecutionException ex) {
|
||||||
|
throw new NoSuchElementException();
|
||||||
|
}
|
||||||
|
catch (ExecutionException ex) {
|
||||||
|
throw new RuntimeException(ex.getCause());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public OptionalInt tryGetDomainId(EdgeDomain domain) {
|
public OptionalInt tryGetDomainId(EdgeDomain domain) {
|
||||||
|
|
||||||
Integer maybeId = domainIdCache.getIfPresent(domain);
|
Integer maybeId = domainIdCache.getIfPresent(domain);
|
||||||
@@ -145,4 +174,6 @@ public class DbDomainQueries {
|
|||||||
return nodeAffinity > 0;
|
return nodeAffinity > 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public record DomainIdWithNode (int domainId, int nodeAffinity) { }
|
||||||
}
|
}
|
||||||
|
47
code/functions/favicon/api/build.gradle
Normal file
47
code/functions/favicon/api/build.gradle
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
plugins {
|
||||||
|
id 'java'
|
||||||
|
|
||||||
|
id "com.google.protobuf" version "0.9.4"
|
||||||
|
id 'jvm-test-suite'
|
||||||
|
}
|
||||||
|
|
||||||
|
java {
|
||||||
|
toolchain {
|
||||||
|
languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
jar.archiveBaseName = 'favicon-api'
|
||||||
|
|
||||||
|
apply from: "$rootProject.projectDir/protobuf.gradle"
|
||||||
|
apply from: "$rootProject.projectDir/srcsets.gradle"
|
||||||
|
|
||||||
|
dependencies {
|
||||||
|
implementation project(':code:common:model')
|
||||||
|
implementation project(':code:common:config')
|
||||||
|
implementation project(':code:common:service')
|
||||||
|
|
||||||
|
implementation libs.bundles.slf4j
|
||||||
|
|
||||||
|
implementation libs.prometheus
|
||||||
|
implementation libs.notnull
|
||||||
|
implementation libs.guava
|
||||||
|
implementation dependencies.create(libs.guice.get()) {
|
||||||
|
exclude group: 'com.google.guava'
|
||||||
|
}
|
||||||
|
implementation libs.gson
|
||||||
|
implementation libs.bundles.protobuf
|
||||||
|
implementation libs.guava
|
||||||
|
libs.bundles.grpc.get().each {
|
||||||
|
implementation dependencies.create(it) {
|
||||||
|
exclude group: 'com.google.guava'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
testImplementation libs.bundles.slf4j.test
|
||||||
|
testImplementation libs.bundles.junit
|
||||||
|
testImplementation libs.mockito
|
||||||
|
|
||||||
|
}
|
@@ -0,0 +1,39 @@
|
|||||||
|
package nu.marginalia.api.favicon;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import nu.marginalia.service.client.GrpcChannelPoolFactory;
|
||||||
|
import nu.marginalia.service.client.GrpcMultiNodeChannelPool;
|
||||||
|
import nu.marginalia.service.discovery.property.ServiceKey;
|
||||||
|
import nu.marginalia.service.discovery.property.ServicePartition;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
public class FaviconClient {
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(FaviconClient.class);
|
||||||
|
|
||||||
|
private final GrpcMultiNodeChannelPool<FaviconAPIGrpc.FaviconAPIBlockingStub> channelPool;
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public FaviconClient(GrpcChannelPoolFactory factory) {
|
||||||
|
this.channelPool = factory.createMulti(
|
||||||
|
ServiceKey.forGrpcApi(FaviconAPIGrpc.class, ServicePartition.multi()),
|
||||||
|
FaviconAPIGrpc::newBlockingStub);
|
||||||
|
}
|
||||||
|
|
||||||
|
public record FaviconData(byte[] bytes, String contentType) {}
|
||||||
|
|
||||||
|
|
||||||
|
public Optional<FaviconData> getFavicon(String domain, int node) {
|
||||||
|
RpcFaviconResponse rsp = channelPool.call(FaviconAPIGrpc.FaviconAPIBlockingStub::getFavicon)
|
||||||
|
.forNode(node)
|
||||||
|
.run(RpcFaviconRequest.newBuilder().setDomain(domain).build());
|
||||||
|
|
||||||
|
if (rsp.getData().isEmpty())
|
||||||
|
return Optional.empty();
|
||||||
|
|
||||||
|
return Optional.of(new FaviconData(rsp.getData().toByteArray(), rsp.getContentType()));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
20
code/functions/favicon/api/src/main/protobuf/favicon.proto
Normal file
20
code/functions/favicon/api/src/main/protobuf/favicon.proto
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
syntax="proto3";
|
||||||
|
package marginalia.api.favicon;
|
||||||
|
|
||||||
|
option java_package="nu.marginalia.api.favicon";
|
||||||
|
option java_multiple_files=true;
|
||||||
|
|
||||||
|
service FaviconAPI {
|
||||||
|
/** Fetches information about a domain. */
|
||||||
|
rpc getFavicon(RpcFaviconRequest) returns (RpcFaviconResponse) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
message RpcFaviconRequest {
|
||||||
|
string domain = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
message RpcFaviconResponse {
|
||||||
|
string domain = 1;
|
||||||
|
bytes data = 2;
|
||||||
|
string contentType = 3;
|
||||||
|
}
|
49
code/functions/favicon/build.gradle
Normal file
49
code/functions/favicon/build.gradle
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
plugins {
|
||||||
|
id 'java'
|
||||||
|
|
||||||
|
id 'application'
|
||||||
|
id 'jvm-test-suite'
|
||||||
|
}
|
||||||
|
|
||||||
|
java {
|
||||||
|
toolchain {
|
||||||
|
languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
apply from: "$rootProject.projectDir/srcsets.gradle"
|
||||||
|
|
||||||
|
dependencies {
|
||||||
|
implementation project(':code:common:config')
|
||||||
|
implementation project(':code:common:service')
|
||||||
|
implementation project(':code:common:model')
|
||||||
|
implementation project(':code:common:db')
|
||||||
|
implementation project(':code:functions:favicon:api')
|
||||||
|
implementation project(':code:processes:crawling-process')
|
||||||
|
|
||||||
|
implementation libs.bundles.slf4j
|
||||||
|
|
||||||
|
implementation libs.prometheus
|
||||||
|
implementation libs.guava
|
||||||
|
libs.bundles.grpc.get().each {
|
||||||
|
implementation dependencies.create(it) {
|
||||||
|
exclude group: 'com.google.guava'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
implementation libs.notnull
|
||||||
|
implementation libs.guava
|
||||||
|
implementation dependencies.create(libs.guice.get()) {
|
||||||
|
exclude group: 'com.google.guava'
|
||||||
|
}
|
||||||
|
implementation dependencies.create(libs.spark.get()) {
|
||||||
|
exclude group: 'org.eclipse.jetty'
|
||||||
|
}
|
||||||
|
|
||||||
|
testImplementation libs.bundles.slf4j.test
|
||||||
|
testImplementation libs.bundles.junit
|
||||||
|
testImplementation libs.mockito
|
||||||
|
|
||||||
|
|
||||||
|
}
|
@@ -0,0 +1,44 @@
|
|||||||
|
package nu.marginalia.functions.favicon;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.Singleton;
|
||||||
|
import com.google.protobuf.ByteString;
|
||||||
|
import io.grpc.stub.StreamObserver;
|
||||||
|
import nu.marginalia.api.favicon.FaviconAPIGrpc;
|
||||||
|
import nu.marginalia.api.favicon.RpcFaviconRequest;
|
||||||
|
import nu.marginalia.api.favicon.RpcFaviconResponse;
|
||||||
|
import nu.marginalia.crawl.DomainStateDb;
|
||||||
|
import nu.marginalia.service.server.DiscoverableService;
|
||||||
|
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
@Singleton
|
||||||
|
public class FaviconGrpcService extends FaviconAPIGrpc.FaviconAPIImplBase implements DiscoverableService {
|
||||||
|
private final DomainStateDb domainStateDb;
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public FaviconGrpcService(DomainStateDb domainStateDb) {
|
||||||
|
this.domainStateDb = domainStateDb;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void getFavicon(RpcFaviconRequest request, StreamObserver<RpcFaviconResponse> responseObserver) {
|
||||||
|
Optional<DomainStateDb.FaviconRecord> icon = domainStateDb.getIcon(request.getDomain());
|
||||||
|
|
||||||
|
RpcFaviconResponse response;
|
||||||
|
if (icon.isEmpty()) {
|
||||||
|
response = RpcFaviconResponse.newBuilder().build();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
var iconRecord = icon.get();
|
||||||
|
response = RpcFaviconResponse.newBuilder()
|
||||||
|
.setContentType(iconRecord.contentType())
|
||||||
|
.setDomain(request.getDomain())
|
||||||
|
.setData(ByteString.copyFrom(iconRecord.imageData()))
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
responseObserver.onNext(response);
|
||||||
|
responseObserver.onCompleted();
|
||||||
|
}
|
||||||
|
}
|
@@ -41,7 +41,10 @@ import java.nio.file.Files;
|
|||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.nio.file.StandardCopyOption;
|
import java.nio.file.StandardCopyOption;
|
||||||
import java.security.Security;
|
import java.security.Security;
|
||||||
import java.util.*;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
@@ -248,44 +251,35 @@ public class CrawlerMain extends ProcessMainClass {
|
|||||||
// List of deferred tasks used to ensure beneficial scheduling of domains with regard to DomainLocks,
|
// List of deferred tasks used to ensure beneficial scheduling of domains with regard to DomainLocks,
|
||||||
// merely shuffling the domains tends to lead to a lot of threads being blocked waiting for a semphore,
|
// merely shuffling the domains tends to lead to a lot of threads being blocked waiting for a semphore,
|
||||||
// this will more aggressively attempt to schedule the jobs to avoid blocking
|
// this will more aggressively attempt to schedule the jobs to avoid blocking
|
||||||
List<CrawlTask> deferredTasks = new LinkedList<>();
|
List<CrawlTask> taskList = new ArrayList<>();
|
||||||
|
|
||||||
// Create crawl tasks and submit them to the pool for execution
|
// Create crawl tasks
|
||||||
for (CrawlSpecRecord crawlSpec : crawlSpecRecords) {
|
for (CrawlSpecRecord crawlSpec : crawlSpecRecords) {
|
||||||
if (workLog.isJobFinished(crawlSpec.domain()))
|
if (workLog.isJobFinished(crawlSpec.domain))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// Add to the end of the deferral list
|
var task = new CrawlTask(
|
||||||
deferredTasks.addLast(new CrawlTask(
|
|
||||||
crawlSpec,
|
crawlSpec,
|
||||||
anchorTagsSource,
|
anchorTagsSource,
|
||||||
outputDir,
|
outputDir,
|
||||||
warcArchiver,
|
warcArchiver,
|
||||||
domainStateDb,
|
domainStateDb,
|
||||||
workLog));
|
workLog);
|
||||||
|
|
||||||
// Start every task we currently can from the deferral list
|
// Try to run immediately, to avoid unnecessarily keeping the entire work set in RAM
|
||||||
deferredTasks.removeIf(task -> {
|
if (!trySubmitDeferredTask(task)) {
|
||||||
if (task.canRun()) {
|
// Otherwise add to the taskList for deferred execution
|
||||||
if (pendingCrawlTasks.putIfAbsent(crawlSpec.domain(), task) != null) {
|
taskList.add(task);
|
||||||
return true; // task has already run, duplicate in crawl specs
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// This blocks the caller when the pool is full
|
|
||||||
pool.submitQuietly(task);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Schedule any lingering tasks for immediate execution
|
// Schedule viable tasks for execution until list is empty
|
||||||
for (var task : deferredTasks) {
|
while (!taskList.isEmpty()) {
|
||||||
if (pendingCrawlTasks.putIfAbsent(task.domain, task) != null)
|
taskList.removeIf(this::trySubmitDeferredTask);
|
||||||
continue;
|
|
||||||
|
|
||||||
pool.submitQuietly(task);
|
// Add a small pause here to avoid busy looping toward the end of the execution cycle when
|
||||||
|
// we might have no new viable tasks to run for hours on end
|
||||||
|
TimeUnit.MILLISECONDS.sleep(50);
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.info("Shutting down the pool, waiting for tasks to complete...");
|
logger.info("Shutting down the pool, waiting for tasks to complete...");
|
||||||
@@ -312,6 +306,28 @@ public class CrawlerMain extends ProcessMainClass {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Submit a task for execution if it can be run, returns true if it was submitted
|
||||||
|
* or if it can be discarded */
|
||||||
|
private boolean trySubmitDeferredTask(CrawlTask task) {
|
||||||
|
if (!task.canRun()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pendingCrawlTasks.putIfAbsent(task.domain, task) != null) {
|
||||||
|
return true; // task has already run, duplicate in crawl specs
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
// This blocks the caller when the pool is full
|
||||||
|
pool.submitQuietly(task);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
catch (RuntimeException ex) {
|
||||||
|
logger.error("Failed to submit task " + task.domain, ex);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public void runForSingleDomain(String targetDomainName, FileStorageId fileStorageId) throws Exception {
|
public void runForSingleDomain(String targetDomainName, FileStorageId fileStorageId) throws Exception {
|
||||||
runForSingleDomain(targetDomainName, fileStorageService.getStorage(fileStorageId).asPath());
|
runForSingleDomain(targetDomainName, fileStorageService.getStorage(fileStorageId).asPath());
|
||||||
}
|
}
|
||||||
@@ -377,6 +393,11 @@ public class CrawlerMain extends ProcessMainClass {
|
|||||||
@Override
|
@Override
|
||||||
public void run() throws Exception {
|
public void run() throws Exception {
|
||||||
|
|
||||||
|
if (workLog.isJobFinished(domain)) { // No-Op
|
||||||
|
logger.info("Omitting task {}, as it is already run", domain);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
Path newWarcFile = CrawlerOutputFile.createWarcPath(outputDir, id, domain, CrawlerOutputFile.WarcFileVersion.LIVE);
|
Path newWarcFile = CrawlerOutputFile.createWarcPath(outputDir, id, domain, CrawlerOutputFile.WarcFileVersion.LIVE);
|
||||||
Path tempFile = CrawlerOutputFile.createWarcPath(outputDir, id, domain, CrawlerOutputFile.WarcFileVersion.TEMP);
|
Path tempFile = CrawlerOutputFile.createWarcPath(outputDir, id, domain, CrawlerOutputFile.WarcFileVersion.TEMP);
|
||||||
Path slopFile = CrawlerOutputFile.createSlopPath(outputDir, id, domain);
|
Path slopFile = CrawlerOutputFile.createSlopPath(outputDir, id, domain);
|
||||||
@@ -431,7 +452,7 @@ public class CrawlerMain extends ProcessMainClass {
|
|||||||
logger.error("Error fetching domain " + domain, e);
|
logger.error("Error fetching domain " + domain, e);
|
||||||
}
|
}
|
||||||
finally {
|
finally {
|
||||||
// We don't need to double-count these; it's also kept int he workLog
|
// We don't need to double-count these; it's also kept in the workLog
|
||||||
pendingCrawlTasks.remove(domain);
|
pendingCrawlTasks.remove(domain);
|
||||||
Thread.currentThread().setName("[idle]");
|
Thread.currentThread().setName("[idle]");
|
||||||
|
|
||||||
|
@@ -1,5 +1,8 @@
|
|||||||
package nu.marginalia.crawl;
|
package nu.marginalia.crawl;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import nu.marginalia.storage.FileStorageService;
|
||||||
|
import nu.marginalia.storage.model.FileStorageType;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@@ -63,6 +66,23 @@ public class DomainStateDb implements AutoCloseable {
|
|||||||
|
|
||||||
public record FaviconRecord(String contentType, byte[] imageData) {}
|
public record FaviconRecord(String contentType, byte[] imageData) {}
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public DomainStateDb(FileStorageService fileStorageService) throws SQLException {
|
||||||
|
this(findFilename(fileStorageService));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Path findFilename(FileStorageService fileStorageService) throws SQLException {
|
||||||
|
var fsId = fileStorageService.getOnlyActiveFileStorage(FileStorageType.CRAWL_DATA);
|
||||||
|
|
||||||
|
if (fsId.isPresent()) {
|
||||||
|
var fs = fileStorageService.getStorage(fsId.get());
|
||||||
|
return fs.asPath().resolve("domainstate.db");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
throw new SQLException("Could not find crawl data storage");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public DomainStateDb(Path filename) throws SQLException {
|
public DomainStateDb(Path filename) throws SQLException {
|
||||||
String sqliteDbString = "jdbc:sqlite:" + filename.toString();
|
String sqliteDbString = "jdbc:sqlite:" + filename.toString();
|
||||||
connection = DriverManager.getConnection(sqliteDbString);
|
connection = DriverManager.getConnection(sqliteDbString);
|
||||||
|
@@ -41,6 +41,7 @@ dependencies {
|
|||||||
|
|
||||||
implementation project(':code:functions:live-capture:api')
|
implementation project(':code:functions:live-capture:api')
|
||||||
implementation project(':code:functions:math:api')
|
implementation project(':code:functions:math:api')
|
||||||
|
implementation project(':code:functions:favicon:api')
|
||||||
implementation project(':code:functions:domain-info:api')
|
implementation project(':code:functions:domain-info:api')
|
||||||
implementation project(':code:functions:search-query:api')
|
implementation project(':code:functions:search-query:api')
|
||||||
|
|
||||||
|
@@ -3,10 +3,14 @@ package nu.marginalia.search;
|
|||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import io.jooby.Context;
|
import io.jooby.Context;
|
||||||
import io.jooby.Jooby;
|
import io.jooby.Jooby;
|
||||||
|
import io.jooby.MediaType;
|
||||||
import io.jooby.StatusCode;
|
import io.jooby.StatusCode;
|
||||||
import io.prometheus.client.Counter;
|
import io.prometheus.client.Counter;
|
||||||
import io.prometheus.client.Histogram;
|
import io.prometheus.client.Histogram;
|
||||||
import nu.marginalia.WebsiteUrl;
|
import nu.marginalia.WebsiteUrl;
|
||||||
|
import nu.marginalia.api.favicon.FaviconClient;
|
||||||
|
import nu.marginalia.db.DbDomainQueries;
|
||||||
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import nu.marginalia.search.svc.*;
|
import nu.marginalia.search.svc.*;
|
||||||
import nu.marginalia.service.discovery.property.ServicePartition;
|
import nu.marginalia.service.discovery.property.ServicePartition;
|
||||||
import nu.marginalia.service.server.BaseServiceParams;
|
import nu.marginalia.service.server.BaseServiceParams;
|
||||||
@@ -15,11 +19,14 @@ import org.slf4j.Logger;
|
|||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.NoSuchElementException;
|
||||||
|
|
||||||
public class SearchService extends JoobyService {
|
public class SearchService extends JoobyService {
|
||||||
|
|
||||||
private final WebsiteUrl websiteUrl;
|
private final WebsiteUrl websiteUrl;
|
||||||
private final SearchSiteSubscriptionService siteSubscriptionService;
|
private final SearchSiteSubscriptionService siteSubscriptionService;
|
||||||
|
private final FaviconClient faviconClient;
|
||||||
|
private final DbDomainQueries domainQueries;
|
||||||
|
|
||||||
private static final Logger logger = LoggerFactory.getLogger(SearchService.class);
|
private static final Logger logger = LoggerFactory.getLogger(SearchService.class);
|
||||||
private static final Histogram wmsa_search_service_request_time = Histogram.build()
|
private static final Histogram wmsa_search_service_request_time = Histogram.build()
|
||||||
@@ -43,6 +50,8 @@ public class SearchService extends JoobyService {
|
|||||||
SearchSiteInfoService siteInfoService,
|
SearchSiteInfoService siteInfoService,
|
||||||
SearchCrosstalkService crosstalkService,
|
SearchCrosstalkService crosstalkService,
|
||||||
SearchBrowseService searchBrowseService,
|
SearchBrowseService searchBrowseService,
|
||||||
|
FaviconClient faviconClient,
|
||||||
|
DbDomainQueries domainQueries,
|
||||||
SearchQueryService searchQueryService)
|
SearchQueryService searchQueryService)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
super(params,
|
super(params,
|
||||||
@@ -58,6 +67,8 @@ public class SearchService extends JoobyService {
|
|||||||
this.websiteUrl = websiteUrl;
|
this.websiteUrl = websiteUrl;
|
||||||
|
|
||||||
this.siteSubscriptionService = siteSubscriptionService;
|
this.siteSubscriptionService = siteSubscriptionService;
|
||||||
|
this.faviconClient = faviconClient;
|
||||||
|
this.domainQueries = domainQueries;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -71,6 +82,31 @@ public class SearchService extends JoobyService {
|
|||||||
jooby.get("/site/https://*", this::handleSiteUrlRedirect);
|
jooby.get("/site/https://*", this::handleSiteUrlRedirect);
|
||||||
jooby.get("/site/http://*", this::handleSiteUrlRedirect);
|
jooby.get("/site/http://*", this::handleSiteUrlRedirect);
|
||||||
|
|
||||||
|
jooby.get("/site/{domain}/favicon", ctx -> {
|
||||||
|
String domain = ctx.path("domain").value();
|
||||||
|
logger.info("Finding icon for domain {}", domain);
|
||||||
|
domainQueries.getDomainId(new EdgeDomain(domain));
|
||||||
|
try {
|
||||||
|
DbDomainQueries.DomainIdWithNode domainIdWithNode = domainQueries.getDomainIdWithNode(new EdgeDomain(domain));
|
||||||
|
var faviconMaybe = faviconClient.getFavicon(domain, domainIdWithNode.nodeAffinity());
|
||||||
|
|
||||||
|
if (faviconMaybe.isEmpty()) {
|
||||||
|
ctx.setResponseCode(404);
|
||||||
|
return "";
|
||||||
|
} else {
|
||||||
|
var favicon = faviconMaybe.get();
|
||||||
|
|
||||||
|
ctx.responseStream(MediaType.valueOf(favicon.contentType()), consumer -> {
|
||||||
|
consumer.write(favicon.bytes());
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (NoSuchElementException ex) {
|
||||||
|
ctx.setResponseCode(404);
|
||||||
|
}
|
||||||
|
return "";
|
||||||
|
});
|
||||||
|
|
||||||
jooby.before((Context ctx) -> {
|
jooby.before((Context ctx) -> {
|
||||||
ctx.setAttribute(startTimeAttribute, System.nanoTime());
|
ctx.setAttribute(startTimeAttribute, System.nanoTime());
|
||||||
});
|
});
|
||||||
|
@@ -81,7 +81,7 @@ public class SearchFilters {
|
|||||||
),
|
),
|
||||||
List.of(
|
List.of(
|
||||||
new Filter("Vintage", "fa-clock-rotate-left", SearchProfile.VINTAGE, parameters),
|
new Filter("Vintage", "fa-clock-rotate-left", SearchProfile.VINTAGE, parameters),
|
||||||
new Filter("Small Web", "fa-user-minus", SearchProfile.SMALLWEB, parameters),
|
new Filter("Small Web", "fa-minus", SearchProfile.SMALLWEB, parameters),
|
||||||
new Filter("Plain Text", "fa-file", SearchProfile.PLAIN_TEXT, parameters),
|
new Filter("Plain Text", "fa-file", SearchProfile.PLAIN_TEXT, parameters),
|
||||||
new Filter("Tilde", "fa-house", SearchProfile.TILDE, parameters)
|
new Filter("Tilde", "fa-house", SearchProfile.TILDE, parameters)
|
||||||
),
|
),
|
||||||
|
@@ -26,15 +26,15 @@
|
|||||||
It operates a bit like a clock, starting at the top and working its way around clockwise.</p>
|
It operates a bit like a clock, starting at the top and working its way around clockwise.</p>
|
||||||
|
|
||||||
<div class="flex gap-4 place-items-middle">
|
<div class="flex gap-4 place-items-middle">
|
||||||
@template.serp.part.matchogram(mask = 90)
|
@template.serp.part.matchogram(mask = 90, domain = "example.com")
|
||||||
<div>This is by the beginning</div>
|
<div>This is by the beginning</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="flex gap-4 place-items-middle">
|
<div class="flex gap-4 place-items-middle">
|
||||||
@template.serp.part.matchogram(mask = 90L<<26)
|
@template.serp.part.matchogram(mask = 90L<<26, domain = "example.com")
|
||||||
<div>This is in the middle</div>
|
<div>This is in the middle</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="flex gap-4 place-items-middle">
|
<div class="flex gap-4 place-items-middle">
|
||||||
@template.serp.part.matchogram(mask = 5L<<48)
|
@template.serp.part.matchogram(mask = 5L<<48, domain = "example.com")
|
||||||
<div>This is toward the end</div>
|
<div>This is toward the end</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
@@ -1,11 +1,13 @@
|
|||||||
@import java.util.stream.IntStream
|
@import java.util.stream.IntStream
|
||||||
|
|
||||||
@param long mask
|
@param long mask
|
||||||
|
@param String domain
|
||||||
|
|
||||||
<svg width="40" height="40">
|
<svg width="40" height="40"
|
||||||
|
style="background-image: url('/site/${domain}/favicon'); background-repeat: no-repeat; background-size: 16px 16px; background-position: center; ">
|
||||||
<circle
|
<circle
|
||||||
cx="18"
|
cx="20"
|
||||||
cy="18"
|
cy="20"
|
||||||
r="16"
|
r="16"
|
||||||
fill="none"
|
fill="none"
|
||||||
stroke="#eee"
|
stroke="#eee"
|
||||||
@@ -13,10 +15,10 @@
|
|||||||
/>
|
/>
|
||||||
@for (int bit : IntStream.range(0, 56).filter(bit -> (mask & (1L << bit)) != 0).toArray())
|
@for (int bit : IntStream.range(0, 56).filter(bit -> (mask & (1L << bit)) != 0).toArray())
|
||||||
<line
|
<line
|
||||||
x1="${18 + 15*Math.sin(2 * Math.PI * bit / 56.)}"
|
x1="${20 + 15*Math.sin(2 * Math.PI * bit / 56.)}"
|
||||||
y1="${18 - 15*Math.cos(2 * Math.PI * bit / 56.)}"
|
y1="${20 - 15*Math.cos(2 * Math.PI * bit / 56.)}"
|
||||||
x2="${18 + 17*Math.sin(2 * Math.PI * bit / 56.)}"
|
x2="${20 + 17*Math.sin(2 * Math.PI * bit / 56.)}"
|
||||||
y2="${18 - 17*Math.cos(2 * Math.PI * bit / 56.)}"
|
y2="${20 - 17*Math.cos(2 * Math.PI * bit / 56.)}"
|
||||||
stroke="#444"
|
stroke="#444"
|
||||||
stroke-width="2"
|
stroke-width="2"
|
||||||
/>
|
/>
|
||||||
|
@@ -12,7 +12,7 @@
|
|||||||
<div class="flex flex-col grow" >
|
<div class="flex flex-col grow" >
|
||||||
<div class="flex flex-row space-x-2 place-items-center">
|
<div class="flex flex-row space-x-2 place-items-center">
|
||||||
<div class="flex-0" title="Match density">
|
<div class="flex-0" title="Match density">
|
||||||
@template.serp.part.matchogram(mask = result.first.positionsMask)
|
@template.serp.part.matchogram(mask = result.first.positionsMask, domain=result.getFirst().url.domain.toString())
|
||||||
</div>
|
</div>
|
||||||
<div class="flex grow justify-between items-start">
|
<div class="flex grow justify-between items-start">
|
||||||
<div class="flex-1">
|
<div class="flex-1">
|
||||||
|
@@ -9,8 +9,8 @@
|
|||||||
|
|
||||||
<div class="flex-1 p-4 space-y-4 mx-auto w-full md:w-auto">
|
<div class="flex-1 p-4 space-y-4 mx-auto w-full md:w-auto">
|
||||||
<div class="flex border dark:border-gray-600 rounded bg-white dark:bg-gray-800 flex-col space-y-4 pb-4 overflow-hidden md:max-w-lg" >
|
<div class="flex border dark:border-gray-600 rounded bg-white dark:bg-gray-800 flex-col space-y-4 pb-4 overflow-hidden md:max-w-lg" >
|
||||||
<div class="flex place-items-baseline space-x-2 p-2 text-md border-b dark:border-gray-600 bg-margeblue text-white">
|
<div class="flex place-items-center space-x-2 p-2 text-md border-b dark:border-gray-600 bg-margeblue text-white">
|
||||||
<i class="fa fa-globe"></i>
|
<img src="/site/${siteInfo.domain()}/favicon" style="width: 16px; height: 16px; vertical-align: center">
|
||||||
<span>${siteInfo.domain()}</span>
|
<span>${siteInfo.domain()}</span>
|
||||||
<div class="grow">
|
<div class="grow">
|
||||||
</div>
|
</div>
|
||||||
|
@@ -42,6 +42,8 @@ dependencies {
|
|||||||
implementation project(':code:libraries:message-queue')
|
implementation project(':code:libraries:message-queue')
|
||||||
|
|
||||||
implementation project(':code:functions:link-graph:api')
|
implementation project(':code:functions:link-graph:api')
|
||||||
|
implementation project(':code:functions:favicon')
|
||||||
|
implementation project(':code:functions:favicon:api')
|
||||||
|
|
||||||
implementation project(':code:processes:crawling-process:model')
|
implementation project(':code:processes:crawling-process:model')
|
||||||
implementation project(':code:processes:crawling-process:model')
|
implementation project(':code:processes:crawling-process:model')
|
||||||
|
@@ -2,6 +2,7 @@ package nu.marginalia.executor;
|
|||||||
|
|
||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import nu.marginalia.execution.*;
|
import nu.marginalia.execution.*;
|
||||||
|
import nu.marginalia.functions.favicon.FaviconGrpcService;
|
||||||
import nu.marginalia.service.discovery.property.ServicePartition;
|
import nu.marginalia.service.discovery.property.ServicePartition;
|
||||||
import nu.marginalia.service.server.BaseServiceParams;
|
import nu.marginalia.service.server.BaseServiceParams;
|
||||||
import nu.marginalia.service.server.SparkService;
|
import nu.marginalia.service.server.SparkService;
|
||||||
@@ -24,6 +25,7 @@ public class ExecutorSvc extends SparkService {
|
|||||||
ExecutorCrawlGrpcService executorCrawlGrpcService,
|
ExecutorCrawlGrpcService executorCrawlGrpcService,
|
||||||
ExecutorSideloadGrpcService executorSideloadGrpcService,
|
ExecutorSideloadGrpcService executorSideloadGrpcService,
|
||||||
ExecutorExportGrpcService executorExportGrpcService,
|
ExecutorExportGrpcService executorExportGrpcService,
|
||||||
|
FaviconGrpcService faviconGrpcService,
|
||||||
ExecutionInit executionInit,
|
ExecutionInit executionInit,
|
||||||
ExecutorFileTransferService fileTransferService) throws Exception {
|
ExecutorFileTransferService fileTransferService) throws Exception {
|
||||||
super(params,
|
super(params,
|
||||||
@@ -31,7 +33,8 @@ public class ExecutorSvc extends SparkService {
|
|||||||
List.of(executorGrpcService,
|
List.of(executorGrpcService,
|
||||||
executorCrawlGrpcService,
|
executorCrawlGrpcService,
|
||||||
executorSideloadGrpcService,
|
executorSideloadGrpcService,
|
||||||
executorExportGrpcService)
|
executorExportGrpcService,
|
||||||
|
faviconGrpcService)
|
||||||
);
|
);
|
||||||
|
|
||||||
this.executionInit = executionInit;
|
this.executionInit = executionInit;
|
||||||
|
@@ -16,7 +16,8 @@ include 'code:services-application:status-service'
|
|||||||
|
|
||||||
include 'code:functions:math'
|
include 'code:functions:math'
|
||||||
include 'code:functions:math:api'
|
include 'code:functions:math:api'
|
||||||
|
include 'code:functions:favicon'
|
||||||
|
include 'code:functions:favicon:api'
|
||||||
include 'code:functions:domain-info'
|
include 'code:functions:domain-info'
|
||||||
include 'code:functions:domain-info:api'
|
include 'code:functions:domain-info:api'
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user