mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-06 07:32:38 +02:00
Compare commits
11 Commits
deploy-005
...
deploy-006
Author | SHA1 | Date | |
---|---|---|---|
|
e34230c25b | ||
|
ef3f175ede | ||
|
bbe4b5d9fd | ||
|
c67a635103 | ||
|
20b24133fb | ||
|
f2567677e8 | ||
|
bc2c2061f2 | ||
|
1c7f5a31a5 | ||
|
59a8ea60f7 | ||
|
aa9b1244ea | ||
|
2d17233366 |
@@ -20,7 +20,10 @@ public class DbDomainQueries {
|
|||||||
private final HikariDataSource dataSource;
|
private final HikariDataSource dataSource;
|
||||||
|
|
||||||
private static final Logger logger = LoggerFactory.getLogger(DbDomainQueries.class);
|
private static final Logger logger = LoggerFactory.getLogger(DbDomainQueries.class);
|
||||||
|
|
||||||
private final Cache<EdgeDomain, Integer> domainIdCache = CacheBuilder.newBuilder().maximumSize(10_000).build();
|
private final Cache<EdgeDomain, Integer> domainIdCache = CacheBuilder.newBuilder().maximumSize(10_000).build();
|
||||||
|
private final Cache<Integer, EdgeDomain> domainNameCache = CacheBuilder.newBuilder().maximumSize(10_000).build();
|
||||||
|
private final Cache<String, List<DomainWithNode>> siblingsCache = CacheBuilder.newBuilder().maximumSize(10_000).build();
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public DbDomainQueries(HikariDataSource dataSource)
|
public DbDomainQueries(HikariDataSource dataSource)
|
||||||
@@ -30,16 +33,21 @@ public class DbDomainQueries {
|
|||||||
|
|
||||||
|
|
||||||
public Integer getDomainId(EdgeDomain domain) throws NoSuchElementException {
|
public Integer getDomainId(EdgeDomain domain) throws NoSuchElementException {
|
||||||
try (var connection = dataSource.getConnection()) {
|
try {
|
||||||
|
|
||||||
return domainIdCache.get(domain, () -> {
|
return domainIdCache.get(domain, () -> {
|
||||||
try (var stmt = connection.prepareStatement("SELECT ID FROM EC_DOMAIN WHERE DOMAIN_NAME=?")) {
|
try (var connection = dataSource.getConnection();
|
||||||
|
var stmt = connection.prepareStatement("SELECT ID FROM EC_DOMAIN WHERE DOMAIN_NAME=?")) {
|
||||||
|
|
||||||
stmt.setString(1, domain.toString());
|
stmt.setString(1, domain.toString());
|
||||||
var rsp = stmt.executeQuery();
|
var rsp = stmt.executeQuery();
|
||||||
if (rsp.next()) {
|
if (rsp.next()) {
|
||||||
return rsp.getInt(1);
|
return rsp.getInt(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
catch (SQLException ex) {
|
||||||
|
throw new RuntimeException(ex);
|
||||||
|
}
|
||||||
|
|
||||||
throw new NoSuchElementException();
|
throw new NoSuchElementException();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -49,9 +57,6 @@ public class DbDomainQueries {
|
|||||||
catch (ExecutionException ex) {
|
catch (ExecutionException ex) {
|
||||||
throw new RuntimeException(ex.getCause());
|
throw new RuntimeException(ex.getCause());
|
||||||
}
|
}
|
||||||
catch (SQLException ex) {
|
|
||||||
throw new RuntimeException(ex);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public OptionalInt tryGetDomainId(EdgeDomain domain) {
|
public OptionalInt tryGetDomainId(EdgeDomain domain) {
|
||||||
@@ -84,47 +89,55 @@ public class DbDomainQueries {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public Optional<EdgeDomain> getDomain(int id) {
|
public Optional<EdgeDomain> getDomain(int id) {
|
||||||
try (var connection = dataSource.getConnection()) {
|
|
||||||
|
|
||||||
|
EdgeDomain existing = domainNameCache.getIfPresent(id);
|
||||||
|
if (existing != null) {
|
||||||
|
return Optional.of(existing);
|
||||||
|
}
|
||||||
|
|
||||||
|
try (var connection = dataSource.getConnection()) {
|
||||||
try (var stmt = connection.prepareStatement("SELECT DOMAIN_NAME FROM EC_DOMAIN WHERE ID=?")) {
|
try (var stmt = connection.prepareStatement("SELECT DOMAIN_NAME FROM EC_DOMAIN WHERE ID=?")) {
|
||||||
stmt.setInt(1, id);
|
stmt.setInt(1, id);
|
||||||
var rsp = stmt.executeQuery();
|
var rsp = stmt.executeQuery();
|
||||||
if (rsp.next()) {
|
if (rsp.next()) {
|
||||||
return Optional.of(new EdgeDomain(rsp.getString(1)));
|
var val = new EdgeDomain(rsp.getString(1));
|
||||||
|
domainNameCache.put(id, val);
|
||||||
|
return Optional.of(val);
|
||||||
}
|
}
|
||||||
return Optional.empty();
|
return Optional.empty();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (UncheckedExecutionException ex) {
|
|
||||||
throw new RuntimeException(ex.getCause());
|
|
||||||
}
|
|
||||||
catch (SQLException ex) {
|
catch (SQLException ex) {
|
||||||
throw new RuntimeException(ex);
|
throw new RuntimeException(ex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<DomainWithNode> otherSubdomains(EdgeDomain domain, int cnt) {
|
public List<DomainWithNode> otherSubdomains(EdgeDomain domain, int cnt) throws ExecutionException {
|
||||||
List<DomainWithNode> ret = new ArrayList<>();
|
String topDomain = domain.topDomain;
|
||||||
|
|
||||||
try (var conn = dataSource.getConnection();
|
return siblingsCache.get(topDomain, () -> {
|
||||||
var stmt = conn.prepareStatement("SELECT DOMAIN_NAME, NODE_AFFINITY FROM EC_DOMAIN WHERE DOMAIN_TOP = ? LIMIT ?")) {
|
List<DomainWithNode> ret = new ArrayList<>();
|
||||||
stmt.setString(1, domain.topDomain);
|
|
||||||
stmt.setInt(2, cnt);
|
|
||||||
|
|
||||||
var rs = stmt.executeQuery();
|
try (var conn = dataSource.getConnection();
|
||||||
while (rs.next()) {
|
var stmt = conn.prepareStatement("SELECT DOMAIN_NAME, NODE_AFFINITY FROM EC_DOMAIN WHERE DOMAIN_TOP = ? LIMIT ?")) {
|
||||||
var sibling = new EdgeDomain(rs.getString(1));
|
stmt.setString(1, topDomain);
|
||||||
|
stmt.setInt(2, cnt);
|
||||||
|
|
||||||
if (sibling.equals(domain))
|
var rs = stmt.executeQuery();
|
||||||
continue;
|
while (rs.next()) {
|
||||||
|
var sibling = new EdgeDomain(rs.getString(1));
|
||||||
|
|
||||||
ret.add(new DomainWithNode(sibling, rs.getInt(2)));
|
if (sibling.equals(domain))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
ret.add(new DomainWithNode(sibling, rs.getInt(2)));
|
||||||
|
}
|
||||||
|
} catch (SQLException e) {
|
||||||
|
logger.error("Failed to get domain neighbors");
|
||||||
}
|
}
|
||||||
} catch (SQLException e) {
|
return ret;
|
||||||
logger.error("Failed to get domain neighbors");
|
});
|
||||||
}
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public record DomainWithNode (EdgeDomain domain, int nodeAffinity) {
|
public record DomainWithNode (EdgeDomain domain, int nodeAffinity) {
|
||||||
|
@@ -1,118 +0,0 @@
|
|||||||
package nu.marginalia.db;
|
|
||||||
|
|
||||||
import com.zaxxer.hikari.HikariDataSource;
|
|
||||||
|
|
||||||
import java.sql.Connection;
|
|
||||||
import java.sql.PreparedStatement;
|
|
||||||
import java.sql.SQLException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.OptionalInt;
|
|
||||||
|
|
||||||
/** Class used in exporting data. This is intended to be used for a brief time
|
|
||||||
* and then discarded, not kept around as a service.
|
|
||||||
*/
|
|
||||||
public class DbDomainStatsExportMultitool implements AutoCloseable {
|
|
||||||
private final Connection connection;
|
|
||||||
private final int nodeId;
|
|
||||||
private final PreparedStatement knownUrlsQuery;
|
|
||||||
private final PreparedStatement visitedUrlsQuery;
|
|
||||||
private final PreparedStatement goodUrlsQuery;
|
|
||||||
private final PreparedStatement domainNameToId;
|
|
||||||
|
|
||||||
private final PreparedStatement allDomainsQuery;
|
|
||||||
private final PreparedStatement crawlQueueDomains;
|
|
||||||
private final PreparedStatement indexedDomainsQuery;
|
|
||||||
|
|
||||||
public DbDomainStatsExportMultitool(HikariDataSource dataSource, int nodeId) throws SQLException {
|
|
||||||
this.connection = dataSource.getConnection();
|
|
||||||
this.nodeId = nodeId;
|
|
||||||
|
|
||||||
knownUrlsQuery = connection.prepareStatement("""
|
|
||||||
SELECT KNOWN_URLS
|
|
||||||
FROM EC_DOMAIN INNER JOIN DOMAIN_METADATA
|
|
||||||
ON EC_DOMAIN.ID=DOMAIN_METADATA.ID
|
|
||||||
WHERE DOMAIN_NAME=?
|
|
||||||
""");
|
|
||||||
visitedUrlsQuery = connection.prepareStatement("""
|
|
||||||
SELECT VISITED_URLS
|
|
||||||
FROM EC_DOMAIN INNER JOIN DOMAIN_METADATA
|
|
||||||
ON EC_DOMAIN.ID=DOMAIN_METADATA.ID
|
|
||||||
WHERE DOMAIN_NAME=?
|
|
||||||
""");
|
|
||||||
goodUrlsQuery = connection.prepareStatement("""
|
|
||||||
SELECT GOOD_URLS
|
|
||||||
FROM EC_DOMAIN INNER JOIN DOMAIN_METADATA
|
|
||||||
ON EC_DOMAIN.ID=DOMAIN_METADATA.ID
|
|
||||||
WHERE DOMAIN_NAME=?
|
|
||||||
""");
|
|
||||||
domainNameToId = connection.prepareStatement("""
|
|
||||||
SELECT ID
|
|
||||||
FROM EC_DOMAIN
|
|
||||||
WHERE DOMAIN_NAME=?
|
|
||||||
""");
|
|
||||||
allDomainsQuery = connection.prepareStatement("""
|
|
||||||
SELECT DOMAIN_NAME
|
|
||||||
FROM EC_DOMAIN
|
|
||||||
""");
|
|
||||||
crawlQueueDomains = connection.prepareStatement("""
|
|
||||||
SELECT DOMAIN_NAME
|
|
||||||
FROM CRAWL_QUEUE
|
|
||||||
""");
|
|
||||||
indexedDomainsQuery = connection.prepareStatement("""
|
|
||||||
SELECT DOMAIN_NAME
|
|
||||||
FROM EC_DOMAIN
|
|
||||||
WHERE INDEXED > 0
|
|
||||||
""");
|
|
||||||
}
|
|
||||||
|
|
||||||
public OptionalInt getVisitedUrls(String domainName) throws SQLException {
|
|
||||||
return executeNameToIntQuery(domainName, visitedUrlsQuery);
|
|
||||||
}
|
|
||||||
|
|
||||||
public OptionalInt getDomainId(String domainName) throws SQLException {
|
|
||||||
return executeNameToIntQuery(domainName, domainNameToId);
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<String> getCrawlQueueDomains() throws SQLException {
|
|
||||||
return executeListQuery(crawlQueueDomains, 100);
|
|
||||||
}
|
|
||||||
public List<String> getAllIndexedDomains() throws SQLException {
|
|
||||||
return executeListQuery(indexedDomainsQuery, 100_000);
|
|
||||||
}
|
|
||||||
|
|
||||||
private OptionalInt executeNameToIntQuery(String domainName, PreparedStatement statement)
|
|
||||||
throws SQLException {
|
|
||||||
statement.setString(1, domainName);
|
|
||||||
var rs = statement.executeQuery();
|
|
||||||
|
|
||||||
if (rs.next()) {
|
|
||||||
return OptionalInt.of(rs.getInt(1));
|
|
||||||
}
|
|
||||||
|
|
||||||
return OptionalInt.empty();
|
|
||||||
}
|
|
||||||
|
|
||||||
private List<String> executeListQuery(PreparedStatement statement, int sizeHint) throws SQLException {
|
|
||||||
List<String> ret = new ArrayList<>(sizeHint);
|
|
||||||
|
|
||||||
var rs = statement.executeQuery();
|
|
||||||
|
|
||||||
while (rs.next()) {
|
|
||||||
ret.add(rs.getString(1));
|
|
||||||
}
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void close() throws SQLException {
|
|
||||||
knownUrlsQuery.close();
|
|
||||||
goodUrlsQuery.close();
|
|
||||||
visitedUrlsQuery.close();
|
|
||||||
allDomainsQuery.close();
|
|
||||||
crawlQueueDomains.close();
|
|
||||||
domainNameToId.close();
|
|
||||||
connection.close();
|
|
||||||
}
|
|
||||||
}
|
|
@@ -16,20 +16,19 @@ import org.slf4j.LoggerFactory;
|
|||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.concurrent.CompletableFuture;
|
import java.util.concurrent.CompletableFuture;
|
||||||
import java.util.concurrent.ExecutorService;
|
import java.util.concurrent.ExecutorService;
|
||||||
import java.util.concurrent.Executors;
|
import java.util.concurrent.Executors;
|
||||||
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
import static java.lang.Math.clamp;
|
import java.util.function.Consumer;
|
||||||
|
|
||||||
@Singleton
|
@Singleton
|
||||||
public class IndexClient {
|
public class IndexClient {
|
||||||
private static final Logger logger = LoggerFactory.getLogger(IndexClient.class);
|
private static final Logger logger = LoggerFactory.getLogger(IndexClient.class);
|
||||||
private final GrpcMultiNodeChannelPool<IndexApiGrpc.IndexApiBlockingStub> channelPool;
|
private final GrpcMultiNodeChannelPool<IndexApiGrpc.IndexApiBlockingStub> channelPool;
|
||||||
private final DomainBlacklistImpl blacklist;
|
private final DomainBlacklistImpl blacklist;
|
||||||
private static final ExecutorService executor = Executors.newVirtualThreadPerTaskExecutor();
|
private static final ExecutorService executor = Executors.newCachedThreadPool();
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public IndexClient(GrpcChannelPoolFactory channelPoolFactory, DomainBlacklistImpl blacklist) {
|
public IndexClient(GrpcChannelPoolFactory channelPoolFactory, DomainBlacklistImpl blacklist) {
|
||||||
@@ -51,40 +50,37 @@ public class IndexClient {
|
|||||||
|
|
||||||
/** Execute a query on the index partitions and return the combined results. */
|
/** Execute a query on the index partitions and return the combined results. */
|
||||||
public AggregateQueryResponse executeQueries(RpcIndexQuery indexRequest, Pagination pagination) {
|
public AggregateQueryResponse executeQueries(RpcIndexQuery indexRequest, Pagination pagination) {
|
||||||
List<CompletableFuture<Iterator<RpcDecoratedResultItem>>> futures =
|
|
||||||
channelPool.call(IndexApiGrpc.IndexApiBlockingStub::query)
|
|
||||||
.async(executor)
|
|
||||||
.runEach(indexRequest);
|
|
||||||
|
|
||||||
final int requestedMaxResults = indexRequest.getQueryLimits().getResultsTotal();
|
final int requestedMaxResults = indexRequest.getQueryLimits().getResultsTotal();
|
||||||
final int resultsUpperBound = requestedMaxResults * channelPool.getNumNodes();
|
|
||||||
|
|
||||||
List<RpcDecoratedResultItem> results = new ArrayList<>(resultsUpperBound);
|
AtomicInteger totalNumResults = new AtomicInteger(0);
|
||||||
|
|
||||||
for (var future : futures) {
|
List<RpcDecoratedResultItem> results =
|
||||||
try {
|
channelPool.call(IndexApiGrpc.IndexApiBlockingStub::query)
|
||||||
future.get().forEachRemaining(results::add);
|
.async(executor)
|
||||||
}
|
.runEach(indexRequest)
|
||||||
catch (Exception e) {
|
.stream()
|
||||||
logger.error("Downstream exception", e);
|
.map(future -> future.thenApply(iterator -> {
|
||||||
}
|
List<RpcDecoratedResultItem> ret = new ArrayList<>(requestedMaxResults);
|
||||||
}
|
iterator.forEachRemaining(ret::add);
|
||||||
|
totalNumResults.addAndGet(ret.size());
|
||||||
|
return ret;
|
||||||
|
}))
|
||||||
|
.mapMulti((CompletableFuture<List<RpcDecoratedResultItem>> fut, Consumer<List<RpcDecoratedResultItem>> c) ->{
|
||||||
|
try {
|
||||||
|
c.accept(fut.join());
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.error("Error while fetching results", e);
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.flatMap(List::stream)
|
||||||
|
.filter(item -> !isBlacklisted(item))
|
||||||
|
.sorted(comparator)
|
||||||
|
.skip(Math.max(0, (pagination.page - 1) * pagination.pageSize))
|
||||||
|
.limit(pagination.pageSize)
|
||||||
|
.toList();
|
||||||
|
|
||||||
// Sort the results by ranking score and remove blacklisted domains
|
return new AggregateQueryResponse(results, pagination.page(), totalNumResults.get());
|
||||||
results.sort(comparator);
|
|
||||||
results.removeIf(this::isBlacklisted);
|
|
||||||
|
|
||||||
int numReceivedResults = results.size();
|
|
||||||
|
|
||||||
// pagination is typically 1-indexed, so we need to adjust the start and end indices
|
|
||||||
int indexStart = (pagination.page - 1) * pagination.pageSize;
|
|
||||||
int indexEnd = (pagination.page) * pagination.pageSize;
|
|
||||||
|
|
||||||
results = results.subList(
|
|
||||||
clamp(indexStart, 0, Math.max(0, results.size() - 1)), // from is inclusive, so subtract 1 from size()
|
|
||||||
clamp(indexEnd, 0, results.size()));
|
|
||||||
|
|
||||||
return new AggregateQueryResponse(results, pagination.page(), numReceivedResults);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean isBlacklisted(RpcDecoratedResultItem item) {
|
private boolean isBlacklisted(RpcDecoratedResultItem item) {
|
||||||
|
@@ -84,18 +84,33 @@ public record SearchParameters(WebsiteUrl url,
|
|||||||
}
|
}
|
||||||
|
|
||||||
public String renderUrl() {
|
public String renderUrl() {
|
||||||
String path = String.format("/search?query=%s&profile=%s&js=%s&adtech=%s&recent=%s&searchTitle=%s&newfilter=%s&page=%d",
|
|
||||||
URLEncoder.encode(query, StandardCharsets.UTF_8),
|
|
||||||
URLEncoder.encode(profile.filterId, StandardCharsets.UTF_8),
|
|
||||||
URLEncoder.encode(js.value, StandardCharsets.UTF_8),
|
|
||||||
URLEncoder.encode(adtech.value, StandardCharsets.UTF_8),
|
|
||||||
URLEncoder.encode(recent.value, StandardCharsets.UTF_8),
|
|
||||||
URLEncoder.encode(searchTitle.value, StandardCharsets.UTF_8),
|
|
||||||
Boolean.valueOf(newFilter).toString(),
|
|
||||||
page
|
|
||||||
);
|
|
||||||
|
|
||||||
return path;
|
StringBuilder pathBuilder = new StringBuilder("/search?");
|
||||||
|
pathBuilder.append("query=").append(URLEncoder.encode(query, StandardCharsets.UTF_8));
|
||||||
|
|
||||||
|
if (profile != SearchProfile.NO_FILTER) {
|
||||||
|
pathBuilder.append("&profile=").append(URLEncoder.encode(profile.filterId, StandardCharsets.UTF_8));
|
||||||
|
}
|
||||||
|
if (js != SearchJsParameter.DEFAULT) {
|
||||||
|
pathBuilder.append("&js=").append(URLEncoder.encode(js.value, StandardCharsets.UTF_8));
|
||||||
|
}
|
||||||
|
if (adtech != SearchAdtechParameter.DEFAULT) {
|
||||||
|
pathBuilder.append("&adtech=").append(URLEncoder.encode(adtech.value, StandardCharsets.UTF_8));
|
||||||
|
}
|
||||||
|
if (recent != SearchRecentParameter.DEFAULT) {
|
||||||
|
pathBuilder.append("&recent=").append(URLEncoder.encode(recent.value, StandardCharsets.UTF_8));
|
||||||
|
}
|
||||||
|
if (searchTitle != SearchTitleParameter.DEFAULT) {
|
||||||
|
pathBuilder.append("&searchTitle=").append(URLEncoder.encode(searchTitle.value, StandardCharsets.UTF_8));
|
||||||
|
}
|
||||||
|
if (page != 1) {
|
||||||
|
pathBuilder.append("&page=").append(page);
|
||||||
|
}
|
||||||
|
if (newFilter) {
|
||||||
|
pathBuilder.append("&newfilter=").append(Boolean.valueOf(newFilter).toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
return pathBuilder.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
public RpcTemporalBias.Bias temporalBias() {
|
public RpcTemporalBias.Bias temporalBias() {
|
||||||
|
@@ -3,27 +3,22 @@ package nu.marginalia.search.command.commands;
|
|||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import io.jooby.MapModelAndView;
|
import io.jooby.MapModelAndView;
|
||||||
import io.jooby.ModelAndView;
|
import io.jooby.ModelAndView;
|
||||||
import nu.marginalia.search.JteRenderer;
|
|
||||||
import nu.marginalia.search.SearchOperator;
|
import nu.marginalia.search.SearchOperator;
|
||||||
import nu.marginalia.search.command.SearchCommandInterface;
|
import nu.marginalia.search.command.SearchCommandInterface;
|
||||||
import nu.marginalia.search.command.SearchParameters;
|
import nu.marginalia.search.command.SearchParameters;
|
||||||
import nu.marginalia.search.model.DecoratedSearchResults;
|
import nu.marginalia.search.model.DecoratedSearchResults;
|
||||||
import nu.marginalia.search.model.NavbarModel;
|
import nu.marginalia.search.model.NavbarModel;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
public class SearchCommand implements SearchCommandInterface {
|
public class SearchCommand implements SearchCommandInterface {
|
||||||
private final SearchOperator searchOperator;
|
private final SearchOperator searchOperator;
|
||||||
private final JteRenderer jteRenderer;
|
|
||||||
|
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public SearchCommand(SearchOperator searchOperator,
|
public SearchCommand(SearchOperator searchOperator){
|
||||||
JteRenderer jteRenderer) throws IOException {
|
|
||||||
this.searchOperator = searchOperator;
|
this.searchOperator = searchOperator;
|
||||||
this.jteRenderer = jteRenderer;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@@ -26,10 +26,9 @@ import org.slf4j.Logger;
|
|||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
import java.time.Duration;
|
|
||||||
import java.time.Instant;
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.concurrent.CompletableFuture;
|
import java.util.concurrent.CompletableFuture;
|
||||||
|
import java.util.concurrent.ExecutionException;
|
||||||
import java.util.concurrent.Future;
|
import java.util.concurrent.Future;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
import java.util.function.Supplier;
|
import java.util.function.Supplier;
|
||||||
@@ -69,9 +68,11 @@ public class SearchSiteInfoService {
|
|||||||
this.screenshotService = screenshotService;
|
this.screenshotService = screenshotService;
|
||||||
this.dataSource = dataSource;
|
this.dataSource = dataSource;
|
||||||
this.searchSiteSubscriptions = searchSiteSubscriptions;
|
this.searchSiteSubscriptions = searchSiteSubscriptions;
|
||||||
|
|
||||||
|
Thread.ofPlatform().name("Recently Added Domains Model Updater").start(this::modelUpdater);
|
||||||
}
|
}
|
||||||
|
|
||||||
private volatile SiteOverviewModel model = new SiteOverviewModel(List.of(), Instant.EPOCH);
|
private volatile SiteOverviewModel cachedOverviewModel = new SiteOverviewModel(List.of());
|
||||||
|
|
||||||
@GET
|
@GET
|
||||||
@Path("/site")
|
@Path("/site")
|
||||||
@@ -81,55 +82,52 @@ public class SearchSiteInfoService {
|
|||||||
return new MapModelAndView("redirect.jte", Map.of("url", "/site/"+domain));
|
return new MapModelAndView("redirect.jte", Map.of("url", "/site/"+domain));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (model.age().compareTo(Duration.ofMinutes(15)) > 0) {
|
|
||||||
updateModel();
|
|
||||||
}
|
|
||||||
|
|
||||||
return new MapModelAndView("siteinfo/start.jte",
|
return new MapModelAndView("siteinfo/start.jte",
|
||||||
Map.of("navbar", NavbarModel.SITEINFO,
|
Map.of("navbar", NavbarModel.SITEINFO,
|
||||||
"model", model));
|
"model", cachedOverviewModel));
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Update the model if it is older than 15 minutes.
|
private void modelUpdater() {
|
||||||
* This query is expensive and should not be run too often,
|
while (!Thread.interrupted()) {
|
||||||
* and the data doesn't change that often either.
|
List<SiteOverviewModel.DiscoveredDomain> domains = new ArrayList<>();
|
||||||
* <p></p>
|
|
||||||
* This method is synchronized to avoid multiple threads updating the model at the same time.
|
|
||||||
*/
|
|
||||||
private synchronized void updateModel() {
|
|
||||||
var currentModel = model;
|
|
||||||
if (currentModel.age().compareTo(Duration.ofMinutes(15)) < 0) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
List<SiteOverviewModel.DiscoveredDomain> domains = new ArrayList<>();
|
// This query can be quite expensive, so we can't run it on demand
|
||||||
|
// for every request. Instead, we run it every 15 minutes and cache
|
||||||
|
// the result.
|
||||||
|
|
||||||
try (var conn = dataSource.getConnection();
|
try (var conn = dataSource.getConnection();
|
||||||
var stmt = conn.prepareStatement("SELECT DOMAIN_NAME, DISCOVER_DATE FROM EC_DOMAIN WHERE NODE_AFFINITY = 0 ORDER BY ID DESC LIMIT 10")) {
|
var stmt = conn.prepareStatement("""
|
||||||
|
SELECT DOMAIN_NAME, DISCOVER_DATE
|
||||||
|
FROM EC_DOMAIN
|
||||||
|
WHERE NODE_AFFINITY = 0
|
||||||
|
ORDER BY ID DESC
|
||||||
|
LIMIT 10
|
||||||
|
"""))
|
||||||
|
{
|
||||||
|
var rs = stmt.executeQuery();
|
||||||
|
while (rs.next()) {
|
||||||
|
domains.add(new SiteOverviewModel.DiscoveredDomain(
|
||||||
|
rs.getString("DOMAIN_NAME"),
|
||||||
|
rs.getString("DISCOVER_DATE"))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
} catch (SQLException ex) {
|
||||||
|
logger.warn("Failed to get recently added domains: {}", ex.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
var rs = stmt.executeQuery();
|
cachedOverviewModel = new SiteOverviewModel(domains);
|
||||||
while (rs.next()) {
|
|
||||||
domains.add(new SiteOverviewModel.DiscoveredDomain(rs.getString("DOMAIN_NAME"), rs.getString("DISCOVER_DATE")));
|
try {
|
||||||
|
TimeUnit.MINUTES.sleep(15);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (SQLException ex) {
|
|
||||||
throw new RuntimeException();
|
|
||||||
}
|
|
||||||
|
|
||||||
model = new SiteOverviewModel(domains);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public record SiteOverviewModel(List<DiscoveredDomain> domains, Instant captureTime) {
|
public record SiteOverviewModel(List<DiscoveredDomain> domains) {
|
||||||
|
|
||||||
public SiteOverviewModel(List<DiscoveredDomain> domains) {
|
|
||||||
this(domains, Instant.now());
|
|
||||||
}
|
|
||||||
|
|
||||||
public record DiscoveredDomain(String name, String timestamp) {}
|
public record DiscoveredDomain(String name, String timestamp) {}
|
||||||
|
|
||||||
public Duration age() {
|
|
||||||
return Duration.between(captureTime, Instant.now());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@GET
|
@GET
|
||||||
@@ -139,7 +137,7 @@ public class SearchSiteInfoService {
|
|||||||
@PathParam String domainName,
|
@PathParam String domainName,
|
||||||
@QueryParam String view,
|
@QueryParam String view,
|
||||||
@QueryParam Integer page
|
@QueryParam Integer page
|
||||||
) throws SQLException {
|
) throws SQLException, ExecutionException {
|
||||||
|
|
||||||
if (null == domainName || domainName.isBlank()) {
|
if (null == domainName || domainName.isBlank()) {
|
||||||
return null;
|
return null;
|
||||||
@@ -225,7 +223,7 @@ public class SearchSiteInfoService {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
private SiteInfoWithContext listInfo(Context context, String domainName) {
|
private SiteInfoWithContext listInfo(Context context, String domainName) throws ExecutionException {
|
||||||
|
|
||||||
var domain = new EdgeDomain(domainName);
|
var domain = new EdgeDomain(domainName);
|
||||||
final int domainId = domainQueries.tryGetDomainId(domain).orElse(-1);
|
final int domainId = domainQueries.tryGetDomainId(domain).orElse(-1);
|
||||||
|
@@ -36,10 +36,11 @@
|
|||||||
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
@if (filters.showRecentOption.isSet()) <input type="hidden" name="js" value="${filters.removeJsOption.value()}"> @endif
|
||||||
|
@if (filters.reduceAdtechOption.isSet()) <input type="hidden" name="adtech" value="${filters.reduceAdtechOption.value()}"> @endif
|
||||||
|
@if (filters.searchTitleOption.isSet()) <input type="hidden" name="searchTitle" value="${filters.searchTitleOption.value()}"> @endif
|
||||||
|
@if (filters.showRecentOption.isSet()) <input type="hidden" name="recent" value="${filters.showRecentOption.value()}"> @endif
|
||||||
|
|
||||||
<input type="hidden" name="js" value="${filters.removeJsOption.value()}">
|
|
||||||
<input type="hidden" name="adtech" value="${filters.reduceAdtechOption.value()}">
|
|
||||||
<input type="hidden" name="searchTitle" value="${filters.searchTitleOption.value()}">
|
|
||||||
<input type="hidden" name="profile" value="${profile}">
|
<input type="hidden" name="profile" value="${profile}">
|
||||||
<input type="hidden" name="recent" value="${filters.showRecentOption.value()}">
|
|
||||||
</form>
|
</form>
|
||||||
|
@@ -36,7 +36,7 @@
|
|||||||
<div class="text-slate-700 dark:text-white text-sm p-4">
|
<div class="text-slate-700 dark:text-white text-sm p-4">
|
||||||
<div class="fas fa-gift mr-1 text-margeblue dark:text-slate-200"></div>
|
<div class="fas fa-gift mr-1 text-margeblue dark:text-slate-200"></div>
|
||||||
This is the new design and home of Marginalia Search.
|
This is the new design and home of Marginalia Search.
|
||||||
You can about what this entails <a href="https://about.marginalia-search.com/article/redesign/" class="underline text-liteblue dark:text-blue-200">here</a>.
|
You can read about what this entails <a href="https://about.marginalia-search.com/article/redesign/" class="underline text-liteblue dark:text-blue-200">here</a>.
|
||||||
<p class="my-4"></p>
|
<p class="my-4"></p>
|
||||||
The old version of Marginalia Search remains available at
|
The old version of Marginalia Search remains available at
|
||||||
<a href="https://old-search.marginalia.nu/" class="underline text-liteblue dark:text-blue-200">https://old-search.marginalia.nu/</a>.
|
<a href="https://old-search.marginalia.nu/" class="underline text-liteblue dark:text-blue-200">https://old-search.marginalia.nu/</a>.
|
||||||
|
@@ -1,5 +1,4 @@
|
|||||||
@import nu.marginalia.db.DbDomainQueries
|
@import nu.marginalia.db.DbDomainQueries
|
||||||
@import nu.marginalia.model.EdgeDomain
|
|
||||||
@import nu.marginalia.search.svc.SearchSiteInfoService
|
@import nu.marginalia.search.svc.SearchSiteInfoService
|
||||||
@import nu.marginalia.search.svc.SearchSiteInfoService.*
|
@import nu.marginalia.search.svc.SearchSiteInfoService.*
|
||||||
@import nu.marginalia.search.model.UrlDetails
|
@import nu.marginalia.search.model.UrlDetails
|
||||||
|
Reference in New Issue
Block a user