mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-06 07:32:38 +02:00
Compare commits
4 Commits
deploy-005
...
deploy-005
Author | SHA1 | Date | |
---|---|---|---|
|
aa9b1244ea | ||
|
2d17233366 | ||
|
b245cc9f38 | ||
|
6614d05bdf |
@@ -1,118 +0,0 @@
|
||||
package nu.marginalia.db;
|
||||
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
|
||||
import java.sql.Connection;
|
||||
import java.sql.PreparedStatement;
|
||||
import java.sql.SQLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.OptionalInt;
|
||||
|
||||
/** Class used in exporting data. This is intended to be used for a brief time
|
||||
* and then discarded, not kept around as a service.
|
||||
*/
|
||||
public class DbDomainStatsExportMultitool implements AutoCloseable {
|
||||
private final Connection connection;
|
||||
private final int nodeId;
|
||||
private final PreparedStatement knownUrlsQuery;
|
||||
private final PreparedStatement visitedUrlsQuery;
|
||||
private final PreparedStatement goodUrlsQuery;
|
||||
private final PreparedStatement domainNameToId;
|
||||
|
||||
private final PreparedStatement allDomainsQuery;
|
||||
private final PreparedStatement crawlQueueDomains;
|
||||
private final PreparedStatement indexedDomainsQuery;
|
||||
|
||||
public DbDomainStatsExportMultitool(HikariDataSource dataSource, int nodeId) throws SQLException {
|
||||
this.connection = dataSource.getConnection();
|
||||
this.nodeId = nodeId;
|
||||
|
||||
knownUrlsQuery = connection.prepareStatement("""
|
||||
SELECT KNOWN_URLS
|
||||
FROM EC_DOMAIN INNER JOIN DOMAIN_METADATA
|
||||
ON EC_DOMAIN.ID=DOMAIN_METADATA.ID
|
||||
WHERE DOMAIN_NAME=?
|
||||
""");
|
||||
visitedUrlsQuery = connection.prepareStatement("""
|
||||
SELECT VISITED_URLS
|
||||
FROM EC_DOMAIN INNER JOIN DOMAIN_METADATA
|
||||
ON EC_DOMAIN.ID=DOMAIN_METADATA.ID
|
||||
WHERE DOMAIN_NAME=?
|
||||
""");
|
||||
goodUrlsQuery = connection.prepareStatement("""
|
||||
SELECT GOOD_URLS
|
||||
FROM EC_DOMAIN INNER JOIN DOMAIN_METADATA
|
||||
ON EC_DOMAIN.ID=DOMAIN_METADATA.ID
|
||||
WHERE DOMAIN_NAME=?
|
||||
""");
|
||||
domainNameToId = connection.prepareStatement("""
|
||||
SELECT ID
|
||||
FROM EC_DOMAIN
|
||||
WHERE DOMAIN_NAME=?
|
||||
""");
|
||||
allDomainsQuery = connection.prepareStatement("""
|
||||
SELECT DOMAIN_NAME
|
||||
FROM EC_DOMAIN
|
||||
""");
|
||||
crawlQueueDomains = connection.prepareStatement("""
|
||||
SELECT DOMAIN_NAME
|
||||
FROM CRAWL_QUEUE
|
||||
""");
|
||||
indexedDomainsQuery = connection.prepareStatement("""
|
||||
SELECT DOMAIN_NAME
|
||||
FROM EC_DOMAIN
|
||||
WHERE INDEXED > 0
|
||||
""");
|
||||
}
|
||||
|
||||
public OptionalInt getVisitedUrls(String domainName) throws SQLException {
|
||||
return executeNameToIntQuery(domainName, visitedUrlsQuery);
|
||||
}
|
||||
|
||||
public OptionalInt getDomainId(String domainName) throws SQLException {
|
||||
return executeNameToIntQuery(domainName, domainNameToId);
|
||||
}
|
||||
|
||||
public List<String> getCrawlQueueDomains() throws SQLException {
|
||||
return executeListQuery(crawlQueueDomains, 100);
|
||||
}
|
||||
public List<String> getAllIndexedDomains() throws SQLException {
|
||||
return executeListQuery(indexedDomainsQuery, 100_000);
|
||||
}
|
||||
|
||||
private OptionalInt executeNameToIntQuery(String domainName, PreparedStatement statement)
|
||||
throws SQLException {
|
||||
statement.setString(1, domainName);
|
||||
var rs = statement.executeQuery();
|
||||
|
||||
if (rs.next()) {
|
||||
return OptionalInt.of(rs.getInt(1));
|
||||
}
|
||||
|
||||
return OptionalInt.empty();
|
||||
}
|
||||
|
||||
private List<String> executeListQuery(PreparedStatement statement, int sizeHint) throws SQLException {
|
||||
List<String> ret = new ArrayList<>(sizeHint);
|
||||
|
||||
var rs = statement.executeQuery();
|
||||
|
||||
while (rs.next()) {
|
||||
ret.add(rs.getString(1));
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws SQLException {
|
||||
knownUrlsQuery.close();
|
||||
goodUrlsQuery.close();
|
||||
visitedUrlsQuery.close();
|
||||
allDomainsQuery.close();
|
||||
crawlQueueDomains.close();
|
||||
domainNameToId.close();
|
||||
connection.close();
|
||||
}
|
||||
}
|
@@ -89,7 +89,7 @@ public class DatabaseModule extends AbstractModule {
|
||||
config.addDataSourceProperty("prepStmtCacheSize", "250");
|
||||
config.addDataSourceProperty("prepStmtCacheSqlLimit", "2048");
|
||||
|
||||
config.setMaximumPoolSize(5);
|
||||
config.setMaximumPoolSize(Integer.getInteger("db.poolSize", 5));
|
||||
config.setMinimumIdle(2);
|
||||
|
||||
config.setMaxLifetime(Duration.ofMinutes(9).toMillis());
|
||||
|
@@ -67,8 +67,12 @@ public class SearchSiteInfoService {
|
||||
this.screenshotService = screenshotService;
|
||||
this.dataSource = dataSource;
|
||||
this.searchSiteSubscriptions = searchSiteSubscriptions;
|
||||
|
||||
Thread.ofPlatform().name("Recently Added Domains Model Updater").start(this::modelUpdater);
|
||||
}
|
||||
|
||||
private volatile SiteOverviewModel cachedOverviewModel = new SiteOverviewModel(List.of());
|
||||
|
||||
@GET
|
||||
@Path("/site")
|
||||
public ModelAndView<?> handleOverview(@QueryParam String domain) {
|
||||
@@ -77,23 +81,48 @@ public class SearchSiteInfoService {
|
||||
return new MapModelAndView("redirect.jte", Map.of("url", "/site/"+domain));
|
||||
}
|
||||
|
||||
List<SiteOverviewModel.DiscoveredDomain> domains = new ArrayList<>();
|
||||
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("SELECT DOMAIN_NAME, DISCOVER_DATE FROM EC_DOMAIN WHERE NODE_AFFINITY = 0 ORDER BY ID DESC LIMIT 10")) {
|
||||
|
||||
var rs = stmt.executeQuery();
|
||||
while (rs.next()) {
|
||||
domains.add(new SiteOverviewModel.DiscoveredDomain(rs.getString("DOMAIN_NAME"), rs.getString("DISCOVER_DATE")));
|
||||
}
|
||||
}
|
||||
catch (SQLException ex) {
|
||||
throw new RuntimeException();
|
||||
}
|
||||
|
||||
return new MapModelAndView("siteinfo/start.jte",
|
||||
Map.of("navbar", NavbarModel.SITEINFO,
|
||||
"model", new SiteOverviewModel(domains)));
|
||||
"model", cachedOverviewModel));
|
||||
}
|
||||
|
||||
private void modelUpdater() {
|
||||
while (!Thread.interrupted()) {
|
||||
List<SiteOverviewModel.DiscoveredDomain> domains = new ArrayList<>();
|
||||
|
||||
// This query can be quite expensive, so we can't run it on demand
|
||||
// for every request. Instead, we run it every 15 minutes and cache
|
||||
// the result.
|
||||
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
SELECT DOMAIN_NAME, DISCOVER_DATE
|
||||
FROM EC_DOMAIN
|
||||
WHERE NODE_AFFINITY = 0
|
||||
ORDER BY ID DESC
|
||||
LIMIT 10
|
||||
"""))
|
||||
{
|
||||
var rs = stmt.executeQuery();
|
||||
while (rs.next()) {
|
||||
domains.add(new SiteOverviewModel.DiscoveredDomain(
|
||||
rs.getString("DOMAIN_NAME"),
|
||||
rs.getString("DISCOVER_DATE"))
|
||||
);
|
||||
}
|
||||
} catch (SQLException ex) {
|
||||
logger.warn("Failed to get recently added domains: {}", ex.getMessage());
|
||||
}
|
||||
|
||||
cachedOverviewModel = new SiteOverviewModel(domains);
|
||||
|
||||
try {
|
||||
TimeUnit.MINUTES.sleep(15);
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public record SiteOverviewModel(List<DiscoveredDomain> domains) {
|
||||
|
@@ -1,5 +1,4 @@
|
||||
@import nu.marginalia.db.DbDomainQueries
|
||||
@import nu.marginalia.model.EdgeDomain
|
||||
@import nu.marginalia.search.svc.SearchSiteInfoService
|
||||
@import nu.marginalia.search.svc.SearchSiteInfoService.*
|
||||
@import nu.marginalia.search.model.UrlDetails
|
||||
@@ -81,35 +80,6 @@
|
||||
|
||||
@endif
|
||||
|
||||
|
||||
@if (!siteInfo.siblingDomains().isEmpty())
|
||||
<div class="mx-3 flex place-items-baseline space-x-2 p-2 bg-gray-100 dark:bg-gray-600 rounded">
|
||||
<i class="fas fa-globe"></i>
|
||||
<span>Related Subdomains</span>
|
||||
</div>
|
||||
|
||||
<table class="min-w-full divide-y divide-gray-200 dark:divide-gray-600 mx-4">
|
||||
<thead>
|
||||
<tr class="bg-gray-50 dark:bg-gray-700">
|
||||
<th scope="col" class="px-2 py-2 text-left text-xs font-medium text-gray-500 dark:text-gray-100 uppercase tracking-wider">Domain Name</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody class="bg-white dark:bg-gray-800 divide-y divide-gray-200 dark:divide-gray-600 text-xs">
|
||||
@for (DbDomainQueries.DomainWithNode sibling : siteInfo.siblingDomains())
|
||||
<tr>
|
||||
<td class="px-3 py-6 md:py-3 whitespace-nowrap">
|
||||
<a class="text-liteblue dark:text-blue-200" href="/site/${sibling.domain().toString()}">${sibling.domain().toString()}</a>
|
||||
|
||||
@if (!sibling.isIndexed())
|
||||
<i class="ml-1 fa-regular fa-question-circle text-gray-400 dark:text-gray-600 text-xs" title="Not indexed"></i>
|
||||
@endif
|
||||
</td>
|
||||
</tr>
|
||||
@endfor
|
||||
</tbody>
|
||||
</table>
|
||||
@endif
|
||||
|
||||
@if (siteInfo.domainInformation().isUnknownDomain())
|
||||
<div class="mx-3 flex place-items-baseline space-x-2 p-2 bg-gray-100 dark:bg-gray-600 rounded">
|
||||
<i class="fa-regular fa-circle-question"></i>
|
||||
@@ -178,6 +148,36 @@
|
||||
</form>
|
||||
@endif
|
||||
|
||||
|
||||
@if (!siteInfo.siblingDomains().isEmpty())
|
||||
<div class="mx-3 flex place-items-baseline space-x-2 p-2 bg-gray-100 dark:bg-gray-600 rounded">
|
||||
<i class="fas fa-globe"></i>
|
||||
<span>Related Subdomains</span>
|
||||
</div>
|
||||
|
||||
<table class="min-w-full divide-y divide-gray-200 dark:divide-gray-600 mx-4">
|
||||
<thead>
|
||||
<tr class="bg-gray-50 dark:bg-gray-700">
|
||||
<th scope="col" class="px-2 py-2 text-left text-xs font-medium text-gray-500 dark:text-gray-100 uppercase tracking-wider">Domain Name</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody class="bg-white dark:bg-gray-800 divide-y divide-gray-200 dark:divide-gray-600 text-xs">
|
||||
@for (DbDomainQueries.DomainWithNode sibling : siteInfo.siblingDomains())
|
||||
<tr>
|
||||
<td class="px-3 py-6 md:py-3 whitespace-nowrap">
|
||||
<a class="text-liteblue dark:text-blue-200" href="/site/${sibling.domain().toString()}">${sibling.domain().toString()}</a>
|
||||
|
||||
@if (!sibling.isIndexed())
|
||||
<i class="ml-1 fa-regular fa-question-circle text-gray-400 dark:text-gray-600 text-xs" title="Not indexed"></i>
|
||||
@endif
|
||||
</td>
|
||||
</tr>
|
||||
@endfor
|
||||
</tbody>
|
||||
</table>
|
||||
@endif
|
||||
|
||||
|
||||
@if (siteInfo.isKnown())
|
||||
<div class="mx-3 flex place-items-baseline space-x-2 p-2 bg-gray-100 dark:bg-gray-600 rounded">
|
||||
<i class="fas fa-chart-simple"></i>
|
||||
|
Reference in New Issue
Block a user