mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-05 21:22:39 +02:00
Compare commits
5 Commits
deploy-004
...
deploy-004
Author | SHA1 | Date | |
---|---|---|---|
|
b7f0a2a98e | ||
|
5fb76b2e79 | ||
|
ad8c97f342 | ||
|
dc1b6373eb | ||
|
983d6d067c |
@@ -103,11 +103,11 @@ public class DbDomainQueries {
|
||||
}
|
||||
}
|
||||
|
||||
public List<EdgeDomain> otherSubdomains(EdgeDomain domain, int cnt) {
|
||||
List<EdgeDomain> ret = new ArrayList<>();
|
||||
public List<DomainWithNode> otherSubdomains(EdgeDomain domain, int cnt) {
|
||||
List<DomainWithNode> ret = new ArrayList<>();
|
||||
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("SELECT DOMAIN_NAME FROM EC_DOMAIN WHERE DOMAIN_TOP = ? LIMIT ?")) {
|
||||
var stmt = conn.prepareStatement("SELECT DOMAIN_NAME, NODE_AFFINITY FROM EC_DOMAIN WHERE DOMAIN_TOP = ? LIMIT ?")) {
|
||||
stmt.setString(1, domain.topDomain);
|
||||
stmt.setInt(2, cnt);
|
||||
|
||||
@@ -118,7 +118,7 @@ public class DbDomainQueries {
|
||||
if (sibling.equals(domain))
|
||||
continue;
|
||||
|
||||
ret.add(sibling);
|
||||
ret.add(new DomainWithNode(sibling, rs.getInt(2)));
|
||||
}
|
||||
} catch (SQLException e) {
|
||||
logger.error("Failed to get domain neighbors");
|
||||
@@ -126,4 +126,10 @@ public class DbDomainQueries {
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
public record DomainWithNode (EdgeDomain domain, int nodeAffinity) {
|
||||
public boolean isIndexed() {
|
||||
return nodeAffinity > 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -67,8 +67,7 @@ public class ResultRankingParameters {
|
||||
this.exportDebugData = exportDebugData;
|
||||
}
|
||||
|
||||
public static ResultRankingParameters sensibleDefaults() {
|
||||
return builder()
|
||||
private static final ResultRankingParameters _sensibleDefaults = builder()
|
||||
.bm25Params(new Bm25Parameters(1.2, 0.5))
|
||||
.shortDocumentThreshold(2000)
|
||||
.shortDocumentPenalty(2.)
|
||||
@@ -85,6 +84,9 @@ public class ResultRankingParameters {
|
||||
.exportDebugData(false)
|
||||
.disablePenalties(false)
|
||||
.build();
|
||||
|
||||
public static ResultRankingParameters sensibleDefaults() {
|
||||
return _sensibleDefaults;
|
||||
}
|
||||
|
||||
public static ResultRankingParametersBuilder builder() {
|
||||
|
@@ -1,15 +1,14 @@
|
||||
package nu.marginalia.search;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import io.jooby.Context;
|
||||
import io.jooby.Jooby;
|
||||
import io.prometheus.client.Counter;
|
||||
import io.prometheus.client.Histogram;
|
||||
import nu.marginalia.WebsiteUrl;
|
||||
import nu.marginalia.search.svc.*;
|
||||
import nu.marginalia.service.discovery.property.ServicePartition;
|
||||
import nu.marginalia.service.server.BaseServiceParams;
|
||||
import nu.marginalia.service.server.JoobyService;
|
||||
import nu.marginalia.service.server.StaticResources;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@@ -34,8 +33,6 @@ public class SearchService extends JoobyService {
|
||||
|
||||
@Inject
|
||||
public SearchService(BaseServiceParams params,
|
||||
WebsiteUrl websiteUrl,
|
||||
StaticResources staticResources,
|
||||
SearchFrontPageService frontPageService,
|
||||
SearchAddToCrawlQueueService addToCrawlQueueService,
|
||||
SearchSiteSubscriptionService siteSubscriptionService,
|
||||
@@ -62,7 +59,25 @@ public class SearchService extends JoobyService {
|
||||
public void startJooby(Jooby jooby) {
|
||||
super.startJooby(jooby);
|
||||
|
||||
final String startTimeAttribute = "start-time";
|
||||
|
||||
jooby.get("/export-opml", siteSubscriptionService::exportOpml);
|
||||
jooby.before((Context ctx) -> {
|
||||
ctx.setAttribute(startTimeAttribute, System.nanoTime());
|
||||
});
|
||||
|
||||
jooby.after((Context ctx, Object result, Throwable failure) -> {
|
||||
if (failure != null) {
|
||||
wmsa_search_service_error_count.labels(ctx.getRoute().getPattern(), ctx.getMethod()).inc();
|
||||
}
|
||||
else {
|
||||
Long startTime = ctx.getAttribute(startTimeAttribute);
|
||||
if (startTime != null) {
|
||||
wmsa_search_service_request_time.labels(ctx.getRoute().getPattern(), ctx.getMethod())
|
||||
.observe((System.nanoTime() - startTime) / 1e9);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
|
@@ -47,18 +47,23 @@ public class SearchAddToCrawlQueueService {
|
||||
return new MapModelAndView("redirect.jte", Map.of("url", "/site/"+domainName));
|
||||
}
|
||||
|
||||
private void addToCrawlQueue(int id) throws SQLException {
|
||||
/** Mark a domain for crawling by setting node affinity to zero,
|
||||
* unless it is already marked for crawling, then node affinity should
|
||||
* be left unchanged.
|
||||
* */
|
||||
void addToCrawlQueue(int domainId) throws SQLException {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
INSERT IGNORE INTO CRAWL_QUEUE(DOMAIN_NAME, SOURCE)
|
||||
SELECT DOMAIN_NAME, "user" FROM EC_DOMAIN WHERE ID=?
|
||||
UPDATE EC_DOMAIN
|
||||
SET WMSA_prod.EC_DOMAIN.NODE_AFFINITY = 0
|
||||
WHERE ID=? AND WMSA_prod.EC_DOMAIN.NODE_AFFINITY < 0
|
||||
""")) {
|
||||
stmt.setInt(1, id);
|
||||
stmt.setInt(1, domainId);
|
||||
stmt.executeUpdate();
|
||||
}
|
||||
}
|
||||
|
||||
private String getDomainName(int id) {
|
||||
String getDomainName(int id) {
|
||||
var domain = domainQueries.getDomain(id);
|
||||
if (domain.isEmpty())
|
||||
throw new IllegalArgumentException();
|
||||
|
@@ -352,7 +352,7 @@ public class SearchSiteInfoService {
|
||||
|
||||
public record SiteInfoWithContext(String domain,
|
||||
boolean isSubscribed,
|
||||
List<EdgeDomain> siblingDomains,
|
||||
List<DbDomainQueries.DomainWithNode> siblingDomains,
|
||||
int domainId,
|
||||
String siteUrl,
|
||||
boolean hasScreenshot,
|
||||
|
@@ -2,13 +2,24 @@
|
||||
|
||||
This service handles search traffic and is the service
|
||||
you're most directly interacting with when visiting
|
||||
[search.marginalia.nu](https://search.marginalia.nu).
|
||||
[marginalia-search.com](https://marginalia-search.com).
|
||||
|
||||
It interprets a "human" query and translates it into a
|
||||
request that gets passed to the index service, which finds
|
||||
related documents, which this service then ranks and returns
|
||||
to the user.
|
||||
|
||||
The UI is built using [JTE templates](https://jte.gg/syntax/) and the [Jooby framework](https://jooby.io), primarily using
|
||||
its MVC facilities.
|
||||
|
||||
When developing, it's possible to set up a mock version of the UI by running
|
||||
the gradle command
|
||||
|
||||
```$ ./gradlew paperDoll -i```
|
||||
|
||||
The UI will be available at http://localhost:9999/, and has hot reloading of JTE classes
|
||||
and static resources.
|
||||
|
||||
|
||||

|
||||
|
||||
|
@@ -1,3 +1,4 @@
|
||||
@import nu.marginalia.db.DbDomainQueries
|
||||
@import nu.marginalia.model.EdgeDomain
|
||||
@import nu.marginalia.search.svc.SearchSiteInfoService
|
||||
@import nu.marginalia.search.svc.SearchSiteInfoService.*
|
||||
@@ -94,10 +95,14 @@
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody class="bg-white dark:bg-gray-800 divide-y divide-gray-200 dark:divide-gray-600 text-xs">
|
||||
@for (EdgeDomain sibling : siteInfo.siblingDomains())
|
||||
@for (DbDomainQueries.DomainWithNode sibling : siteInfo.siblingDomains())
|
||||
<tr>
|
||||
<td class="px-3 py-6 md:py-3 whitespace-nowrap">
|
||||
<a class="text-liteblue dark:text-blue-200" href="/site/${sibling.toString()}">${sibling.toString()}</a>
|
||||
<a class="text-liteblue dark:text-blue-200" href="/site/${sibling.domain().toString()}">${sibling.domain().toString()}</a>
|
||||
|
||||
@if (!sibling.isIndexed())
|
||||
<i class="ml-1 fa-regular fa-question-circle text-gray-400 dark:text-gray-600 text-xs" title="Not indexed"></i>
|
||||
@endif
|
||||
</td>
|
||||
</tr>
|
||||
@endfor
|
||||
|
@@ -6,6 +6,7 @@ import nu.marginalia.api.domains.model.SimilarDomain;
|
||||
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
|
||||
import nu.marginalia.browse.model.BrowseResult;
|
||||
import nu.marginalia.browse.model.BrowseResultSet;
|
||||
import nu.marginalia.db.DbDomainQueries;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import nu.marginalia.model.crawl.DomainIndexingState;
|
||||
@@ -132,8 +133,9 @@ public class MockedSearchResults {
|
||||
return new SearchSiteInfoService.SiteInfoWithContext(
|
||||
"www.example.com",
|
||||
false,
|
||||
List.of(new EdgeDomain("example.com"),
|
||||
new EdgeDomain("about.example.com")
|
||||
List.of(
|
||||
new DbDomainQueries.DomainWithNode(new EdgeDomain("example.com"), 1),
|
||||
new DbDomainQueries.DomainWithNode(new EdgeDomain("example.com"), 0)
|
||||
),
|
||||
14,
|
||||
"https://www.example.com",
|
||||
|
@@ -0,0 +1,85 @@
|
||||
package nu.marginalia.search.svc;
|
||||
|
||||
import com.zaxxer.hikari.HikariConfig;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import nu.marginalia.db.DbDomainQueries;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.test.TestMigrationLoader;
|
||||
import org.junit.jupiter.api.*;
|
||||
import org.testcontainers.containers.MariaDBContainer;
|
||||
import org.testcontainers.junit.jupiter.Container;
|
||||
import org.testcontainers.junit.jupiter.Testcontainers;
|
||||
|
||||
import java.sql.SQLException;
|
||||
|
||||
@Tag("slow")
|
||||
@Testcontainers
|
||||
class SearchAddToCrawlQueueServiceTest {
|
||||
@Container
|
||||
static MariaDBContainer<?> mariaDBContainer = new MariaDBContainer<>("mariadb")
|
||||
.withDatabaseName("WMSA_prod")
|
||||
.withUsername("wmsa")
|
||||
.withPassword("wmsa")
|
||||
.withNetworkAliases("mariadb");
|
||||
|
||||
static HikariDataSource dataSource;
|
||||
|
||||
private DbDomainQueries domainQueries;
|
||||
private SearchAddToCrawlQueueService addToCrawlQueueService;
|
||||
|
||||
@BeforeEach
|
||||
public void setUp() throws SQLException {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.createStatement()) {
|
||||
stmt.executeQuery("DELETE FROM EC_DOMAIN"); // Wipe any old state from other test runs
|
||||
|
||||
stmt.executeQuery("INSERT INTO EC_DOMAIN (DOMAIN_NAME, DOMAIN_TOP, NODE_AFFINITY) VALUES ('known.example.com', 'example.com', -1)");
|
||||
stmt.executeQuery("INSERT INTO EC_DOMAIN (DOMAIN_NAME, DOMAIN_TOP, NODE_AFFINITY) VALUES ('added.example.com', 'example.com', 0)");
|
||||
stmt.executeQuery("INSERT INTO EC_DOMAIN (DOMAIN_NAME, DOMAIN_TOP, NODE_AFFINITY) VALUES ('indexed.example.com', 'example.com', 1)");
|
||||
}
|
||||
|
||||
domainQueries = new DbDomainQueries(dataSource);
|
||||
addToCrawlQueueService = new SearchAddToCrawlQueueService(domainQueries, dataSource);
|
||||
}
|
||||
|
||||
@BeforeAll
|
||||
public static void setUpAll() {
|
||||
HikariConfig config = new HikariConfig();
|
||||
config.setJdbcUrl(mariaDBContainer.getJdbcUrl());
|
||||
config.setUsername("wmsa");
|
||||
config.setPassword("wmsa");
|
||||
|
||||
dataSource = new HikariDataSource(config);
|
||||
TestMigrationLoader.flywayMigration(dataSource);
|
||||
}
|
||||
|
||||
private int getNodeAffinity(String domainName) throws SQLException {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("SELECT NODE_AFFINITY FROM EC_DOMAIN WHERE DOMAIN_NAME=?"))
|
||||
{
|
||||
stmt.setString(1, domainName);
|
||||
var rsp = stmt.executeQuery();
|
||||
if (rsp.next()) {
|
||||
return rsp.getInt(1);
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Test
|
||||
void addToCrawlQueue() throws SQLException {
|
||||
int knownId = domainQueries.getDomainId(new EdgeDomain("known.example.com"));
|
||||
int addedId = domainQueries.getDomainId(new EdgeDomain("added.example.com"));
|
||||
int indexedId = domainQueries.getDomainId(new EdgeDomain("indexed.example.com"));
|
||||
|
||||
addToCrawlQueueService.addToCrawlQueue(knownId);
|
||||
addToCrawlQueueService.addToCrawlQueue(addedId);
|
||||
addToCrawlQueueService.addToCrawlQueue(indexedId);
|
||||
|
||||
Assertions.assertEquals(0, getNodeAffinity("known.example.com"));
|
||||
Assertions.assertEquals(0, getNodeAffinity("added.example.com"));
|
||||
Assertions.assertEquals(1, getNodeAffinity("indexed.example.com"));
|
||||
}
|
||||
|
||||
}
|
Reference in New Issue
Block a user