mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-06 17:32:39 +02:00
Compare commits
7 Commits
deploy-004
...
deploy-004
Author | SHA1 | Date | |
---|---|---|---|
|
b7f0a2a98e | ||
|
5fb76b2e79 | ||
|
ad8c97f342 | ||
|
dc1b6373eb | ||
|
983d6d067c | ||
|
a84a06975c | ||
|
d2864c13ec |
@@ -103,11 +103,11 @@ public class DbDomainQueries {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<EdgeDomain> otherSubdomains(EdgeDomain domain, int cnt) {
|
public List<DomainWithNode> otherSubdomains(EdgeDomain domain, int cnt) {
|
||||||
List<EdgeDomain> ret = new ArrayList<>();
|
List<DomainWithNode> ret = new ArrayList<>();
|
||||||
|
|
||||||
try (var conn = dataSource.getConnection();
|
try (var conn = dataSource.getConnection();
|
||||||
var stmt = conn.prepareStatement("SELECT DOMAIN_NAME FROM EC_DOMAIN WHERE DOMAIN_TOP = ? LIMIT ?")) {
|
var stmt = conn.prepareStatement("SELECT DOMAIN_NAME, NODE_AFFINITY FROM EC_DOMAIN WHERE DOMAIN_TOP = ? LIMIT ?")) {
|
||||||
stmt.setString(1, domain.topDomain);
|
stmt.setString(1, domain.topDomain);
|
||||||
stmt.setInt(2, cnt);
|
stmt.setInt(2, cnt);
|
||||||
|
|
||||||
@@ -118,7 +118,7 @@ public class DbDomainQueries {
|
|||||||
if (sibling.equals(domain))
|
if (sibling.equals(domain))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
ret.add(sibling);
|
ret.add(new DomainWithNode(sibling, rs.getInt(2)));
|
||||||
}
|
}
|
||||||
} catch (SQLException e) {
|
} catch (SQLException e) {
|
||||||
logger.error("Failed to get domain neighbors");
|
logger.error("Failed to get domain neighbors");
|
||||||
@@ -126,4 +126,10 @@ public class DbDomainQueries {
|
|||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public record DomainWithNode (EdgeDomain domain, int nodeAffinity) {
|
||||||
|
public boolean isIndexed() {
|
||||||
|
return nodeAffinity > 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@@ -83,6 +83,11 @@ public class QueryParams {
|
|||||||
if (path.endsWith("StoryView.py")) { // folklore.org is neat
|
if (path.endsWith("StoryView.py")) { // folklore.org is neat
|
||||||
return param.startsWith("project=") || param.startsWith("story=");
|
return param.startsWith("project=") || param.startsWith("story=");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// www.perseus.tufts.edu:
|
||||||
|
if (param.startsWith("collection=")) return true;
|
||||||
|
if (param.startsWith("doc=")) return true;
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -121,6 +121,7 @@ public class IndexProtobufCodec {
|
|||||||
params.getTcfProximityWeight(),
|
params.getTcfProximityWeight(),
|
||||||
ResultRankingParameters.TemporalBias.valueOf(params.getTemporalBias().getBias().name()),
|
ResultRankingParameters.TemporalBias.valueOf(params.getTemporalBias().getBias().name()),
|
||||||
params.getTemporalBiasWeight(),
|
params.getTemporalBiasWeight(),
|
||||||
|
params.getDisablePenalties(),
|
||||||
params.getExportDebugData()
|
params.getExportDebugData()
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@@ -146,6 +147,7 @@ public class IndexProtobufCodec {
|
|||||||
.setTcfProximityWeight(rankingParams.tcfProximity)
|
.setTcfProximityWeight(rankingParams.tcfProximity)
|
||||||
.setTcfVerbatimWeight(rankingParams.tcfVerbatim)
|
.setTcfVerbatimWeight(rankingParams.tcfVerbatim)
|
||||||
.setTemporalBiasWeight(rankingParams.temporalBiasWeight)
|
.setTemporalBiasWeight(rankingParams.temporalBiasWeight)
|
||||||
|
.setDisablePenalties(rankingParams.disablePenalties)
|
||||||
.setExportDebugData(rankingParams.exportDebugData);
|
.setExportDebugData(rankingParams.exportDebugData);
|
||||||
|
|
||||||
if (temporalBias != null && temporalBias.getBias() != RpcTemporalBias.Bias.NONE) {
|
if (temporalBias != null && temporalBias.getBias() != RpcTemporalBias.Bias.NONE) {
|
||||||
|
@@ -42,12 +42,14 @@ public class ResultRankingParameters {
|
|||||||
public double tcfVerbatim;
|
public double tcfVerbatim;
|
||||||
public double tcfProximity;
|
public double tcfProximity;
|
||||||
|
|
||||||
|
|
||||||
public TemporalBias temporalBias;
|
public TemporalBias temporalBias;
|
||||||
public double temporalBiasWeight;
|
public double temporalBiasWeight;
|
||||||
|
|
||||||
|
public boolean disablePenalties;
|
||||||
public boolean exportDebugData;
|
public boolean exportDebugData;
|
||||||
|
|
||||||
public ResultRankingParameters(Bm25Parameters bm25Params, int shortDocumentThreshold, double shortDocumentPenalty, double domainRankBonus, double qualityPenalty, int shortSentenceThreshold, double shortSentencePenalty, double bm25Weight, double tcfFirstPosition, double tcfVerbatim, double tcfProximity, TemporalBias temporalBias, double temporalBiasWeight, boolean exportDebugData) {
|
public ResultRankingParameters(Bm25Parameters bm25Params, int shortDocumentThreshold, double shortDocumentPenalty, double domainRankBonus, double qualityPenalty, int shortSentenceThreshold, double shortSentencePenalty, double bm25Weight, double tcfFirstPosition, double tcfVerbatim, double tcfProximity, TemporalBias temporalBias, double temporalBiasWeight, boolean disablePenalties, boolean exportDebugData) {
|
||||||
this.bm25Params = bm25Params;
|
this.bm25Params = bm25Params;
|
||||||
this.shortDocumentThreshold = shortDocumentThreshold;
|
this.shortDocumentThreshold = shortDocumentThreshold;
|
||||||
this.shortDocumentPenalty = shortDocumentPenalty;
|
this.shortDocumentPenalty = shortDocumentPenalty;
|
||||||
@@ -61,11 +63,11 @@ public class ResultRankingParameters {
|
|||||||
this.tcfProximity = tcfProximity;
|
this.tcfProximity = tcfProximity;
|
||||||
this.temporalBias = temporalBias;
|
this.temporalBias = temporalBias;
|
||||||
this.temporalBiasWeight = temporalBiasWeight;
|
this.temporalBiasWeight = temporalBiasWeight;
|
||||||
|
this.disablePenalties = disablePenalties;
|
||||||
this.exportDebugData = exportDebugData;
|
this.exportDebugData = exportDebugData;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static ResultRankingParameters sensibleDefaults() {
|
private static final ResultRankingParameters _sensibleDefaults = builder()
|
||||||
return builder()
|
|
||||||
.bm25Params(new Bm25Parameters(1.2, 0.5))
|
.bm25Params(new Bm25Parameters(1.2, 0.5))
|
||||||
.shortDocumentThreshold(2000)
|
.shortDocumentThreshold(2000)
|
||||||
.shortDocumentPenalty(2.)
|
.shortDocumentPenalty(2.)
|
||||||
@@ -80,7 +82,11 @@ public class ResultRankingParameters {
|
|||||||
.temporalBias(TemporalBias.NONE)
|
.temporalBias(TemporalBias.NONE)
|
||||||
.temporalBiasWeight(5.0)
|
.temporalBiasWeight(5.0)
|
||||||
.exportDebugData(false)
|
.exportDebugData(false)
|
||||||
|
.disablePenalties(false)
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
public static ResultRankingParameters sensibleDefaults() {
|
||||||
|
return _sensibleDefaults;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static ResultRankingParametersBuilder builder() {
|
public static ResultRankingParametersBuilder builder() {
|
||||||
@@ -139,6 +145,8 @@ public class ResultRankingParameters {
|
|||||||
return this.temporalBiasWeight;
|
return this.temporalBiasWeight;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the value of the {@code disablePenalties} flag.
 *
 * @return the current {@code disablePenalties} setting
 */
public boolean isDisablePenalties() {
    return disablePenalties;
}
|
||||||
|
|
||||||
public boolean isExportDebugData() {
|
public boolean isExportDebugData() {
|
||||||
return this.exportDebugData;
|
return this.exportDebugData;
|
||||||
}
|
}
|
||||||
@@ -166,6 +174,7 @@ public class ResultRankingParameters {
|
|||||||
result = 31 * result + Double.hashCode(tcfProximity);
|
result = 31 * result + Double.hashCode(tcfProximity);
|
||||||
result = 31 * result + Objects.hashCode(temporalBias);
|
result = 31 * result + Objects.hashCode(temporalBias);
|
||||||
result = 31 * result + Double.hashCode(temporalBiasWeight);
|
result = 31 * result + Double.hashCode(temporalBiasWeight);
|
||||||
|
result = 31 * result + Boolean.hashCode(disablePenalties);
|
||||||
result = 31 * result + Boolean.hashCode(exportDebugData);
|
result = 31 * result + Boolean.hashCode(exportDebugData);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@@ -192,6 +201,7 @@ public class ResultRankingParameters {
|
|||||||
private double tcfProximity;
|
private double tcfProximity;
|
||||||
private TemporalBias temporalBias;
|
private TemporalBias temporalBias;
|
||||||
private double temporalBiasWeight;
|
private double temporalBiasWeight;
|
||||||
|
private boolean disablePenalties;
|
||||||
private boolean exportDebugData;
|
private boolean exportDebugData;
|
||||||
|
|
||||||
ResultRankingParametersBuilder() {
|
ResultRankingParametersBuilder() {
|
||||||
@@ -262,17 +272,20 @@ public class ResultRankingParameters {
|
|||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public ResultRankingParametersBuilder disablePenalties(boolean disablePenalties) {
|
||||||
|
this.disablePenalties = disablePenalties;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
public ResultRankingParametersBuilder exportDebugData(boolean exportDebugData) {
|
public ResultRankingParametersBuilder exportDebugData(boolean exportDebugData) {
|
||||||
this.exportDebugData = exportDebugData;
|
this.exportDebugData = exportDebugData;
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public ResultRankingParameters build() {
|
public ResultRankingParameters build() {
|
||||||
return new ResultRankingParameters(this.bm25Params, this.shortDocumentThreshold, this.shortDocumentPenalty, this.domainRankBonus, this.qualityPenalty, this.shortSentenceThreshold, this.shortSentencePenalty, this.bm25Weight, this.tcfFirstPosition, this.tcfVerbatim, this.tcfProximity, this.temporalBias, this.temporalBiasWeight, this.exportDebugData);
|
return new ResultRankingParameters(this.bm25Params, this.shortDocumentThreshold, this.shortDocumentPenalty, this.domainRankBonus, this.qualityPenalty, this.shortSentenceThreshold, this.shortSentencePenalty, this.bm25Weight, this.tcfFirstPosition, this.tcfVerbatim, this.tcfProximity, this.temporalBias, this.temporalBiasWeight, this.disablePenalties, this.exportDebugData);
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toString() {
|
|
||||||
return "ResultRankingParameters.ResultRankingParametersBuilder(bm25Params=" + this.bm25Params + ", shortDocumentThreshold=" + this.shortDocumentThreshold + ", shortDocumentPenalty=" + this.shortDocumentPenalty + ", domainRankBonus=" + this.domainRankBonus + ", qualityPenalty=" + this.qualityPenalty + ", shortSentenceThreshold=" + this.shortSentenceThreshold + ", shortSentencePenalty=" + this.shortSentencePenalty + ", bm25Weight=" + this.bm25Weight + ", tcfFirstPosition=" + this.tcfFirstPosition + ", tcfVerbatim=" + this.tcfVerbatim + ", tcfProximity=" + this.tcfProximity + ", temporalBias=" + this.temporalBias + ", temporalBiasWeight=" + this.temporalBiasWeight + ", exportDebugData=" + this.exportDebugData + ")";
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -162,6 +162,7 @@ message RpcResultRankingParameters {
|
|||||||
double temporalBiasWeight = 17;
|
double temporalBiasWeight = 17;
|
||||||
|
|
||||||
bool exportDebugData = 18;
|
bool exportDebugData = 18;
|
||||||
|
bool disablePenalties = 19;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -248,6 +248,10 @@ public class IndexResultScoreCalculator {
|
|||||||
ResultRankingParameters rankingParams,
|
ResultRankingParameters rankingParams,
|
||||||
@Nullable DebugRankingFactors debugRankingFactors) {
|
@Nullable DebugRankingFactors debugRankingFactors) {
|
||||||
|
|
||||||
|
if (rankingParams.disablePenalties) {
|
||||||
|
return 0.;
|
||||||
|
}
|
||||||
|
|
||||||
int rank = DocumentMetadata.decodeRank(documentMetadata);
|
int rank = DocumentMetadata.decodeRank(documentMetadata);
|
||||||
int asl = DocumentMetadata.decodeAvgSentenceLength(documentMetadata);
|
int asl = DocumentMetadata.decodeAvgSentenceLength(documentMetadata);
|
||||||
int quality = DocumentMetadata.decodeQuality(documentMetadata);
|
int quality = DocumentMetadata.decodeQuality(documentMetadata);
|
||||||
|
@@ -1,15 +1,14 @@
|
|||||||
package nu.marginalia.search;
|
package nu.marginalia.search;
|
||||||
|
|
||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
|
import io.jooby.Context;
|
||||||
import io.jooby.Jooby;
|
import io.jooby.Jooby;
|
||||||
import io.prometheus.client.Counter;
|
import io.prometheus.client.Counter;
|
||||||
import io.prometheus.client.Histogram;
|
import io.prometheus.client.Histogram;
|
||||||
import nu.marginalia.WebsiteUrl;
|
|
||||||
import nu.marginalia.search.svc.*;
|
import nu.marginalia.search.svc.*;
|
||||||
import nu.marginalia.service.discovery.property.ServicePartition;
|
import nu.marginalia.service.discovery.property.ServicePartition;
|
||||||
import nu.marginalia.service.server.BaseServiceParams;
|
import nu.marginalia.service.server.BaseServiceParams;
|
||||||
import nu.marginalia.service.server.JoobyService;
|
import nu.marginalia.service.server.JoobyService;
|
||||||
import nu.marginalia.service.server.StaticResources;
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@@ -34,8 +33,6 @@ public class SearchService extends JoobyService {
|
|||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public SearchService(BaseServiceParams params,
|
public SearchService(BaseServiceParams params,
|
||||||
WebsiteUrl websiteUrl,
|
|
||||||
StaticResources staticResources,
|
|
||||||
SearchFrontPageService frontPageService,
|
SearchFrontPageService frontPageService,
|
||||||
SearchAddToCrawlQueueService addToCrawlQueueService,
|
SearchAddToCrawlQueueService addToCrawlQueueService,
|
||||||
SearchSiteSubscriptionService siteSubscriptionService,
|
SearchSiteSubscriptionService siteSubscriptionService,
|
||||||
@@ -62,7 +59,25 @@ public class SearchService extends JoobyService {
|
|||||||
public void startJooby(Jooby jooby) {
|
public void startJooby(Jooby jooby) {
|
||||||
super.startJooby(jooby);
|
super.startJooby(jooby);
|
||||||
|
|
||||||
|
final String startTimeAttribute = "start-time";
|
||||||
|
|
||||||
jooby.get("/export-opml", siteSubscriptionService::exportOpml);
|
jooby.get("/export-opml", siteSubscriptionService::exportOpml);
|
||||||
|
jooby.before((Context ctx) -> {
|
||||||
|
ctx.setAttribute(startTimeAttribute, System.nanoTime());
|
||||||
|
});
|
||||||
|
|
||||||
|
jooby.after((Context ctx, Object result, Throwable failure) -> {
|
||||||
|
if (failure != null) {
|
||||||
|
wmsa_search_service_error_count.labels(ctx.getRoute().getPattern(), ctx.getMethod()).inc();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
Long startTime = ctx.getAttribute(startTimeAttribute);
|
||||||
|
if (startTime != null) {
|
||||||
|
wmsa_search_service_request_time.labels(ctx.getRoute().getPattern(), ctx.getMethod())
|
||||||
|
.observe((System.nanoTime() - startTime) / 1e9);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@@ -47,18 +47,23 @@ public class SearchAddToCrawlQueueService {
|
|||||||
return new MapModelAndView("redirect.jte", Map.of("url", "/site/"+domainName));
|
return new MapModelAndView("redirect.jte", Map.of("url", "/site/"+domainName));
|
||||||
}
|
}
|
||||||
|
|
||||||
private void addToCrawlQueue(int id) throws SQLException {
|
/** Mark a domain for crawling by setting node affinity to zero,
|
||||||
|
* unless it is already marked for crawling, then node affinity should
|
||||||
|
* be left unchanged.
|
||||||
|
* */
|
||||||
|
void addToCrawlQueue(int domainId) throws SQLException {
|
||||||
try (var conn = dataSource.getConnection();
|
try (var conn = dataSource.getConnection();
|
||||||
var stmt = conn.prepareStatement("""
|
var stmt = conn.prepareStatement("""
|
||||||
INSERT IGNORE INTO CRAWL_QUEUE(DOMAIN_NAME, SOURCE)
|
UPDATE EC_DOMAIN
|
||||||
SELECT DOMAIN_NAME, "user" FROM EC_DOMAIN WHERE ID=?
|
SET WMSA_prod.EC_DOMAIN.NODE_AFFINITY = 0
|
||||||
|
WHERE ID=? AND WMSA_prod.EC_DOMAIN.NODE_AFFINITY < 0
|
||||||
""")) {
|
""")) {
|
||||||
stmt.setInt(1, id);
|
stmt.setInt(1, domainId);
|
||||||
stmt.executeUpdate();
|
stmt.executeUpdate();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getDomainName(int id) {
|
String getDomainName(int id) {
|
||||||
var domain = domainQueries.getDomain(id);
|
var domain = domainQueries.getDomain(id);
|
||||||
if (domain.isEmpty())
|
if (domain.isEmpty())
|
||||||
throw new IllegalArgumentException();
|
throw new IllegalArgumentException();
|
||||||
|
@@ -352,7 +352,7 @@ public class SearchSiteInfoService {
|
|||||||
|
|
||||||
public record SiteInfoWithContext(String domain,
|
public record SiteInfoWithContext(String domain,
|
||||||
boolean isSubscribed,
|
boolean isSubscribed,
|
||||||
List<EdgeDomain> siblingDomains,
|
List<DbDomainQueries.DomainWithNode> siblingDomains,
|
||||||
int domainId,
|
int domainId,
|
||||||
String siteUrl,
|
String siteUrl,
|
||||||
boolean hasScreenshot,
|
boolean hasScreenshot,
|
||||||
|
@@ -2,13 +2,24 @@
|
|||||||
|
|
||||||
This service handles search traffic and is the service
|
This service handles search traffic and is the service
|
||||||
you're most directly interacting with when visiting
|
you're most directly interacting with when visiting
|
||||||
[search.marginalia.nu](https://search.marginalia.nu).
|
[marginalia-search.com](https://marginalia-search.com).
|
||||||
|
|
||||||
It interprets a "human" query and translates it into a
|
It interprets a "human" query and translates it into a
|
||||||
request that gets passed to the index service, which finds
|
request that gets passed to the index service, which finds
|
||||||
related documents, which this service then ranks and returns
|
related documents, which this service then ranks and returns
|
||||||
to the user.
|
to the user.
|
||||||
|
|
||||||
|
The UI is built using [JTE templates](https://jte.gg/syntax/) and the [Jooby framework](https://jooby.io), primarily using
|
||||||
|
its MVC facilities.
|
||||||
|
|
||||||
|
When developing, it's possible to set up a mock version of the UI by running
|
||||||
|
the gradle command
|
||||||
|
|
||||||
|
```$ ./gradlew paperDoll -i```
|
||||||
|
|
||||||
|
The UI will be available at http://localhost:9999/, and has hot reloading of JTE classes
|
||||||
|
and static resources.
|
||||||
|
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
|
@@ -1,3 +1,4 @@
|
|||||||
|
@import nu.marginalia.db.DbDomainQueries
|
||||||
@import nu.marginalia.model.EdgeDomain
|
@import nu.marginalia.model.EdgeDomain
|
||||||
@import nu.marginalia.search.svc.SearchSiteInfoService
|
@import nu.marginalia.search.svc.SearchSiteInfoService
|
||||||
@import nu.marginalia.search.svc.SearchSiteInfoService.*
|
@import nu.marginalia.search.svc.SearchSiteInfoService.*
|
||||||
@@ -94,10 +95,14 @@
|
|||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody class="bg-white dark:bg-gray-800 divide-y divide-gray-200 dark:divide-gray-600 text-xs">
|
<tbody class="bg-white dark:bg-gray-800 divide-y divide-gray-200 dark:divide-gray-600 text-xs">
|
||||||
@for (EdgeDomain sibling : siteInfo.siblingDomains())
|
@for (DbDomainQueries.DomainWithNode sibling : siteInfo.siblingDomains())
|
||||||
<tr>
|
<tr>
|
||||||
<td class="px-3 py-6 md:py-3 whitespace-nowrap">
|
<td class="px-3 py-6 md:py-3 whitespace-nowrap">
|
||||||
<a class="text-liteblue dark:text-blue-200" href="/site/${sibling.toString()}">${sibling.toString()}</a>
|
<a class="text-liteblue dark:text-blue-200" href="/site/${sibling.domain().toString()}">${sibling.domain().toString()}</a>
|
||||||
|
|
||||||
|
@if (!sibling.isIndexed())
|
||||||
|
<i class="ml-1 fa-regular fa-question-circle text-gray-400 dark:text-gray-600 text-xs" title="Not indexed"></i>
|
||||||
|
@endif
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
@endfor
|
@endfor
|
||||||
|
@@ -6,6 +6,7 @@ import nu.marginalia.api.domains.model.SimilarDomain;
|
|||||||
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
|
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
|
||||||
import nu.marginalia.browse.model.BrowseResult;
|
import nu.marginalia.browse.model.BrowseResult;
|
||||||
import nu.marginalia.browse.model.BrowseResultSet;
|
import nu.marginalia.browse.model.BrowseResultSet;
|
||||||
|
import nu.marginalia.db.DbDomainQueries;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import nu.marginalia.model.EdgeUrl;
|
import nu.marginalia.model.EdgeUrl;
|
||||||
import nu.marginalia.model.crawl.DomainIndexingState;
|
import nu.marginalia.model.crawl.DomainIndexingState;
|
||||||
@@ -132,8 +133,9 @@ public class MockedSearchResults {
|
|||||||
return new SearchSiteInfoService.SiteInfoWithContext(
|
return new SearchSiteInfoService.SiteInfoWithContext(
|
||||||
"www.example.com",
|
"www.example.com",
|
||||||
false,
|
false,
|
||||||
List.of(new EdgeDomain("example.com"),
|
List.of(
|
||||||
new EdgeDomain("about.example.com")
|
new DbDomainQueries.DomainWithNode(new EdgeDomain("example.com"), 1),
|
||||||
|
new DbDomainQueries.DomainWithNode(new EdgeDomain("example.com"), 0)
|
||||||
),
|
),
|
||||||
14,
|
14,
|
||||||
"https://www.example.com",
|
"https://www.example.com",
|
||||||
|
@@ -0,0 +1,85 @@
|
|||||||
|
package nu.marginalia.search.svc;
|
||||||
|
|
||||||
|
import com.zaxxer.hikari.HikariConfig;
|
||||||
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
|
import nu.marginalia.db.DbDomainQueries;
|
||||||
|
import nu.marginalia.model.EdgeDomain;
|
||||||
|
import nu.marginalia.test.TestMigrationLoader;
|
||||||
|
import org.junit.jupiter.api.*;
|
||||||
|
import org.testcontainers.containers.MariaDBContainer;
|
||||||
|
import org.testcontainers.junit.jupiter.Container;
|
||||||
|
import org.testcontainers.junit.jupiter.Testcontainers;
|
||||||
|
|
||||||
|
import java.sql.SQLException;
|
||||||
|
|
||||||
|
@Tag("slow")
|
||||||
|
@Testcontainers
|
||||||
|
class SearchAddToCrawlQueueServiceTest {
|
||||||
|
@Container
|
||||||
|
static MariaDBContainer<?> mariaDBContainer = new MariaDBContainer<>("mariadb")
|
||||||
|
.withDatabaseName("WMSA_prod")
|
||||||
|
.withUsername("wmsa")
|
||||||
|
.withPassword("wmsa")
|
||||||
|
.withNetworkAliases("mariadb");
|
||||||
|
|
||||||
|
static HikariDataSource dataSource;
|
||||||
|
|
||||||
|
private DbDomainQueries domainQueries;
|
||||||
|
private SearchAddToCrawlQueueService addToCrawlQueueService;
|
||||||
|
|
||||||
|
@BeforeEach
|
||||||
|
public void setUp() throws SQLException {
|
||||||
|
try (var conn = dataSource.getConnection();
|
||||||
|
var stmt = conn.createStatement()) {
|
||||||
|
stmt.executeQuery("DELETE FROM EC_DOMAIN"); // Wipe any old state from other test runs
|
||||||
|
|
||||||
|
stmt.executeQuery("INSERT INTO EC_DOMAIN (DOMAIN_NAME, DOMAIN_TOP, NODE_AFFINITY) VALUES ('known.example.com', 'example.com', -1)");
|
||||||
|
stmt.executeQuery("INSERT INTO EC_DOMAIN (DOMAIN_NAME, DOMAIN_TOP, NODE_AFFINITY) VALUES ('added.example.com', 'example.com', 0)");
|
||||||
|
stmt.executeQuery("INSERT INTO EC_DOMAIN (DOMAIN_NAME, DOMAIN_TOP, NODE_AFFINITY) VALUES ('indexed.example.com', 'example.com', 1)");
|
||||||
|
}
|
||||||
|
|
||||||
|
domainQueries = new DbDomainQueries(dataSource);
|
||||||
|
addToCrawlQueueService = new SearchAddToCrawlQueueService(domainQueries, dataSource);
|
||||||
|
}
|
||||||
|
|
||||||
|
@BeforeAll
|
||||||
|
public static void setUpAll() {
|
||||||
|
HikariConfig config = new HikariConfig();
|
||||||
|
config.setJdbcUrl(mariaDBContainer.getJdbcUrl());
|
||||||
|
config.setUsername("wmsa");
|
||||||
|
config.setPassword("wmsa");
|
||||||
|
|
||||||
|
dataSource = new HikariDataSource(config);
|
||||||
|
TestMigrationLoader.flywayMigration(dataSource);
|
||||||
|
}
|
||||||
|
|
||||||
|
private int getNodeAffinity(String domainName) throws SQLException {
|
||||||
|
try (var conn = dataSource.getConnection();
|
||||||
|
var stmt = conn.prepareStatement("SELECT NODE_AFFINITY FROM EC_DOMAIN WHERE DOMAIN_NAME=?"))
|
||||||
|
{
|
||||||
|
stmt.setString(1, domainName);
|
||||||
|
var rsp = stmt.executeQuery();
|
||||||
|
if (rsp.next()) {
|
||||||
|
return rsp.getInt(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void addToCrawlQueue() throws SQLException {
|
||||||
|
int knownId = domainQueries.getDomainId(new EdgeDomain("known.example.com"));
|
||||||
|
int addedId = domainQueries.getDomainId(new EdgeDomain("added.example.com"));
|
||||||
|
int indexedId = domainQueries.getDomainId(new EdgeDomain("indexed.example.com"));
|
||||||
|
|
||||||
|
addToCrawlQueueService.addToCrawlQueue(knownId);
|
||||||
|
addToCrawlQueueService.addToCrawlQueue(addedId);
|
||||||
|
addToCrawlQueueService.addToCrawlQueue(indexedId);
|
||||||
|
|
||||||
|
Assertions.assertEquals(0, getNodeAffinity("known.example.com"));
|
||||||
|
Assertions.assertEquals(0, getNodeAffinity("added.example.com"));
|
||||||
|
Assertions.assertEquals(1, getNodeAffinity("indexed.example.com"));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@@ -146,6 +146,7 @@ public class QueryBasicInterface {
|
|||||||
.shortSentenceThreshold(intFromRequest(request, "shortSentenceThreshold", sensibleDefaults.shortSentenceThreshold))
|
.shortSentenceThreshold(intFromRequest(request, "shortSentenceThreshold", sensibleDefaults.shortSentenceThreshold))
|
||||||
.shortSentencePenalty(doubleFromRequest(request, "shortSentencePenalty", sensibleDefaults.shortSentencePenalty))
|
.shortSentencePenalty(doubleFromRequest(request, "shortSentencePenalty", sensibleDefaults.shortSentencePenalty))
|
||||||
.bm25Weight(doubleFromRequest(request, "bm25Weight", sensibleDefaults.bm25Weight))
|
.bm25Weight(doubleFromRequest(request, "bm25Weight", sensibleDefaults.bm25Weight))
|
||||||
|
.disablePenalties(boolFromRequest(request, "disablePenalties", sensibleDefaults.disablePenalties))
|
||||||
.exportDebugData(true)
|
.exportDebugData(true)
|
||||||
.build();
|
.build();
|
||||||
}
|
}
|
||||||
@@ -154,6 +155,13 @@ public class QueryBasicInterface {
|
|||||||
return Strings.isNullOrEmpty(request.queryParams(param)) ? defaultValue : Double.parseDouble(request.queryParams(param));
|
return Strings.isNullOrEmpty(request.queryParams(param)) ? defaultValue : Double.parseDouble(request.queryParams(param));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
boolean boolFromRequest(Request request, String param, boolean defaultValue) {
|
||||||
|
if (param == null)
|
||||||
|
return defaultValue;
|
||||||
|
|
||||||
|
return Strings.isNullOrEmpty(request.queryParams(param)) ? defaultValue : Boolean.parseBoolean(request.queryParams(param));
|
||||||
|
}
|
||||||
|
|
||||||
int intFromRequest(Request request, String param, int defaultValue) {
|
int intFromRequest(Request request, String param, int defaultValue) {
|
||||||
return Strings.isNullOrEmpty(request.queryParams(param)) ? defaultValue : parseInt(request.queryParams(param));
|
return Strings.isNullOrEmpty(request.queryParams(param)) ? defaultValue : parseInt(request.queryParams(param));
|
||||||
}
|
}
|
||||||
|
@@ -67,6 +67,14 @@
|
|||||||
<div class="row my-2">
|
<div class="row my-2">
|
||||||
<div class="col-sm-2"><label for="bm25FullWeight">BM25 Weight</label></div>
|
<div class="col-sm-2"><label for="bm25FullWeight">BM25 Weight</label></div>
|
||||||
<div class="col-sm-2"><input type="text" class="form-control" id="bm25Weight" name="bm25Weight" value="{{bm25Weight}}"></div>
|
<div class="col-sm-2"><input type="text" class="form-control" id="bm25Weight" name="bm25Weight" value="{{bm25Weight}}"></div>
|
||||||
|
|
||||||
|
<div class="col-sm-2"><label for="disablePenalties">Disable Penalties</label></div>
|
||||||
|
<div class="col-sm-2">
|
||||||
|
<select class="form-select" id="disablePenalties" name="disablePenalties">
|
||||||
|
<option value="FALSE" {{#unless disablePenalties}}selected{{/unless}}>FALSE</option>
|
||||||
|
<option value="TRUE" {{#if disablePenalties}}selected{{/if}}>TRUE</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{{/with}}
|
{{/with}}
|
||||||
|
Reference in New Issue
Block a user