mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-08 00:02:38 +02:00
Compare commits
12 Commits
deploy-003
...
deploy-004
Author | SHA1 | Date | |
---|---|---|---|
|
a84a06975c | ||
|
d2864c13ec | ||
|
03ba53ce51 | ||
|
d4a6684931 | ||
|
6f0485287a | ||
|
59e2dd4c26 | ||
|
ca1807caae | ||
|
26c20e18ac | ||
|
7c90b6b414 | ||
|
b63c54c4ce | ||
|
fecd2f4ec3 | ||
|
39e420de88 |
@@ -83,6 +83,11 @@ public class QueryParams {
|
|||||||
if (path.endsWith("StoryView.py")) { // folklore.org is neat
|
if (path.endsWith("StoryView.py")) { // folklore.org is neat
|
||||||
return param.startsWith("project=") || param.startsWith("story=");
|
return param.startsWith("project=") || param.startsWith("story=");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// www.perseus.tufts.edu:
|
||||||
|
if (param.startsWith("collection=")) return true;
|
||||||
|
if (param.startsWith("doc=")) return true;
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -121,6 +121,7 @@ public class IndexProtobufCodec {
|
|||||||
params.getTcfProximityWeight(),
|
params.getTcfProximityWeight(),
|
||||||
ResultRankingParameters.TemporalBias.valueOf(params.getTemporalBias().getBias().name()),
|
ResultRankingParameters.TemporalBias.valueOf(params.getTemporalBias().getBias().name()),
|
||||||
params.getTemporalBiasWeight(),
|
params.getTemporalBiasWeight(),
|
||||||
|
params.getDisablePenalties(),
|
||||||
params.getExportDebugData()
|
params.getExportDebugData()
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@@ -146,6 +147,7 @@ public class IndexProtobufCodec {
|
|||||||
.setTcfProximityWeight(rankingParams.tcfProximity)
|
.setTcfProximityWeight(rankingParams.tcfProximity)
|
||||||
.setTcfVerbatimWeight(rankingParams.tcfVerbatim)
|
.setTcfVerbatimWeight(rankingParams.tcfVerbatim)
|
||||||
.setTemporalBiasWeight(rankingParams.temporalBiasWeight)
|
.setTemporalBiasWeight(rankingParams.temporalBiasWeight)
|
||||||
|
.setDisablePenalties(rankingParams.disablePenalties)
|
||||||
.setExportDebugData(rankingParams.exportDebugData);
|
.setExportDebugData(rankingParams.exportDebugData);
|
||||||
|
|
||||||
if (temporalBias != null && temporalBias.getBias() != RpcTemporalBias.Bias.NONE) {
|
if (temporalBias != null && temporalBias.getBias() != RpcTemporalBias.Bias.NONE) {
|
||||||
|
@@ -42,12 +42,14 @@ public class ResultRankingParameters {
|
|||||||
public double tcfVerbatim;
|
public double tcfVerbatim;
|
||||||
public double tcfProximity;
|
public double tcfProximity;
|
||||||
|
|
||||||
|
|
||||||
public TemporalBias temporalBias;
|
public TemporalBias temporalBias;
|
||||||
public double temporalBiasWeight;
|
public double temporalBiasWeight;
|
||||||
|
|
||||||
|
public boolean disablePenalties;
|
||||||
public boolean exportDebugData;
|
public boolean exportDebugData;
|
||||||
|
|
||||||
public ResultRankingParameters(Bm25Parameters bm25Params, int shortDocumentThreshold, double shortDocumentPenalty, double domainRankBonus, double qualityPenalty, int shortSentenceThreshold, double shortSentencePenalty, double bm25Weight, double tcfFirstPosition, double tcfVerbatim, double tcfProximity, TemporalBias temporalBias, double temporalBiasWeight, boolean exportDebugData) {
|
public ResultRankingParameters(Bm25Parameters bm25Params, int shortDocumentThreshold, double shortDocumentPenalty, double domainRankBonus, double qualityPenalty, int shortSentenceThreshold, double shortSentencePenalty, double bm25Weight, double tcfFirstPosition, double tcfVerbatim, double tcfProximity, TemporalBias temporalBias, double temporalBiasWeight, boolean disablePenalties, boolean exportDebugData) {
|
||||||
this.bm25Params = bm25Params;
|
this.bm25Params = bm25Params;
|
||||||
this.shortDocumentThreshold = shortDocumentThreshold;
|
this.shortDocumentThreshold = shortDocumentThreshold;
|
||||||
this.shortDocumentPenalty = shortDocumentPenalty;
|
this.shortDocumentPenalty = shortDocumentPenalty;
|
||||||
@@ -61,6 +63,7 @@ public class ResultRankingParameters {
|
|||||||
this.tcfProximity = tcfProximity;
|
this.tcfProximity = tcfProximity;
|
||||||
this.temporalBias = temporalBias;
|
this.temporalBias = temporalBias;
|
||||||
this.temporalBiasWeight = temporalBiasWeight;
|
this.temporalBiasWeight = temporalBiasWeight;
|
||||||
|
this.disablePenalties = disablePenalties;
|
||||||
this.exportDebugData = exportDebugData;
|
this.exportDebugData = exportDebugData;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -80,6 +83,7 @@ public class ResultRankingParameters {
|
|||||||
.temporalBias(TemporalBias.NONE)
|
.temporalBias(TemporalBias.NONE)
|
||||||
.temporalBiasWeight(5.0)
|
.temporalBiasWeight(5.0)
|
||||||
.exportDebugData(false)
|
.exportDebugData(false)
|
||||||
|
.disablePenalties(false)
|
||||||
.build();
|
.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -139,6 +143,8 @@ public class ResultRankingParameters {
|
|||||||
return this.temporalBiasWeight;
|
return this.temporalBiasWeight;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean isDisablePenalties() { return this.disablePenalties; }
|
||||||
|
|
||||||
public boolean isExportDebugData() {
|
public boolean isExportDebugData() {
|
||||||
return this.exportDebugData;
|
return this.exportDebugData;
|
||||||
}
|
}
|
||||||
@@ -166,6 +172,7 @@ public class ResultRankingParameters {
|
|||||||
result = 31 * result + Double.hashCode(tcfProximity);
|
result = 31 * result + Double.hashCode(tcfProximity);
|
||||||
result = 31 * result + Objects.hashCode(temporalBias);
|
result = 31 * result + Objects.hashCode(temporalBias);
|
||||||
result = 31 * result + Double.hashCode(temporalBiasWeight);
|
result = 31 * result + Double.hashCode(temporalBiasWeight);
|
||||||
|
result = 31 * result + Boolean.hashCode(disablePenalties);
|
||||||
result = 31 * result + Boolean.hashCode(exportDebugData);
|
result = 31 * result + Boolean.hashCode(exportDebugData);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@@ -192,6 +199,7 @@ public class ResultRankingParameters {
|
|||||||
private double tcfProximity;
|
private double tcfProximity;
|
||||||
private TemporalBias temporalBias;
|
private TemporalBias temporalBias;
|
||||||
private double temporalBiasWeight;
|
private double temporalBiasWeight;
|
||||||
|
private boolean disablePenalties;
|
||||||
private boolean exportDebugData;
|
private boolean exportDebugData;
|
||||||
|
|
||||||
ResultRankingParametersBuilder() {
|
ResultRankingParametersBuilder() {
|
||||||
@@ -262,17 +270,20 @@ public class ResultRankingParameters {
|
|||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public ResultRankingParametersBuilder disablePenalties(boolean disablePenalties) {
|
||||||
|
this.disablePenalties = disablePenalties;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
public ResultRankingParametersBuilder exportDebugData(boolean exportDebugData) {
|
public ResultRankingParametersBuilder exportDebugData(boolean exportDebugData) {
|
||||||
this.exportDebugData = exportDebugData;
|
this.exportDebugData = exportDebugData;
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public ResultRankingParameters build() {
|
public ResultRankingParameters build() {
|
||||||
return new ResultRankingParameters(this.bm25Params, this.shortDocumentThreshold, this.shortDocumentPenalty, this.domainRankBonus, this.qualityPenalty, this.shortSentenceThreshold, this.shortSentencePenalty, this.bm25Weight, this.tcfFirstPosition, this.tcfVerbatim, this.tcfProximity, this.temporalBias, this.temporalBiasWeight, this.exportDebugData);
|
return new ResultRankingParameters(this.bm25Params, this.shortDocumentThreshold, this.shortDocumentPenalty, this.domainRankBonus, this.qualityPenalty, this.shortSentenceThreshold, this.shortSentencePenalty, this.bm25Weight, this.tcfFirstPosition, this.tcfVerbatim, this.tcfProximity, this.temporalBias, this.temporalBiasWeight, this.disablePenalties, this.exportDebugData);
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toString() {
|
|
||||||
return "ResultRankingParameters.ResultRankingParametersBuilder(bm25Params=" + this.bm25Params + ", shortDocumentThreshold=" + this.shortDocumentThreshold + ", shortDocumentPenalty=" + this.shortDocumentPenalty + ", domainRankBonus=" + this.domainRankBonus + ", qualityPenalty=" + this.qualityPenalty + ", shortSentenceThreshold=" + this.shortSentenceThreshold + ", shortSentencePenalty=" + this.shortSentencePenalty + ", bm25Weight=" + this.bm25Weight + ", tcfFirstPosition=" + this.tcfFirstPosition + ", tcfVerbatim=" + this.tcfVerbatim + ", tcfProximity=" + this.tcfProximity + ", temporalBias=" + this.temporalBias + ", temporalBiasWeight=" + this.temporalBiasWeight + ", exportDebugData=" + this.exportDebugData + ")";
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -162,6 +162,7 @@ message RpcResultRankingParameters {
|
|||||||
double temporalBiasWeight = 17;
|
double temporalBiasWeight = 17;
|
||||||
|
|
||||||
bool exportDebugData = 18;
|
bool exportDebugData = 18;
|
||||||
|
bool disablePenalties = 19;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -233,9 +233,19 @@ public class QueryParser {
|
|||||||
entity.replace(new QueryToken.RankTerm(limit, str));
|
entity.replace(new QueryToken.RankTerm(limit, str));
|
||||||
} else if (str.startsWith("qs=")) {
|
} else if (str.startsWith("qs=")) {
|
||||||
entity.replace(new QueryToken.QsTerm(str.substring(3)));
|
entity.replace(new QueryToken.QsTerm(str.substring(3)));
|
||||||
} else if (str.contains(":")) {
|
} else if (str.startsWith("site:")
|
||||||
|
|| str.startsWith("format:")
|
||||||
|
|| str.startsWith("file:")
|
||||||
|
|| str.startsWith("tld:")
|
||||||
|
|| str.startsWith("ip:")
|
||||||
|
|| str.startsWith("as:")
|
||||||
|
|| str.startsWith("asn:")
|
||||||
|
|| str.startsWith("generator:")
|
||||||
|
)
|
||||||
|
{
|
||||||
entity.replace(new QueryToken.AdviceTerm(str, t.displayStr()));
|
entity.replace(new QueryToken.AdviceTerm(str, t.displayStr()));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static SpecificationLimit parseSpecificationLimit(String str) {
|
private static SpecificationLimit parseSpecificationLimit(String str) {
|
||||||
|
@@ -208,6 +208,12 @@ public class QueryFactoryTest {
|
|||||||
System.out.println(subquery);
|
System.out.println(subquery);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCplusPlus() {
|
||||||
|
var subquery = parseAndGetSpecs("std::vector::push_back vector");
|
||||||
|
System.out.println(subquery);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testQuotedApostrophe() {
|
public void testQuotedApostrophe() {
|
||||||
var subquery = parseAndGetSpecs("\"bob's cars\"");
|
var subquery = parseAndGetSpecs("\"bob's cars\"");
|
||||||
|
@@ -248,6 +248,10 @@ public class IndexResultScoreCalculator {
|
|||||||
ResultRankingParameters rankingParams,
|
ResultRankingParameters rankingParams,
|
||||||
@Nullable DebugRankingFactors debugRankingFactors) {
|
@Nullable DebugRankingFactors debugRankingFactors) {
|
||||||
|
|
||||||
|
if (rankingParams.disablePenalties) {
|
||||||
|
return 0.;
|
||||||
|
}
|
||||||
|
|
||||||
int rank = DocumentMetadata.decodeRank(documentMetadata);
|
int rank = DocumentMetadata.decodeRank(documentMetadata);
|
||||||
int asl = DocumentMetadata.decodeAvgSentenceLength(documentMetadata);
|
int asl = DocumentMetadata.decodeAvgSentenceLength(documentMetadata);
|
||||||
int quality = DocumentMetadata.decodeQuality(documentMetadata);
|
int quality = DocumentMetadata.decodeQuality(documentMetadata);
|
||||||
|
@@ -152,7 +152,10 @@ public class DocumentPositionMapper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
boolean matchesWordPattern(String s) {
|
boolean matchesWordPattern(String s) {
|
||||||
// this function is an unrolled version of the regexp [\da-zA-Z]{1,15}([.\-_/:+*][\da-zA-Z]{1,10}){0,4}
|
if (s.length() > 48)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// this function is an unrolled version of the regexp [\da-zA-Z]{1,15}([.\-_/:+*][\da-zA-Z]{1,10}){0,8}
|
||||||
|
|
||||||
String wordPartSeparator = ".-_/:+*";
|
String wordPartSeparator = ".-_/:+*";
|
||||||
|
|
||||||
@@ -169,7 +172,7 @@ public class DocumentPositionMapper {
|
|||||||
if (i == 0)
|
if (i == 0)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
for (int j = 0; j < 5; j++) {
|
for (int j = 0; j < 8; j++) {
|
||||||
if (i == s.length()) return true;
|
if (i == s.length()) return true;
|
||||||
|
|
||||||
if (wordPartSeparator.indexOf(s.charAt(i)) < 0) {
|
if (wordPartSeparator.indexOf(s.charAt(i)) < 0) {
|
||||||
|
@@ -30,9 +30,11 @@ class DocumentPositionMapperTest {
|
|||||||
Assertions.assertFalse(positionMapper.matchesWordPattern("1234567890abcdef"));
|
Assertions.assertFalse(positionMapper.matchesWordPattern("1234567890abcdef"));
|
||||||
|
|
||||||
Assertions.assertTrue(positionMapper.matchesWordPattern("test-test-test-test-test"));
|
Assertions.assertTrue(positionMapper.matchesWordPattern("test-test-test-test-test"));
|
||||||
Assertions.assertFalse(positionMapper.matchesWordPattern("test-test-test-test-test-test"));
|
Assertions.assertFalse(positionMapper.matchesWordPattern("test-test-test-test-test-test-test-test-test"));
|
||||||
Assertions.assertTrue(positionMapper.matchesWordPattern("192.168.1.100/24"));
|
Assertions.assertTrue(positionMapper.matchesWordPattern("192.168.1.100/24"));
|
||||||
Assertions.assertTrue(positionMapper.matchesWordPattern("std::vector"));
|
Assertions.assertTrue(positionMapper.matchesWordPattern("std::vector"));
|
||||||
|
Assertions.assertTrue(positionMapper.matchesWordPattern("std::vector::push_back"));
|
||||||
|
|
||||||
Assertions.assertTrue(positionMapper.matchesWordPattern("c++"));
|
Assertions.assertTrue(positionMapper.matchesWordPattern("c++"));
|
||||||
Assertions.assertTrue(positionMapper.matchesWordPattern("m*a*s*h"));
|
Assertions.assertTrue(positionMapper.matchesWordPattern("m*a*s*h"));
|
||||||
Assertions.assertFalse(positionMapper.matchesWordPattern("Stulpnagelstrasse"));
|
Assertions.assertFalse(positionMapper.matchesWordPattern("Stulpnagelstrasse"));
|
||||||
|
@@ -0,0 +1,113 @@
|
|||||||
|
package nu.marginalia.converting.processor.plugin.specialization;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.Singleton;
|
||||||
|
import nu.marginalia.converting.processor.logic.TitleExtractor;
|
||||||
|
import nu.marginalia.converting.processor.summary.SummaryExtractor;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.logging.log4j.util.Strings;
|
||||||
|
import org.jsoup.nodes.Document;
|
||||||
|
import org.jsoup.nodes.Element;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
@Singleton
|
||||||
|
public class CppreferenceSpecialization extends WikiSpecialization {
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public CppreferenceSpecialization(SummaryExtractor summaryExtractor, TitleExtractor titleExtractor) {
|
||||||
|
super(summaryExtractor, titleExtractor);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Document prune(Document original) {
|
||||||
|
var doc = original.clone();
|
||||||
|
|
||||||
|
doc.getElementsByClass("t-nv").remove();
|
||||||
|
doc.getElementsByClass("toc").remove();
|
||||||
|
doc.getElementsByClass("mw-head").remove();
|
||||||
|
doc.getElementsByClass("printfooter").remove();
|
||||||
|
doc.getElementsByClass("cpp-footer-base").remove();
|
||||||
|
|
||||||
|
doc.title(doc.title() + " " + Strings.join(extractExtraTokens(doc.title()), ' '));
|
||||||
|
|
||||||
|
return doc;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getSummary(Document doc, Set<String> importantWords) {
|
||||||
|
|
||||||
|
Element declTable = doc.getElementsByClass("t-dcl-begin").first();
|
||||||
|
if (declTable != null) {
|
||||||
|
var nextPar = declTable.nextElementSibling();
|
||||||
|
if (nextPar != null) {
|
||||||
|
return nextPar.text();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return super.getSummary(doc, importantWords);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public List<String> extractExtraTokens(String title) {
|
||||||
|
|
||||||
|
if (!title.contains("::")) {
|
||||||
|
return List.of();
|
||||||
|
}
|
||||||
|
if (!title.contains("-")) {
|
||||||
|
return List.of();
|
||||||
|
}
|
||||||
|
|
||||||
|
title = StringUtils.split(title, '-')[0];
|
||||||
|
|
||||||
|
String name = title;
|
||||||
|
for (;;) {
|
||||||
|
int lbidx = name.indexOf('<');
|
||||||
|
int rbidx = name.indexOf('>');
|
||||||
|
|
||||||
|
if (lbidx > 0 && rbidx > lbidx) {
|
||||||
|
String className = name.substring(0, lbidx);
|
||||||
|
String methodName = name.substring(rbidx + 1);
|
||||||
|
name = className + methodName;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
List<String> tokens = new ArrayList<>();
|
||||||
|
|
||||||
|
for (var part : name.split("\\s*,\\s*")) {
|
||||||
|
if (part.endsWith(")") && !part.endsWith("()")) {
|
||||||
|
int parenStart = part.indexOf('(');
|
||||||
|
if (parenStart > 0) { // foo(...) -> foo
|
||||||
|
part = part.substring(0, parenStart);
|
||||||
|
}
|
||||||
|
else if (parenStart == 0) { // (foo) -> foo
|
||||||
|
part = part.substring(1, part.length() - 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
part = part.trim();
|
||||||
|
if (part.contains("::")) {
|
||||||
|
tokens.add(part);
|
||||||
|
if (part.startsWith("std::")) {
|
||||||
|
tokens.add(part.substring(5));
|
||||||
|
|
||||||
|
int ss = part.indexOf("::", 5);
|
||||||
|
if (ss > 0) {
|
||||||
|
tokens.add(part.substring(0, ss));
|
||||||
|
tokens.add(part.substring(ss+2));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return tokens;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
@@ -24,6 +24,7 @@ public class HtmlProcessorSpecializations {
|
|||||||
private final WikiSpecialization wikiSpecialization;
|
private final WikiSpecialization wikiSpecialization;
|
||||||
private final BlogSpecialization blogSpecialization;
|
private final BlogSpecialization blogSpecialization;
|
||||||
private final GogStoreSpecialization gogStoreSpecialization;
|
private final GogStoreSpecialization gogStoreSpecialization;
|
||||||
|
private final CppreferenceSpecialization cppreferenceSpecialization;
|
||||||
private final DefaultSpecialization defaultSpecialization;
|
private final DefaultSpecialization defaultSpecialization;
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
@@ -37,6 +38,7 @@ public class HtmlProcessorSpecializations {
|
|||||||
WikiSpecialization wikiSpecialization,
|
WikiSpecialization wikiSpecialization,
|
||||||
BlogSpecialization blogSpecialization,
|
BlogSpecialization blogSpecialization,
|
||||||
GogStoreSpecialization gogStoreSpecialization,
|
GogStoreSpecialization gogStoreSpecialization,
|
||||||
|
CppreferenceSpecialization cppreferenceSpecialization,
|
||||||
DefaultSpecialization defaultSpecialization) {
|
DefaultSpecialization defaultSpecialization) {
|
||||||
this.domainTypes = domainTypes;
|
this.domainTypes = domainTypes;
|
||||||
this.lemmySpecialization = lemmySpecialization;
|
this.lemmySpecialization = lemmySpecialization;
|
||||||
@@ -48,6 +50,7 @@ public class HtmlProcessorSpecializations {
|
|||||||
this.wikiSpecialization = wikiSpecialization;
|
this.wikiSpecialization = wikiSpecialization;
|
||||||
this.blogSpecialization = blogSpecialization;
|
this.blogSpecialization = blogSpecialization;
|
||||||
this.gogStoreSpecialization = gogStoreSpecialization;
|
this.gogStoreSpecialization = gogStoreSpecialization;
|
||||||
|
this.cppreferenceSpecialization = cppreferenceSpecialization;
|
||||||
this.defaultSpecialization = defaultSpecialization;
|
this.defaultSpecialization = defaultSpecialization;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -66,6 +69,10 @@ public class HtmlProcessorSpecializations {
|
|||||||
return mariadbKbSpecialization;
|
return mariadbKbSpecialization;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (url.domain.getTopDomain().equals("cppreference.com")) {
|
||||||
|
return cppreferenceSpecialization;
|
||||||
|
}
|
||||||
|
|
||||||
if (url.domain.toString().equals("store.steampowered.com")) {
|
if (url.domain.toString().equals("store.steampowered.com")) {
|
||||||
return steamStoreSpecialization;
|
return steamStoreSpecialization;
|
||||||
}
|
}
|
||||||
@@ -86,6 +93,9 @@ public class HtmlProcessorSpecializations {
|
|||||||
if (generator.keywords().contains("javadoc")) {
|
if (generator.keywords().contains("javadoc")) {
|
||||||
return javadocSpecialization;
|
return javadocSpecialization;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Must be toward the end, as some specializations are for
|
||||||
|
// wiki-generator content
|
||||||
if (generator.type() == GeneratorType.WIKI) {
|
if (generator.type() == GeneratorType.WIKI) {
|
||||||
return wikiSpecialization;
|
return wikiSpecialization;
|
||||||
}
|
}
|
||||||
@@ -105,7 +115,7 @@ public class HtmlProcessorSpecializations {
|
|||||||
|
|
||||||
boolean shouldIndex(EdgeUrl url);
|
boolean shouldIndex(EdgeUrl url);
|
||||||
double lengthModifier();
|
double lengthModifier();
|
||||||
void amendWords(Document doc, DocumentKeywordsBuilder words);
|
|
||||||
|
|
||||||
|
default void amendWords(Document doc, DocumentKeywordsBuilder words) {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -4,7 +4,6 @@ import com.google.inject.Inject;
|
|||||||
import com.google.inject.Singleton;
|
import com.google.inject.Singleton;
|
||||||
import nu.marginalia.converting.processor.logic.TitleExtractor;
|
import nu.marginalia.converting.processor.logic.TitleExtractor;
|
||||||
import nu.marginalia.converting.processor.summary.SummaryExtractor;
|
import nu.marginalia.converting.processor.summary.SummaryExtractor;
|
||||||
import nu.marginalia.keyword.model.DocumentKeywordsBuilder;
|
|
||||||
import nu.marginalia.model.EdgeUrl;
|
import nu.marginalia.model.EdgeUrl;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
@@ -93,6 +92,8 @@ public class WikiSpecialization extends DefaultSpecialization {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void amendWords(Document doc, DocumentKeywordsBuilder words) {
|
@Override
|
||||||
|
public double lengthModifier() {
|
||||||
|
return 2.5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -0,0 +1,27 @@
|
|||||||
|
package nu.marginalia.converting.processor.plugin.specialization;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Assertions;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
class CppreferenceSpecializationTest {
|
||||||
|
CppreferenceSpecialization specialization = new CppreferenceSpecialization(null, null);
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTitleMagic() {
|
||||||
|
|
||||||
|
List<String> ret;
|
||||||
|
|
||||||
|
ret = specialization.extractExtraTokens("std::multimap<Key, T, Compare, Allocator>::crend - cppreference.com");
|
||||||
|
Assertions.assertTrue(ret.contains("std::multimap::crend"));
|
||||||
|
Assertions.assertTrue(ret.contains("multimap::crend"));
|
||||||
|
Assertions.assertTrue(ret.contains("std::multimap"));
|
||||||
|
Assertions.assertTrue(ret.contains("crend"));
|
||||||
|
|
||||||
|
ret = specialization.extractExtraTokens("std::coroutine_handle<Promise>::operator(), std::coroutine_handle<Promise>::resume - cppreference.com");
|
||||||
|
Assertions.assertTrue(ret.contains("std::coroutine_handle::operator()"));
|
||||||
|
Assertions.assertTrue(ret.contains("std::coroutine_handle::resume"));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@@ -8,8 +8,8 @@
|
|||||||
<ShortName>Marginalia</ShortName>
|
<ShortName>Marginalia</ShortName>
|
||||||
<Description>Search Marginalia</Description>
|
<Description>Search Marginalia</Description>
|
||||||
<InputEncoding>UTF-8</InputEncoding>
|
<InputEncoding>UTF-8</InputEncoding>
|
||||||
<Image width="16" height="16" type="image/x-icon">https://search.marginalia.nu/favicon.ico</Image>
|
<Image width="16" height="16" type="image/x-icon">https://old-search.marginalia.nu/favicon.ico</Image>
|
||||||
<Url type="text/html" method="get"
|
<Url type="text/html" method="get"
|
||||||
template="https://search.marginalia.nu/search?query={searchTerms}&ref=opensearch"/>
|
template="https://old-search.marginalia.nu/search?query={searchTerms}&ref=opensearch"/>
|
||||||
<moz:SearchForm>https://search.marginalia.nu/</moz:SearchForm>
|
<moz:SearchForm>https://old-search.marginalia.nu/</moz:SearchForm>
|
||||||
</OpenSearchDescription>
|
</OpenSearchDescription>
|
@@ -3,9 +3,9 @@
|
|||||||
<nav>
|
<nav>
|
||||||
<a href="#" class="screenreader-only" onClick="">Skip to content</a>
|
<a href="#" class="screenreader-only" onClick="">Skip to content</a>
|
||||||
<a href="https://www.marginalia.nu/">Marginalia</a>
|
<a href="https://www.marginalia.nu/">Marginalia</a>
|
||||||
<a href="https://memex.marginalia.nu/projects/edge/about.gmi">About</a>
|
<a href="https://about.marginalia-search.com/">About</a>
|
||||||
<a href="https://memex.marginalia.nu/projects/edge/supporting.gmi">Donate</a>
|
<a href="https://about.marginalia-search.com/article/supporting/">Donate</a>
|
||||||
<a class="extra" href="https://search.marginalia.nu/explore/random">Random</a>
|
<a class="extra" href="https://old-search.marginalia.nu/explore/random">Random</a>
|
||||||
</nav>
|
</nav>
|
||||||
<div id="theme">
|
<div id="theme">
|
||||||
<label for="theme-select" class="screenreader-only">Color Theme</label>
|
<label for="theme-select" class="screenreader-only">Color Theme</label>
|
||||||
|
@@ -14,7 +14,12 @@
|
|||||||
<span>${siteInfo.domain()}</span>
|
<span>${siteInfo.domain()}</span>
|
||||||
<div class="grow">
|
<div class="grow">
|
||||||
</div>
|
</div>
|
||||||
<a rel="nofollow noopener external" href="${siteInfo.siteUrl()}" class="fa-solid fa-arrow-up-right-from-square" ></a>
|
<a href="https://web.archive.org/web/*/${siteInfo.domain()}"
|
||||||
|
class="p-1.5 text-white px-4"
|
||||||
|
title="Wayback Machine">
|
||||||
|
<i class="fas fa-clock-rotate-left text-sm"></i>
|
||||||
|
</a>
|
||||||
|
<a title="Visit ${siteInfo.domain()}" rel="nofollow noopener external" href="${siteInfo.siteUrl()}" class="fa-solid fa-arrow-up-right-from-square" ></a>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@if (siteInfo.hasScreenshot())
|
@if (siteInfo.hasScreenshot())
|
||||||
|
@@ -8,8 +8,8 @@
|
|||||||
<ShortName>Marginalia</ShortName>
|
<ShortName>Marginalia</ShortName>
|
||||||
<Description>Search Marginalia</Description>
|
<Description>Search Marginalia</Description>
|
||||||
<InputEncoding>UTF-8</InputEncoding>
|
<InputEncoding>UTF-8</InputEncoding>
|
||||||
<Image width="16" height="16" type="image/x-icon">https://search.marginalia.nu/favicon.ico</Image>
|
<Image width="16" height="16" type="image/x-icon">https://marginalia-search.com/favicon.ico</Image>
|
||||||
<Url type="text/html" method="get"
|
<Url type="text/html" method="get"
|
||||||
template="https://search.marginalia.nu/search?query={searchTerms}&ref=opensearch"/>
|
template="https://marginalia-search.com/search?query={searchTerms}&ref=opensearch"/>
|
||||||
<moz:SearchForm>https://search.marginalia.nu/</moz:SearchForm>
|
<moz:SearchForm>https://marginalia-search.com/</moz:SearchForm>
|
||||||
</OpenSearchDescription>
|
</OpenSearchDescription>
|
@@ -146,6 +146,7 @@ public class QueryBasicInterface {
|
|||||||
.shortSentenceThreshold(intFromRequest(request, "shortSentenceThreshold", sensibleDefaults.shortSentenceThreshold))
|
.shortSentenceThreshold(intFromRequest(request, "shortSentenceThreshold", sensibleDefaults.shortSentenceThreshold))
|
||||||
.shortSentencePenalty(doubleFromRequest(request, "shortSentencePenalty", sensibleDefaults.shortSentencePenalty))
|
.shortSentencePenalty(doubleFromRequest(request, "shortSentencePenalty", sensibleDefaults.shortSentencePenalty))
|
||||||
.bm25Weight(doubleFromRequest(request, "bm25Weight", sensibleDefaults.bm25Weight))
|
.bm25Weight(doubleFromRequest(request, "bm25Weight", sensibleDefaults.bm25Weight))
|
||||||
|
.disablePenalties(boolFromRequest(request, "disablePenalties", sensibleDefaults.disablePenalties))
|
||||||
.exportDebugData(true)
|
.exportDebugData(true)
|
||||||
.build();
|
.build();
|
||||||
}
|
}
|
||||||
@@ -154,6 +155,13 @@ public class QueryBasicInterface {
|
|||||||
return Strings.isNullOrEmpty(request.queryParams(param)) ? defaultValue : Double.parseDouble(request.queryParams(param));
|
return Strings.isNullOrEmpty(request.queryParams(param)) ? defaultValue : Double.parseDouble(request.queryParams(param));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
boolean boolFromRequest(Request request, String param, boolean defaultValue) {
|
||||||
|
if (param == null)
|
||||||
|
return defaultValue;
|
||||||
|
|
||||||
|
return Strings.isNullOrEmpty(request.queryParams(param)) ? defaultValue : Boolean.parseBoolean(request.queryParams(param));
|
||||||
|
}
|
||||||
|
|
||||||
int intFromRequest(Request request, String param, int defaultValue) {
|
int intFromRequest(Request request, String param, int defaultValue) {
|
||||||
return Strings.isNullOrEmpty(request.queryParams(param)) ? defaultValue : parseInt(request.queryParams(param));
|
return Strings.isNullOrEmpty(request.queryParams(param)) ? defaultValue : parseInt(request.queryParams(param));
|
||||||
}
|
}
|
||||||
|
@@ -67,6 +67,14 @@
|
|||||||
<div class="row my-2">
|
<div class="row my-2">
|
||||||
<div class="col-sm-2"><label for="bm25FullWeight">BM25 Weight</label></div>
|
<div class="col-sm-2"><label for="bm25FullWeight">BM25 Weight</label></div>
|
||||||
<div class="col-sm-2"><input type="text" class="form-control" id="bm25Weight" name="bm25Weight" value="{{bm25Weight}}"></div>
|
<div class="col-sm-2"><input type="text" class="form-control" id="bm25Weight" name="bm25Weight" value="{{bm25Weight}}"></div>
|
||||||
|
|
||||||
|
<div class="col-sm-2"><label for="disablePenalties">Disable Penalties</label></div>
|
||||||
|
<div class="col-sm-2">
|
||||||
|
<select class="form-select" id="disablePenalties" name="disablePenalties">
|
||||||
|
<option value="FALSE" {{#unless disablePenalties}}selected{{/unless}}>FALSE</option>
|
||||||
|
<option value="TRUE" {{#if disablePenalties}}selected{{/if}}>TRUE</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{{/with}}
|
{{/with}}
|
||||||
|
@@ -1,6 +1,3 @@
|
|||||||
## This is a token file for automatic deployment
|
## This is a token file for automatic deployment
|
||||||
|
|
||||||
A master HEAD tagged with deploy-core*, deploy-executor*, or deploy-index* will trigger a commit.
|
2025-01-07: Deploy executor.
|
||||||
|
|
||||||
2024-12-19-00002: Test deployment of executor
|
|
||||||
2024-12-19-00001: Test deployment of executor
|
|
@@ -258,6 +258,13 @@ if __name__ == '__main__':
|
|||||||
deploy_tier=2,
|
deploy_tier=2,
|
||||||
groups={"all", "frontend", "core"}
|
groups={"all", "frontend", "core"}
|
||||||
),
|
),
|
||||||
|
'search-legacy': ServiceConfig(
|
||||||
|
gradle_target=':code:services-application:search-service-legacy:docker',
|
||||||
|
docker_name='search-service-legacy',
|
||||||
|
instances=None,
|
||||||
|
deploy_tier=3,
|
||||||
|
groups={"all", "frontend", "core"}
|
||||||
|
),
|
||||||
'api': ServiceConfig(
|
'api': ServiceConfig(
|
||||||
gradle_target=':code:services-application:api-service:docker',
|
gradle_target=':code:services-application:api-service:docker',
|
||||||
docker_name='api-service',
|
docker_name='api-service',
|
||||||
|
Reference in New Issue
Block a user