mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-05 21:22:39 +02:00
Compare commits
12 Commits
deploy-003
...
deploy-003
Author | SHA1 | Date | |
---|---|---|---|
|
6f0485287a | ||
|
59e2dd4c26 | ||
|
ca1807caae | ||
|
26c20e18ac | ||
|
7c90b6b414 | ||
|
b63c54c4ce | ||
|
fecd2f4ec3 | ||
|
39e420de88 | ||
|
dc83619861 | ||
|
87d1c89701 | ||
|
a42a7769e2 | ||
|
202bda884f |
@@ -8,17 +8,18 @@ import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.sql.SQLException;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.Optional;
|
||||
import java.util.OptionalInt;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
|
||||
@Singleton
|
||||
public class DbDomainQueries {
|
||||
private final HikariDataSource dataSource;
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(DbDomainQueries.class);
|
||||
private final Cache<EdgeDomain, Integer> domainIdCache = CacheBuilder.newBuilder().maximumSize(10_000).build();
|
||||
|
||||
@Inject
|
||||
@@ -101,4 +102,28 @@ public class DbDomainQueries {
|
||||
throw new RuntimeException(ex);
|
||||
}
|
||||
}
|
||||
|
||||
public List<EdgeDomain> otherSubdomains(EdgeDomain domain, int cnt) {
|
||||
List<EdgeDomain> ret = new ArrayList<>();
|
||||
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("SELECT DOMAIN_NAME FROM EC_DOMAIN WHERE DOMAIN_TOP = ? LIMIT ?")) {
|
||||
stmt.setString(1, domain.topDomain);
|
||||
stmt.setInt(2, cnt);
|
||||
|
||||
var rs = stmt.executeQuery();
|
||||
while (rs.next()) {
|
||||
var sibling = new EdgeDomain(rs.getString(1));
|
||||
|
||||
if (sibling.equals(domain))
|
||||
continue;
|
||||
|
||||
ret.add(sibling);
|
||||
}
|
||||
} catch (SQLException e) {
|
||||
logger.error("Failed to get domain neighbors");
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
@@ -233,9 +233,19 @@ public class QueryParser {
|
||||
entity.replace(new QueryToken.RankTerm(limit, str));
|
||||
} else if (str.startsWith("qs=")) {
|
||||
entity.replace(new QueryToken.QsTerm(str.substring(3)));
|
||||
} else if (str.contains(":")) {
|
||||
} else if (str.startsWith("site:")
|
||||
|| str.startsWith("format:")
|
||||
|| str.startsWith("file:")
|
||||
|| str.startsWith("tld:")
|
||||
|| str.startsWith("ip:")
|
||||
|| str.startsWith("as:")
|
||||
|| str.startsWith("asn:")
|
||||
|| str.startsWith("generator:")
|
||||
)
|
||||
{
|
||||
entity.replace(new QueryToken.AdviceTerm(str, t.displayStr()));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private static SpecificationLimit parseSpecificationLimit(String str) {
|
||||
|
@@ -208,6 +208,12 @@ public class QueryFactoryTest {
|
||||
System.out.println(subquery);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCplusPlus() {
|
||||
var subquery = parseAndGetSpecs("std::vector::push_back vector");
|
||||
System.out.println(subquery);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testQuotedApostrophe() {
|
||||
var subquery = parseAndGetSpecs("\"bob's cars\"");
|
||||
|
@@ -152,7 +152,10 @@ public class DocumentPositionMapper {
|
||||
}
|
||||
|
||||
boolean matchesWordPattern(String s) {
|
||||
// this function is an unrolled version of the regexp [\da-zA-Z]{1,15}([.\-_/:+*][\da-zA-Z]{1,10}){0,4}
|
||||
if (s.length() > 48)
|
||||
return false;
|
||||
|
||||
// this function is an unrolled version of the regexp [\da-zA-Z]{1,15}([.\-_/:+*][\da-zA-Z]{1,10}){0,8}
|
||||
|
||||
String wordPartSeparator = ".-_/:+*";
|
||||
|
||||
@@ -169,7 +172,7 @@ public class DocumentPositionMapper {
|
||||
if (i == 0)
|
||||
return false;
|
||||
|
||||
for (int j = 0; j < 5; j++) {
|
||||
for (int j = 0; j < 8; j++) {
|
||||
if (i == s.length()) return true;
|
||||
|
||||
if (wordPartSeparator.indexOf(s.charAt(i)) < 0) {
|
||||
|
@@ -30,9 +30,11 @@ class DocumentPositionMapperTest {
|
||||
Assertions.assertFalse(positionMapper.matchesWordPattern("1234567890abcdef"));
|
||||
|
||||
Assertions.assertTrue(positionMapper.matchesWordPattern("test-test-test-test-test"));
|
||||
Assertions.assertFalse(positionMapper.matchesWordPattern("test-test-test-test-test-test"));
|
||||
Assertions.assertFalse(positionMapper.matchesWordPattern("test-test-test-test-test-test-test-test-test"));
|
||||
Assertions.assertTrue(positionMapper.matchesWordPattern("192.168.1.100/24"));
|
||||
Assertions.assertTrue(positionMapper.matchesWordPattern("std::vector"));
|
||||
Assertions.assertTrue(positionMapper.matchesWordPattern("std::vector::push_back"));
|
||||
|
||||
Assertions.assertTrue(positionMapper.matchesWordPattern("c++"));
|
||||
Assertions.assertTrue(positionMapper.matchesWordPattern("m*a*s*h"));
|
||||
Assertions.assertFalse(positionMapper.matchesWordPattern("Stulpnagelstrasse"));
|
||||
|
@@ -0,0 +1,113 @@
|
||||
package nu.marginalia.converting.processor.plugin.specialization;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import nu.marginalia.converting.processor.logic.TitleExtractor;
|
||||
import nu.marginalia.converting.processor.summary.SummaryExtractor;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.logging.log4j.util.Strings;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
@Singleton
|
||||
public class CppreferenceSpecialization extends WikiSpecialization {
|
||||
|
||||
@Inject
|
||||
public CppreferenceSpecialization(SummaryExtractor summaryExtractor, TitleExtractor titleExtractor) {
|
||||
super(summaryExtractor, titleExtractor);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document prune(Document original) {
|
||||
var doc = original.clone();
|
||||
|
||||
doc.getElementsByClass("t-nv").remove();
|
||||
doc.getElementsByClass("toc").remove();
|
||||
doc.getElementsByClass("mw-head").remove();
|
||||
doc.getElementsByClass("printfooter").remove();
|
||||
doc.getElementsByClass("cpp-footer-base").remove();
|
||||
|
||||
doc.title(doc.title() + " " + Strings.join(extractExtraTokens(doc.title()), ' '));
|
||||
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getSummary(Document doc, Set<String> importantWords) {
|
||||
|
||||
Element declTable = doc.getElementsByClass("t-dcl-begin").first();
|
||||
if (declTable != null) {
|
||||
var nextPar = declTable.nextElementSibling();
|
||||
if (nextPar != null) {
|
||||
return nextPar.text();
|
||||
}
|
||||
}
|
||||
|
||||
return super.getSummary(doc, importantWords);
|
||||
}
|
||||
|
||||
|
||||
public List<String> extractExtraTokens(String title) {
|
||||
|
||||
if (!title.contains("::")) {
|
||||
return List.of();
|
||||
}
|
||||
if (!title.contains("-")) {
|
||||
return List.of();
|
||||
}
|
||||
|
||||
title = StringUtils.split(title, '-')[0];
|
||||
|
||||
String name = title;
|
||||
for (;;) {
|
||||
int lbidx = name.indexOf('<');
|
||||
int rbidx = name.indexOf('>');
|
||||
|
||||
if (lbidx > 0 && rbidx > lbidx) {
|
||||
String className = name.substring(0, lbidx);
|
||||
String methodName = name.substring(rbidx + 1);
|
||||
name = className + methodName;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
List<String> tokens = new ArrayList<>();
|
||||
|
||||
for (var part : name.split("\\s*,\\s*")) {
|
||||
if (part.endsWith(")") && !part.endsWith("()")) {
|
||||
int parenStart = part.indexOf('(');
|
||||
if (parenStart > 0) { // foo(...) -> foo
|
||||
part = part.substring(0, parenStart);
|
||||
}
|
||||
else if (parenStart == 0) { // (foo) -> foo
|
||||
part = part.substring(1, part.length() - 1);
|
||||
}
|
||||
}
|
||||
|
||||
part = part.trim();
|
||||
if (part.contains("::")) {
|
||||
tokens.add(part);
|
||||
if (part.startsWith("std::")) {
|
||||
tokens.add(part.substring(5));
|
||||
|
||||
int ss = part.indexOf("::", 5);
|
||||
if (ss > 0) {
|
||||
tokens.add(part.substring(0, ss));
|
||||
tokens.add(part.substring(ss+2));
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return tokens;
|
||||
}
|
||||
|
||||
|
||||
}
|
@@ -24,6 +24,7 @@ public class HtmlProcessorSpecializations {
|
||||
private final WikiSpecialization wikiSpecialization;
|
||||
private final BlogSpecialization blogSpecialization;
|
||||
private final GogStoreSpecialization gogStoreSpecialization;
|
||||
private final CppreferenceSpecialization cppreferenceSpecialization;
|
||||
private final DefaultSpecialization defaultSpecialization;
|
||||
|
||||
@Inject
|
||||
@@ -37,6 +38,7 @@ public class HtmlProcessorSpecializations {
|
||||
WikiSpecialization wikiSpecialization,
|
||||
BlogSpecialization blogSpecialization,
|
||||
GogStoreSpecialization gogStoreSpecialization,
|
||||
CppreferenceSpecialization cppreferenceSpecialization,
|
||||
DefaultSpecialization defaultSpecialization) {
|
||||
this.domainTypes = domainTypes;
|
||||
this.lemmySpecialization = lemmySpecialization;
|
||||
@@ -48,6 +50,7 @@ public class HtmlProcessorSpecializations {
|
||||
this.wikiSpecialization = wikiSpecialization;
|
||||
this.blogSpecialization = blogSpecialization;
|
||||
this.gogStoreSpecialization = gogStoreSpecialization;
|
||||
this.cppreferenceSpecialization = cppreferenceSpecialization;
|
||||
this.defaultSpecialization = defaultSpecialization;
|
||||
}
|
||||
|
||||
@@ -66,6 +69,10 @@ public class HtmlProcessorSpecializations {
|
||||
return mariadbKbSpecialization;
|
||||
}
|
||||
|
||||
if (url.domain.getTopDomain().equals("cppreference.com")) {
|
||||
return cppreferenceSpecialization;
|
||||
}
|
||||
|
||||
if (url.domain.toString().equals("store.steampowered.com")) {
|
||||
return steamStoreSpecialization;
|
||||
}
|
||||
@@ -86,6 +93,9 @@ public class HtmlProcessorSpecializations {
|
||||
if (generator.keywords().contains("javadoc")) {
|
||||
return javadocSpecialization;
|
||||
}
|
||||
|
||||
// Must be toward the end, as some specializations are for
|
||||
// wiki-generator content
|
||||
if (generator.type() == GeneratorType.WIKI) {
|
||||
return wikiSpecialization;
|
||||
}
|
||||
@@ -105,7 +115,7 @@ public class HtmlProcessorSpecializations {
|
||||
|
||||
boolean shouldIndex(EdgeUrl url);
|
||||
double lengthModifier();
|
||||
void amendWords(Document doc, DocumentKeywordsBuilder words);
|
||||
|
||||
default void amendWords(Document doc, DocumentKeywordsBuilder words) {}
|
||||
}
|
||||
}
|
||||
|
@@ -4,7 +4,6 @@ import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import nu.marginalia.converting.processor.logic.TitleExtractor;
|
||||
import nu.marginalia.converting.processor.summary.SummaryExtractor;
|
||||
import nu.marginalia.keyword.model.DocumentKeywordsBuilder;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
@@ -93,6 +92,8 @@ public class WikiSpecialization extends DefaultSpecialization {
|
||||
return true;
|
||||
}
|
||||
|
||||
public void amendWords(Document doc, DocumentKeywordsBuilder words) {
|
||||
@Override
|
||||
public double lengthModifier() {
|
||||
return 2.5;
|
||||
}
|
||||
}
|
||||
|
@@ -0,0 +1,27 @@
|
||||
package nu.marginalia.converting.processor.plugin.specialization;
|
||||
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
class CppreferenceSpecializationTest {
|
||||
CppreferenceSpecialization specialization = new CppreferenceSpecialization(null, null);
|
||||
|
||||
@Test
|
||||
public void testTitleMagic() {
|
||||
|
||||
List<String> ret;
|
||||
|
||||
ret = specialization.extractExtraTokens("std::multimap<Key, T, Compare, Allocator>::crend - cppreference.com");
|
||||
Assertions.assertTrue(ret.contains("std::multimap::crend"));
|
||||
Assertions.assertTrue(ret.contains("multimap::crend"));
|
||||
Assertions.assertTrue(ret.contains("std::multimap"));
|
||||
Assertions.assertTrue(ret.contains("crend"));
|
||||
|
||||
ret = specialization.extractExtraTokens("std::coroutine_handle<Promise>::operator(), std::coroutine_handle<Promise>::resume - cppreference.com");
|
||||
Assertions.assertTrue(ret.contains("std::coroutine_handle::operator()"));
|
||||
Assertions.assertTrue(ret.contains("std::coroutine_handle::resume"));
|
||||
}
|
||||
|
||||
}
|
@@ -8,8 +8,8 @@
|
||||
<ShortName>Marginalia</ShortName>
|
||||
<Description>Search Marginalia</Description>
|
||||
<InputEncoding>UTF-8</InputEncoding>
|
||||
<Image width="16" height="16" type="image/x-icon">https://search.marginalia.nu/favicon.ico</Image>
|
||||
<Image width="16" height="16" type="image/x-icon">https://old-search.marginalia.nu/favicon.ico</Image>
|
||||
<Url type="text/html" method="get"
|
||||
template="https://search.marginalia.nu/search?query={searchTerms}&ref=opensearch"/>
|
||||
<moz:SearchForm>https://search.marginalia.nu/</moz:SearchForm>
|
||||
template="https://old-search.marginalia.nu/search?query={searchTerms}&ref=opensearch"/>
|
||||
<moz:SearchForm>https://old-search.marginalia.nu/</moz:SearchForm>
|
||||
</OpenSearchDescription>
|
@@ -1,362 +0,0 @@
|
||||
package nu.marginalia.search.paperdoll;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import com.google.inject.AbstractModule;
|
||||
import com.google.inject.Guice;
|
||||
import com.zaxxer.hikari.HikariConfig;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import nu.marginalia.api.domains.DomainInfoClient;
|
||||
import nu.marginalia.api.domains.model.DomainInformation;
|
||||
import nu.marginalia.api.domains.model.SimilarDomain;
|
||||
import nu.marginalia.api.searchquery.QueryClient;
|
||||
import nu.marginalia.api.searchquery.model.query.QueryResponse;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchQuery;
|
||||
import nu.marginalia.api.searchquery.model.query.SearchSpecification;
|
||||
import nu.marginalia.api.searchquery.model.results.DecoratedSearchResultItem;
|
||||
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
|
||||
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
|
||||
import nu.marginalia.index.query.limit.QueryLimits;
|
||||
import nu.marginalia.index.query.limit.QueryStrategy;
|
||||
import nu.marginalia.index.query.limit.SpecificationLimit;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import nu.marginalia.model.crawl.HtmlFeature;
|
||||
import nu.marginalia.model.gson.GsonFactory;
|
||||
import nu.marginalia.screenshot.ScreenshotService;
|
||||
import nu.marginalia.search.SearchModule;
|
||||
import nu.marginalia.search.SearchService;
|
||||
import nu.marginalia.service.ServiceId;
|
||||
import nu.marginalia.service.discovery.ServiceRegistryIf;
|
||||
import nu.marginalia.service.discovery.property.ServiceEndpoint;
|
||||
import nu.marginalia.service.module.ServiceConfigurationModule;
|
||||
import nu.marginalia.test.TestMigrationLoader;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Tag;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.mockito.Mockito;
|
||||
import org.testcontainers.containers.MariaDBContainer;
|
||||
import org.testcontainers.junit.jupiter.Container;
|
||||
import org.testcontainers.junit.jupiter.Testcontainers;
|
||||
import spark.Spark;
|
||||
|
||||
import java.net.URISyntaxException;
|
||||
import java.sql.SQLException;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.CompletableFuture;
|
||||
|
||||
import static org.mockito.ArgumentMatchers.any;
|
||||
import static org.mockito.ArgumentMatchers.anyInt;
|
||||
import static org.mockito.Mockito.when;
|
||||
|
||||
|
||||
/** This class is a special test class that sets up a search service
|
||||
* and registers some search results, without actually starting the rest
|
||||
* of the environment. This is used to test the search service in isolation
|
||||
* when working on the frontend.
|
||||
* <p></p>
|
||||
* It's not actually a test, but it's in the test directory because it's
|
||||
* using test related classes.
|
||||
* <p></p>
|
||||
* When using gradle, run ./gradlew paperDoll --info to run this test,
|
||||
* the system will wait for you to kill the process to stop the test,
|
||||
* and the UI is available at port 9999.
|
||||
*/
|
||||
@Testcontainers
|
||||
@Tag("paperdoll")
|
||||
public class SearchServicePaperDoll extends AbstractModule {
|
||||
|
||||
@Container
|
||||
static MariaDBContainer<?> mariaDBContainer = new MariaDBContainer<>("mariadb")
|
||||
.withDatabaseName("WMSA_prod")
|
||||
.withUsername("wmsa")
|
||||
.withPassword("wmsa")
|
||||
.withNetworkAliases("mariadb");
|
||||
|
||||
private static HikariDataSource dataSource;
|
||||
|
||||
private static List<DecoratedSearchResultItem> results = new ArrayList<>();
|
||||
private static List<SimilarDomain> dummyLinks = new ArrayList<>();
|
||||
private static QueryResponse searchResponse;
|
||||
private static final Gson gson = GsonFactory.get();
|
||||
|
||||
void registerSearchResult(
|
||||
String url,
|
||||
String title,
|
||||
String description,
|
||||
Collection<HtmlFeature> features,
|
||||
double quality,
|
||||
double score,
|
||||
long positions)
|
||||
{
|
||||
try {
|
||||
results.add(new DecoratedSearchResultItem(
|
||||
new SearchResultItem(url.hashCode(), 2, 3, score, 0),
|
||||
new EdgeUrl(url),
|
||||
title,
|
||||
description,
|
||||
quality,
|
||||
"HTML5",
|
||||
HtmlFeature.encode(features),
|
||||
null,
|
||||
url.hashCode(),
|
||||
400,
|
||||
positions,
|
||||
score,
|
||||
4,
|
||||
null)
|
||||
);
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new RuntimeException();
|
||||
}
|
||||
}
|
||||
|
||||
@BeforeAll
|
||||
public static void setup() throws URISyntaxException {
|
||||
if (!Boolean.getBoolean("runPaperDoll")) {
|
||||
return;
|
||||
}
|
||||
|
||||
HikariConfig config = new HikariConfig();
|
||||
config.setJdbcUrl(mariaDBContainer.getJdbcUrl());
|
||||
config.setUsername("wmsa");
|
||||
config.setPassword("wmsa");
|
||||
|
||||
dataSource = new HikariDataSource(config);
|
||||
|
||||
TestMigrationLoader.flywayMigration(dataSource);
|
||||
|
||||
System.setProperty("service-name", "search");
|
||||
System.setProperty("search.websiteUrl", "http://localhost:9999/");
|
||||
|
||||
try (var conn = dataSource.getConnection();
|
||||
var newsStmt = conn.prepareStatement("""
|
||||
INSERT INTO SEARCH_NEWS_FEED(TITLE, LINK, SOURCE, LIST_DATE)
|
||||
VALUES (?, ?, ?, ?)
|
||||
""");
|
||||
var domainStmt = conn.prepareStatement("""
|
||||
INSERT INTO EC_DOMAIN(ID, DOMAIN_NAME, DOMAIN_TOP, NODE_AFFINITY)
|
||||
VALUES (?, ?, ?, ?)
|
||||
""");
|
||||
var randomStmt = conn.prepareStatement("""
|
||||
INSERT INTO EC_RANDOM_DOMAINS(DOMAIN_ID, DOMAIN_SET)
|
||||
VALUES (?, ?)
|
||||
""")
|
||||
) {
|
||||
newsStmt.setString(1, "Lex Luthor elected president");
|
||||
newsStmt.setString(2, "https://www.example.com/foo");
|
||||
newsStmt.setString(3, "Daily Planet");
|
||||
newsStmt.setDate(4, new java.sql.Date(System.currentTimeMillis()));
|
||||
newsStmt.execute();
|
||||
|
||||
newsStmt.setString(1, "Besieged Alesian onlookers confused as Caesar builds a wall around his wall around the city walls");
|
||||
newsStmt.setString(2, "https://www.example2.com/bar");
|
||||
newsStmt.setString(3, "The Gaulish Observer");
|
||||
newsStmt.setDate(4, new java.sql.Date(System.currentTimeMillis()));
|
||||
newsStmt.execute();
|
||||
|
||||
newsStmt.setString(1, "Marginalia acquires Google");
|
||||
newsStmt.setString(2, "https://www.example3.com/baz");
|
||||
newsStmt.setString(3, "The Dependent");
|
||||
newsStmt.setDate(4, new java.sql.Date(System.currentTimeMillis()));
|
||||
newsStmt.execute();
|
||||
|
||||
domainStmt.setInt(1, 1);
|
||||
domainStmt.setString(2, "www.example.com");
|
||||
domainStmt.setString(3, "example.com");
|
||||
domainStmt.setInt(4, 1);
|
||||
domainStmt.execute();
|
||||
|
||||
domainStmt.setInt(1, 2);
|
||||
domainStmt.setString(2, "www.example2.com");
|
||||
domainStmt.setString(3, "example2.com");
|
||||
domainStmt.setInt(4, 2);
|
||||
domainStmt.execute();
|
||||
|
||||
domainStmt.setInt(1, 3);
|
||||
domainStmt.setString(2, "www.example3.com");
|
||||
domainStmt.setString(3, "example3.com");
|
||||
domainStmt.setInt(4, 3);
|
||||
domainStmt.execute();
|
||||
|
||||
randomStmt.setInt(1, 1);
|
||||
randomStmt.setInt(2, 0);
|
||||
randomStmt.execute();
|
||||
|
||||
randomStmt.setInt(1, 2);
|
||||
randomStmt.setInt(2, 0);
|
||||
randomStmt.execute();
|
||||
|
||||
randomStmt.setInt(1, 3);
|
||||
randomStmt.setInt(2, 0);
|
||||
randomStmt.execute();
|
||||
} catch (SQLException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
searchResponse = new QueryResponse(
|
||||
new SearchSpecification(new SearchQuery(), List.of(), "", "test",
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
SpecificationLimit.none(),
|
||||
new QueryLimits(10, 20, 3, 4),
|
||||
QueryStrategy.AUTO,
|
||||
ResultRankingParameters.sensibleDefaults()
|
||||
),
|
||||
results,
|
||||
List.of(),
|
||||
List.of(),
|
||||
1,
|
||||
1,
|
||||
null
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void run() throws Exception {
|
||||
if (!Boolean.getBoolean("runPaperDoll")) {
|
||||
return;
|
||||
}
|
||||
|
||||
var injector = Guice.createInjector(
|
||||
new ServiceConfigurationModule(ServiceId.Search),
|
||||
new SearchModule(),
|
||||
this);
|
||||
|
||||
injector.getInstance(SearchService.class);
|
||||
|
||||
List<String> suggestions = List.of("foo", "bar", "baz");
|
||||
|
||||
Spark.get("/suggest/", (rq, rsp) -> {
|
||||
rsp.type("application/json");
|
||||
return gson.toJson(suggestions);
|
||||
});
|
||||
|
||||
Spark.get("/screenshot/*", (rq, rsp) -> {
|
||||
rsp.type("image/svg+xml");
|
||||
return """
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<svg
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
width="640px"
|
||||
height="480px"
|
||||
viewBox="0 0 640 480"
|
||||
version="1.1">
|
||||
<g>
|
||||
<rect
|
||||
style="fill:#808080"
|
||||
id="rect288"
|
||||
width="595.41992"
|
||||
height="430.01825"
|
||||
x="23.034981"
|
||||
y="27.850344" />
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-size:100px;fill:#909090;font-family:sans-serif;"
|
||||
x="20"
|
||||
y="120">Placeholder</text>
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-size:32px;fill:#000000;font-family:monospace;"
|
||||
x="320" y="240" dominant-baseline="middle" text-anchor="middle">Lorem Ipsum As F</text>
|
||||
</g>
|
||||
</svg>
|
||||
""";
|
||||
});
|
||||
|
||||
registerSearchResult("https://www.example.com/foo", "Foo", "Lorem ipsum dolor sit amet", Set.of(), 0.5, 0.5, ~0L);
|
||||
registerSearchResult("https://www.example2.com/bar", "Bar", "Some text goes here", Set.of(), 0.5, 0.5, 1L);
|
||||
registerSearchResult("https://www.example3.com/baz", "All HTML Features", "This one's got every feature", EnumSet.allOf(HtmlFeature.class), 0.5, 0.5, 1L);
|
||||
|
||||
|
||||
|
||||
|
||||
dummyLinks.add(new SimilarDomain(
|
||||
new EdgeUrl("https://www.example.com/foo"),
|
||||
1,
|
||||
0.5,
|
||||
0.5,
|
||||
true,
|
||||
true,
|
||||
true,
|
||||
true,
|
||||
SimilarDomain.LinkType.FOWARD
|
||||
));
|
||||
dummyLinks.add(new SimilarDomain(
|
||||
new EdgeUrl("https://www.example2.com/foo"),
|
||||
2,
|
||||
0.5,
|
||||
1,
|
||||
false,
|
||||
false,
|
||||
true,
|
||||
true,
|
||||
SimilarDomain.LinkType.BACKWARD
|
||||
));
|
||||
dummyLinks.add(new SimilarDomain(
|
||||
new EdgeUrl("https://www.example3.com/foo"),
|
||||
3,
|
||||
0,
|
||||
0.5,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
SimilarDomain.LinkType.BIDIRECTIONAL
|
||||
));
|
||||
|
||||
|
||||
for (;;);
|
||||
}
|
||||
|
||||
public void configure() {
|
||||
try {
|
||||
var serviceRegistry = Mockito.mock(ServiceRegistryIf.class);
|
||||
when(serviceRegistry.registerService(any(), any(), any())).thenReturn(new ServiceEndpoint("localhost", 9999));
|
||||
|
||||
bind(ServiceRegistryIf.class).toInstance(serviceRegistry);
|
||||
bind(HikariDataSource.class).toInstance(dataSource);
|
||||
|
||||
var qsMock = Mockito.mock(QueryClient.class);
|
||||
when(qsMock.search(any())).thenReturn(searchResponse);
|
||||
bind(QueryClient.class).toInstance(qsMock);
|
||||
|
||||
var asMock = Mockito.mock(DomainInfoClient.class);
|
||||
|
||||
when(asMock.isAccepting()).thenReturn(true);
|
||||
when(asMock.linkedDomains(anyInt(), anyInt())).thenReturn(CompletableFuture.completedFuture(dummyLinks));
|
||||
when(asMock.similarDomains(anyInt(), anyInt())).thenReturn(CompletableFuture.completedFuture(dummyLinks));
|
||||
when(asMock.domainInformation(anyInt())).thenReturn(CompletableFuture.completedFuture(
|
||||
new DomainInformation(new EdgeDomain("www.example.com"),
|
||||
false,
|
||||
123,
|
||||
123,
|
||||
123,
|
||||
123,
|
||||
123,
|
||||
1,
|
||||
0.5,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
"127.0.0.1",
|
||||
1,
|
||||
"ACME",
|
||||
"CA",
|
||||
"CA",
|
||||
"Exemplary")
|
||||
));
|
||||
|
||||
bind(DomainInfoClient.class).toInstance(asMock);
|
||||
|
||||
var sss = Mockito.mock(ScreenshotService.class);
|
||||
when(sss.hasScreenshot(anyInt())).thenReturn(true);
|
||||
bind(ScreenshotService.class).toInstance(sss);
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@@ -197,7 +197,6 @@ public class SearchSiteInfoService {
|
||||
|
||||
var domain = new EdgeDomain(domainName);
|
||||
final int domainId = domainQueries.tryGetDomainId(domain).orElse(-1);
|
||||
boolean viableAliasDomain = domain.aliasDomain().map(alias -> domainQueries.tryGetDomainId(alias).isPresent()).orElse(false);
|
||||
|
||||
final Future<DomainInformation> domainInfoFuture;
|
||||
final Future<List<SimilarDomain>> similarSetFuture;
|
||||
@@ -232,9 +231,10 @@ public class SearchSiteInfoService {
|
||||
url = sampleResults.getFirst().url.withPathAndParam("/", null).toString();
|
||||
}
|
||||
|
||||
|
||||
var result = new SiteInfoWithContext(domainName,
|
||||
isSubscribed,
|
||||
viableAliasDomain ? domain.aliasDomain().map(EdgeDomain::toString) : Optional.empty(),
|
||||
domainQueries.otherSubdomains(domain, 5),
|
||||
domainId,
|
||||
url,
|
||||
hasScreenshot,
|
||||
@@ -352,7 +352,7 @@ public class SearchSiteInfoService {
|
||||
|
||||
public record SiteInfoWithContext(String domain,
|
||||
boolean isSubscribed,
|
||||
Optional<String> aliasDomain,
|
||||
List<EdgeDomain> siblingDomains,
|
||||
int domainId,
|
||||
String siteUrl,
|
||||
boolean hasScreenshot,
|
||||
|
@@ -1,3 +1,4 @@
|
||||
@import nu.marginalia.model.EdgeDomain
|
||||
@import nu.marginalia.search.svc.SearchSiteInfoService
|
||||
@import nu.marginalia.search.svc.SearchSiteInfoService.*
|
||||
@import nu.marginalia.search.model.UrlDetails
|
||||
@@ -13,18 +14,18 @@
|
||||
<span>${siteInfo.domain()}</span>
|
||||
<div class="grow">
|
||||
</div>
|
||||
<a rel="nofollow noopener external" href="${siteInfo.siteUrl()}" class="fa-solid fa-arrow-up-right-from-square" ></a>
|
||||
<a href="https://web.archive.org/web/*/${siteInfo.domain()}"
|
||||
class="p-1.5 text-white px-4"
|
||||
title="Wayback Machine">
|
||||
<i class="fas fa-clock-rotate-left text-sm"></i>
|
||||
</a>
|
||||
<a title="Visit ${siteInfo.domain()}" rel="nofollow noopener external" href="${siteInfo.siteUrl()}" class="fa-solid fa-arrow-up-right-from-square" ></a>
|
||||
</div>
|
||||
|
||||
@if (siteInfo.hasScreenshot())
|
||||
<a class="mx-3 " tabindex="-1" rel="nofollow noopener external" href="${siteInfo.siteUrl()}">
|
||||
<img class="border dark:border-gray-600 shadow-inner" src="/screenshot/${siteInfo.domainId()}" alt="Screenshot of ${siteInfo.domain()}">
|
||||
</a>
|
||||
@elseif (siteInfo.aliasDomain().isPresent())
|
||||
<div class="mx-3 my-3 text-xs text-slate-800 dark:text-white">
|
||||
The search engine is also aware of links to <a class="underline text-liteblue dark:text-blue-200" href="/site/${siteInfo.aliasDomain().get()}">${siteInfo.aliasDomain().get()}</a>,
|
||||
this may be the canonical address.
|
||||
</div>
|
||||
@endif
|
||||
|
||||
@if (siteInfo.hasFeed())
|
||||
@@ -80,6 +81,30 @@
|
||||
@endif
|
||||
|
||||
|
||||
@if (!siteInfo.siblingDomains().isEmpty())
|
||||
<div class="mx-3 flex place-items-baseline space-x-2 p-2 bg-gray-100 dark:bg-gray-600 rounded">
|
||||
<i class="fas fa-globe"></i>
|
||||
<span>Related Subdomains</span>
|
||||
</div>
|
||||
|
||||
<table class="min-w-full divide-y divide-gray-200 dark:divide-gray-600 mx-4">
|
||||
<thead>
|
||||
<tr class="bg-gray-50 dark:bg-gray-700">
|
||||
<th scope="col" class="px-2 py-2 text-left text-xs font-medium text-gray-500 dark:text-gray-100 uppercase tracking-wider">Domain Name</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody class="bg-white dark:bg-gray-800 divide-y divide-gray-200 dark:divide-gray-600 text-xs">
|
||||
@for (EdgeDomain sibling : siteInfo.siblingDomains())
|
||||
<tr>
|
||||
<td class="px-3 py-6 md:py-3 whitespace-nowrap">
|
||||
<a class="text-liteblue dark:text-blue-200" href="/site/${sibling.toString()}">${sibling.toString()}</a>
|
||||
</td>
|
||||
</tr>
|
||||
@endfor
|
||||
</tbody>
|
||||
</table>
|
||||
@endif
|
||||
|
||||
@if (siteInfo.domainInformation().isUnknownDomain())
|
||||
<div class="mx-3 flex place-items-baseline space-x-2 p-2 bg-gray-100 dark:bg-gray-600 rounded">
|
||||
<i class="fa-regular fa-circle-question"></i>
|
||||
|
@@ -8,8 +8,8 @@
|
||||
<ShortName>Marginalia</ShortName>
|
||||
<Description>Search Marginalia</Description>
|
||||
<InputEncoding>UTF-8</InputEncoding>
|
||||
<Image width="16" height="16" type="image/x-icon">https://search.marginalia.nu/favicon.ico</Image>
|
||||
<Image width="16" height="16" type="image/x-icon">https://marginalia-search.com/favicon.ico</Image>
|
||||
<Url type="text/html" method="get"
|
||||
template="https://search.marginalia.nu/search?query={searchTerms}&ref=opensearch"/>
|
||||
<moz:SearchForm>https://search.marginalia.nu/</moz:SearchForm>
|
||||
template="https://marginalia-search.com/search?query={searchTerms}&ref=opensearch"/>
|
||||
<moz:SearchForm>https://marginalia-search.com/</moz:SearchForm>
|
||||
</OpenSearchDescription>
|
@@ -18,7 +18,6 @@ import nu.marginalia.search.svc.SearchSiteInfoService;
|
||||
import java.net.URISyntaxException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
|
||||
public class MockedSearchResults {
|
||||
@@ -133,7 +132,9 @@ public class MockedSearchResults {
|
||||
return new SearchSiteInfoService.SiteInfoWithContext(
|
||||
"www.example.com",
|
||||
false,
|
||||
Optional.of("other.example.com"),
|
||||
List.of(new EdgeDomain("example.com"),
|
||||
new EdgeDomain("about.example.com")
|
||||
),
|
||||
14,
|
||||
"https://www.example.com",
|
||||
true,
|
||||
|
@@ -11,10 +11,13 @@ platforms, but for lack of suitable hardware, this can not be guaranteed.
|
||||
**Docker** - It is a bit of a pain to install, but if you follow
|
||||
[this guide](https://docs.docker.com/engine/install/ubuntu/#install-using-the-repository) you're on the right track for ubuntu-like systems.
|
||||
|
||||
**JDK 21** - The code uses Java 21 preview features.
|
||||
**JDK 23** - The code uses Java 23 preview features.
|
||||
|
||||
The civilized way of installing this is to use [SDKMAN](https://sdkman.io/);
|
||||
graalce is a good distribution choice but it doesn't matter too much.
|
||||
|
||||
**Tailwindcss** - Install NPM and run `npm install -D tailwindcss`
|
||||
|
||||
## Quick Set up
|
||||
|
||||
[https://docs.marginalia.nu/](https://docs.marginalia.nu/) has a more comprehensive guide for the install
|
||||
|
@@ -701,7 +701,7 @@ public abstract class AbstractRssReader<C extends Channel, I extends Item> {
|
||||
}
|
||||
}
|
||||
} catch (XMLStreamException e) {
|
||||
LOGGER.log(Level.WARNING, "Failed to parse XML.", e);
|
||||
LOGGER.log(Level.FINE, "Failed to parse XML.", e);
|
||||
}
|
||||
|
||||
close();
|
||||
|
@@ -258,6 +258,13 @@ if __name__ == '__main__':
|
||||
deploy_tier=2,
|
||||
groups={"all", "frontend", "core"}
|
||||
),
|
||||
'search-legacy': ServiceConfig(
|
||||
gradle_target=':code:services-application:search-service-legacy:docker',
|
||||
docker_name='search-service-legacy',
|
||||
instances=None,
|
||||
deploy_tier=3,
|
||||
groups={"all", "frontend", "core"}
|
||||
),
|
||||
'api': ServiceConfig(
|
||||
gradle_target=':code:services-application:api-service:docker',
|
||||
docker_name='api-service',
|
||||
|
Reference in New Issue
Block a user