1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-06 07:32:38 +02:00

Compare commits

...

5 Commits

Author SHA1 Message Date
Viktor Lofgren
b03c43224c (search) Fix redirects in new search UI 2025-07-11 23:44:45 +02:00
Viktor Lofgren
9b4ce9e9eb (search) Fix !w redirect 2025-07-11 23:28:09 +02:00
Viktor
81ac02a695 Merge pull request #209 from us3r1d/master
added converter.insertFoundDomains property
2025-07-11 21:34:04 +02:00
krystal
47f624fb3b changed converter.insertFoundDomains to loader.insertFoundDomains 2025-07-11 12:13:45 -07:00
krystal
c866f19cbb added converter.insertFoundDomains property 2025-07-10 15:36:59 -07:00
6 changed files with 74 additions and 23 deletions

View File

@@ -47,6 +47,8 @@ public class ScrapeFeedsActor extends RecordActorPrototype {
private final Path feedPath = WmsaHome.getHomePath().resolve("data/scrape-urls.txt"); private final Path feedPath = WmsaHome.getHomePath().resolve("data/scrape-urls.txt");
private static boolean insertFoundDomains = Boolean.getBoolean("loader.insertFoundDomains");
public record Initial() implements ActorStep {} public record Initial() implements ActorStep {}
@Resume(behavior = ActorResumeBehavior.RETRY) @Resume(behavior = ActorResumeBehavior.RETRY)
public record Wait(String ts) implements ActorStep {} public record Wait(String ts) implements ActorStep {}
@@ -57,6 +59,8 @@ public class ScrapeFeedsActor extends RecordActorPrototype {
public ActorStep transition(ActorStep self) throws Exception { public ActorStep transition(ActorStep self) throws Exception {
return switch(self) { return switch(self) {
case Initial() -> { case Initial() -> {
if (!insertFoundDomains) yield new Error("Domain insertion prohibited, aborting");
if (nodeConfigurationService.get(nodeId).profile() != NodeProfile.REALTIME) { if (nodeConfigurationService.get(nodeId).profile() != NodeProfile.REALTIME) {
yield new Error("Invalid node profile for RSS update"); yield new Error("Invalid node profile for RSS update");
} }

View File

@@ -40,6 +40,8 @@ public class LoaderMain extends ProcessMainClass {
private final KeywordLoaderService keywordLoaderService; private final KeywordLoaderService keywordLoaderService;
private final DocumentLoaderService documentLoaderService; private final DocumentLoaderService documentLoaderService;
private static boolean insertFoundDomains = Boolean.getBoolean("loader.insertFoundDomains");
public static void main(String... args) { public static void main(String... args) {
try { try {
new org.mariadb.jdbc.Driver(); new org.mariadb.jdbc.Driver();
@@ -99,6 +101,10 @@ public class LoaderMain extends ProcessMainClass {
try { try {
var results = ForkJoinPool.commonPool() var results = ForkJoinPool.commonPool()
.invokeAll(List.of());
if ( true == insertFoundDomains ) {
results = ForkJoinPool.commonPool()
.invokeAll( .invokeAll(
List.of( List.of(
() -> linksService.loadLinks(domainIdRegistry, heartbeat, inputData), () -> linksService.loadLinks(domainIdRegistry, heartbeat, inputData),
@@ -107,6 +113,17 @@ public class LoaderMain extends ProcessMainClass {
() -> domainService.loadDomainMetadata(domainIdRegistry, heartbeat, inputData) () -> domainService.loadDomainMetadata(domainIdRegistry, heartbeat, inputData)
) )
); );
}
else {
results = ForkJoinPool.commonPool()
.invokeAll(
List.of(
() -> keywordLoaderService.loadKeywords(domainIdRegistry, heartbeat, inputData),
() -> documentLoaderService.loadDocuments(domainIdRegistry, heartbeat, inputData),
() -> domainService.loadDomainMetadata(domainIdRegistry, heartbeat, inputData)
)
);
}
for (var result : results) { for (var result : results) {
if (result.state() == Future.State.FAILED) { if (result.state() == Future.State.FAILED) {

View File

@@ -25,6 +25,8 @@ import java.util.Set;
@Singleton @Singleton
public class DomainLoaderService { public class DomainLoaderService {
private static boolean insertFoundDomains = Boolean.getBoolean("loader.insertFoundDomains");
private final HikariDataSource dataSource; private final HikariDataSource dataSource;
private final Logger logger = LoggerFactory.getLogger(DomainLoaderService.class); private final Logger logger = LoggerFactory.getLogger(DomainLoaderService.class);
private final int nodeId; private final int nodeId;
@@ -84,6 +86,9 @@ public class DomainLoaderService {
// Add domains that are linked to from the domains we've just crawled, but with -1 affinity meaning they // Add domains that are linked to from the domains we've just crawled, but with -1 affinity meaning they
// can be grabbed by any index node // can be grabbed by any index node
if ( true == insertFoundDomains ) {
logger.info("Adding found domains");
try (var inserter = new DomainInserter(conn, -1); try (var inserter = new DomainInserter(conn, -1);
var processHeartbeat = heartbeat.createAdHocTaskHeartbeat("INSERT_LINKED_DOMAINS")) { var processHeartbeat = heartbeat.createAdHocTaskHeartbeat("INSERT_LINKED_DOMAINS")) {
// Add linked domains, but with -1 affinity meaning they can be grabbed by any index node // Add linked domains, but with -1 affinity meaning they can be grabbed by any index node
@@ -101,8 +106,14 @@ public class DomainLoaderService {
} }
} }
} }
} }
} }
}
else {
logger.info("Skipping found domains");
}
taskHeartbeat.progress(Steps.UPDATE_AFFINITY_AND_IP); taskHeartbeat.progress(Steps.UPDATE_AFFINITY_AND_IP);

View File

@@ -20,7 +20,7 @@ public class BangCommand implements SearchCommandInterface {
{ {
bangsToPattern.put("!g", "https://www.google.com/search?q=%s"); bangsToPattern.put("!g", "https://www.google.com/search?q=%s");
bangsToPattern.put("!ddg", "https://duckduckgo.com/?q=%s"); bangsToPattern.put("!ddg", "https://duckduckgo.com/?q=%s");
bangsToPattern.put("!w", "https://search.marginalia.nu/search?query=%s+site:en.wikipedia.org&profile=wiki"); bangsToPattern.put("!w", "https://old-search.marginalia.nu/search?query=%s+site:en.wikipedia.org&profile=wiki");
} }
@Override @Override

View File

@@ -20,7 +20,7 @@ public class BangCommand implements SearchCommandInterface {
{ {
bangsToPattern.put("!g", "https://www.google.com/search?q=%s"); bangsToPattern.put("!g", "https://www.google.com/search?q=%s");
bangsToPattern.put("!ddg", "https://duckduckgo.com/?q=%s"); bangsToPattern.put("!ddg", "https://duckduckgo.com/?q=%s");
bangsToPattern.put("!w", "https://search.marginalia.nu/search?query=%s+site:en.wikipedia.org&profile=wiki"); bangsToPattern.put("!w", "/search?query=%s+site:en.wikipedia.org");
} }
@Override @Override
@@ -34,7 +34,7 @@ public class BangCommand implements SearchCommandInterface {
if (match.isPresent()) { if (match.isPresent()) {
var url = String.format(redirectPattern, URLEncoder.encode(match.get(), StandardCharsets.UTF_8)); var url = String.format(redirectPattern, URLEncoder.encode(match.get(), StandardCharsets.UTF_8));
new MapModelAndView("redirect.jte", Map.of("url", url)); return Optional.of(new MapModelAndView("redirect.jte", Map.of("url", url)));
} }
} }

View File

@@ -0,0 +1,19 @@
package nu.marginalia.search.command.commands;
import nu.marginalia.WebsiteUrl;
import nu.marginalia.search.command.SearchParameters;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
/**
 * Unit test for {@link BangCommand}, checking which queries it produces a result for.
 */
class BangCommandTest {

    /**
     * A query prefixed with the {@code !w} bang must yield a present result from
     * {@link BangCommand#process}, whereas the same term without any bang must
     * yield an empty one.
     */
    @Test
    void testWikipediaRedirect() {
        BangCommand command = new BangCommand();

        assertTrue(producesResult(command, "!w plato"));
        assertFalse(producesResult(command, "plato"));
    }

    /**
     * Runs {@code query} through {@code command} using default search parameters
     * and reports whether the command returned a present result.
     */
    private static boolean producesResult(BangCommand command, String query) {
        var parameters = SearchParameters.defaultsForQuery(new WebsiteUrl("test"), query, 1);
        return command.process(parameters).isPresent();
    }
}