mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-06 07:32:38 +02:00
Compare commits
5 Commits
deploy-027
...
deploy-027
Author | SHA1 | Date | |
---|---|---|---|
|
b03c43224c | ||
|
9b4ce9e9eb | ||
|
81ac02a695 | ||
|
47f624fb3b | ||
|
c866f19cbb |
@@ -47,6 +47,8 @@ public class ScrapeFeedsActor extends RecordActorPrototype {
|
|||||||
|
|
||||||
private final Path feedPath = WmsaHome.getHomePath().resolve("data/scrape-urls.txt");
|
private final Path feedPath = WmsaHome.getHomePath().resolve("data/scrape-urls.txt");
|
||||||
|
|
||||||
|
private static boolean insertFoundDomains = Boolean.getBoolean("loader.insertFoundDomains");
|
||||||
|
|
||||||
public record Initial() implements ActorStep {}
|
public record Initial() implements ActorStep {}
|
||||||
@Resume(behavior = ActorResumeBehavior.RETRY)
|
@Resume(behavior = ActorResumeBehavior.RETRY)
|
||||||
public record Wait(String ts) implements ActorStep {}
|
public record Wait(String ts) implements ActorStep {}
|
||||||
@@ -57,6 +59,8 @@ public class ScrapeFeedsActor extends RecordActorPrototype {
|
|||||||
public ActorStep transition(ActorStep self) throws Exception {
|
public ActorStep transition(ActorStep self) throws Exception {
|
||||||
return switch(self) {
|
return switch(self) {
|
||||||
case Initial() -> {
|
case Initial() -> {
|
||||||
|
if (!insertFoundDomains) yield new Error("Domain insertion prohibited, aborting");
|
||||||
|
|
||||||
if (nodeConfigurationService.get(nodeId).profile() != NodeProfile.REALTIME) {
|
if (nodeConfigurationService.get(nodeId).profile() != NodeProfile.REALTIME) {
|
||||||
yield new Error("Invalid node profile for RSS update");
|
yield new Error("Invalid node profile for RSS update");
|
||||||
}
|
}
|
||||||
|
@@ -40,6 +40,8 @@ public class LoaderMain extends ProcessMainClass {
|
|||||||
private final KeywordLoaderService keywordLoaderService;
|
private final KeywordLoaderService keywordLoaderService;
|
||||||
private final DocumentLoaderService documentLoaderService;
|
private final DocumentLoaderService documentLoaderService;
|
||||||
|
|
||||||
|
private static boolean insertFoundDomains = Boolean.getBoolean("loader.insertFoundDomains");
|
||||||
|
|
||||||
public static void main(String... args) {
|
public static void main(String... args) {
|
||||||
try {
|
try {
|
||||||
new org.mariadb.jdbc.Driver();
|
new org.mariadb.jdbc.Driver();
|
||||||
@@ -99,6 +101,10 @@ public class LoaderMain extends ProcessMainClass {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
var results = ForkJoinPool.commonPool()
|
var results = ForkJoinPool.commonPool()
|
||||||
|
.invokeAll(List.of());
|
||||||
|
|
||||||
|
if ( true == insertFoundDomains ) {
|
||||||
|
results = ForkJoinPool.commonPool()
|
||||||
.invokeAll(
|
.invokeAll(
|
||||||
List.of(
|
List.of(
|
||||||
() -> linksService.loadLinks(domainIdRegistry, heartbeat, inputData),
|
() -> linksService.loadLinks(domainIdRegistry, heartbeat, inputData),
|
||||||
@@ -107,6 +113,17 @@ public class LoaderMain extends ProcessMainClass {
|
|||||||
() -> domainService.loadDomainMetadata(domainIdRegistry, heartbeat, inputData)
|
() -> domainService.loadDomainMetadata(domainIdRegistry, heartbeat, inputData)
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
results = ForkJoinPool.commonPool()
|
||||||
|
.invokeAll(
|
||||||
|
List.of(
|
||||||
|
() -> keywordLoaderService.loadKeywords(domainIdRegistry, heartbeat, inputData),
|
||||||
|
() -> documentLoaderService.loadDocuments(domainIdRegistry, heartbeat, inputData),
|
||||||
|
() -> domainService.loadDomainMetadata(domainIdRegistry, heartbeat, inputData)
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
for (var result : results) {
|
for (var result : results) {
|
||||||
if (result.state() == Future.State.FAILED) {
|
if (result.state() == Future.State.FAILED) {
|
||||||
|
@@ -25,6 +25,8 @@ import java.util.Set;
|
|||||||
@Singleton
|
@Singleton
|
||||||
public class DomainLoaderService {
|
public class DomainLoaderService {
|
||||||
|
|
||||||
|
private static boolean insertFoundDomains = Boolean.getBoolean("loader.insertFoundDomains");
|
||||||
|
|
||||||
private final HikariDataSource dataSource;
|
private final HikariDataSource dataSource;
|
||||||
private final Logger logger = LoggerFactory.getLogger(DomainLoaderService.class);
|
private final Logger logger = LoggerFactory.getLogger(DomainLoaderService.class);
|
||||||
private final int nodeId;
|
private final int nodeId;
|
||||||
@@ -84,6 +86,9 @@ public class DomainLoaderService {
|
|||||||
|
|
||||||
// Add domains that are linked to from the domains we've just crawled, but with -1 affinity meaning they
|
// Add domains that are linked to from the domains we've just crawled, but with -1 affinity meaning they
|
||||||
// can be grabbed by any index node
|
// can be grabbed by any index node
|
||||||
|
if ( true == insertFoundDomains ) {
|
||||||
|
logger.info("Adding found domains");
|
||||||
|
|
||||||
try (var inserter = new DomainInserter(conn, -1);
|
try (var inserter = new DomainInserter(conn, -1);
|
||||||
var processHeartbeat = heartbeat.createAdHocTaskHeartbeat("INSERT_LINKED_DOMAINS")) {
|
var processHeartbeat = heartbeat.createAdHocTaskHeartbeat("INSERT_LINKED_DOMAINS")) {
|
||||||
// Add linked domains, but with -1 affinity meaning they can be grabbed by any index node
|
// Add linked domains, but with -1 affinity meaning they can be grabbed by any index node
|
||||||
@@ -101,8 +106,14 @@ public class DomainLoaderService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
logger.info("Skipping found domains");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
taskHeartbeat.progress(Steps.UPDATE_AFFINITY_AND_IP);
|
taskHeartbeat.progress(Steps.UPDATE_AFFINITY_AND_IP);
|
||||||
|
|
||||||
|
@@ -20,7 +20,7 @@ public class BangCommand implements SearchCommandInterface {
|
|||||||
{
|
{
|
||||||
bangsToPattern.put("!g", "https://www.google.com/search?q=%s");
|
bangsToPattern.put("!g", "https://www.google.com/search?q=%s");
|
||||||
bangsToPattern.put("!ddg", "https://duckduckgo.com/?q=%s");
|
bangsToPattern.put("!ddg", "https://duckduckgo.com/?q=%s");
|
||||||
bangsToPattern.put("!w", "https://search.marginalia.nu/search?query=%s+site:en.wikipedia.org&profile=wiki");
|
bangsToPattern.put("!w", "https://old-search.marginalia.nu/search?query=%s+site:en.wikipedia.org&profile=wiki");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@@ -20,7 +20,7 @@ public class BangCommand implements SearchCommandInterface {
|
|||||||
{
|
{
|
||||||
bangsToPattern.put("!g", "https://www.google.com/search?q=%s");
|
bangsToPattern.put("!g", "https://www.google.com/search?q=%s");
|
||||||
bangsToPattern.put("!ddg", "https://duckduckgo.com/?q=%s");
|
bangsToPattern.put("!ddg", "https://duckduckgo.com/?q=%s");
|
||||||
bangsToPattern.put("!w", "https://search.marginalia.nu/search?query=%s+site:en.wikipedia.org&profile=wiki");
|
bangsToPattern.put("!w", "/search?query=%s+site:en.wikipedia.org");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -34,7 +34,7 @@ public class BangCommand implements SearchCommandInterface {
|
|||||||
|
|
||||||
if (match.isPresent()) {
|
if (match.isPresent()) {
|
||||||
var url = String.format(redirectPattern, URLEncoder.encode(match.get(), StandardCharsets.UTF_8));
|
var url = String.format(redirectPattern, URLEncoder.encode(match.get(), StandardCharsets.UTF_8));
|
||||||
new MapModelAndView("redirect.jte", Map.of("url", url));
|
return Optional.of(new MapModelAndView("redirect.jte", Map.of("url", url)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -0,0 +1,19 @@
|
|||||||
|
package nu.marginalia.search.command.commands;
|
||||||
|
|
||||||
|
import nu.marginalia.WebsiteUrl;
|
||||||
|
import nu.marginalia.search.command.SearchParameters;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||||
|
|
||||||
|
/**
 * Unit test for {@link BangCommand}: checks whether {@code process} yields a
 * result depending on whether the query carries a bang prefix.
 */
class BangCommandTest {
|
||||||
|
|
||||||
|
/**
 * Verifies that a query prefixed with {@code !w} produces a present result,
 * while the same query without the prefix produces an empty one.
 */
@Test
|
||||||
|
void testWikipediaRedirect() {
|
||||||
|
BangCommand bc = new BangCommand();
|
||||||
|
|
||||||
|
// A query with the "!w" bang must be handled, i.e. the returned value is present.
// NOTE(review): the meaning of the WebsiteUrl("test") and trailing 1 arguments is not visible here — confirm against SearchParameters.defaultsForQuery.
assertTrue(bc.process(SearchParameters.defaultsForQuery(new WebsiteUrl("test"), "!w plato", 1)).isPresent());
|
||||||
|
// A query without any bang must not be handled by this command, i.e. the returned value is empty.
assertFalse(bc.process(SearchParameters.defaultsForQuery(new WebsiteUrl("test"), "plato", 1)).isPresent());
|
||||||
|
}
|
||||||
|
}
|
Reference in New Issue
Block a user