1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-06 17:32:39 +02:00

Compare commits

...

6 Commits

Author SHA1 Message Date
Viktor Lofgren
94e1aa0baf (search) Tweak token formation to still break apart emails in brackets. 2025-01-05 20:55:44 +01:00
Viktor Lofgren
b62f043910 (search) Adjust token formation rules to be more lenient to C++ and PHP code.
This addresses Issue #142
2025-01-05 20:50:27 +01:00
Viktor
9b2ceaf37c Merge pull request #141 from MarginaliaSearch/vlofgren-patch-1
Update FUNDING.yml
2025-01-05 18:40:20 +01:00
Viktor
8019c2ce18 Update FUNDING.yml 2025-01-05 18:40:06 +01:00
Viktor Lofgren
4da3563d8a (service) Clean up exceptions when requestScreengrab is not available 2025-01-04 14:45:51 +01:00
Viktor Lofgren
48d0a3089a (service) Improve logging around grpc
This change adds a marker for the gRPC-specific logging, as well as improves the clarity and meaningfulness of the log messages.
2025-01-02 20:40:53 +01:00
9 changed files with 71 additions and 12 deletions

1
.github/FUNDING.yml vendored
View File

@@ -1,5 +1,6 @@
# These are supported funding model platforms
polar: marginalia-search
github: MarginaliaSearch
patreon: marginalia_nu
open_collective: # Replace with a single Open Collective username

View File

@@ -7,8 +7,6 @@ import nu.marginalia.service.discovery.property.PartitionTraits;
import nu.marginalia.service.discovery.property.ServiceEndpoint;
import nu.marginalia.service.discovery.property.ServiceKey;
import nu.marginalia.service.discovery.property.ServicePartition;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;
import java.util.concurrent.CompletableFuture;
@@ -24,7 +22,7 @@ import java.util.function.Function;
public class GrpcMultiNodeChannelPool<STUB> {
private final ConcurrentHashMap<Integer, GrpcSingleNodeChannelPool<STUB>> pools =
new ConcurrentHashMap<>();
private static final Logger logger = LoggerFactory.getLogger(GrpcMultiNodeChannelPool.class);
private final ServiceRegistryIf serviceRegistryIf;
private final ServiceKey<? extends PartitionTraits.Multicast> serviceKey;
private final Function<ServiceEndpoint.InstanceAddress, ManagedChannel> channelConstructor;

View File

@@ -10,6 +10,8 @@ import nu.marginalia.service.discovery.property.ServiceKey;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.Marker;
import org.slf4j.MarkerFactory;
import java.time.Duration;
import java.util.*;
@@ -26,6 +28,7 @@ import java.util.function.Function;
public class GrpcSingleNodeChannelPool<STUB> extends ServiceChangeMonitor {
private final Map<InstanceAddress, ConnectionHolder> channels = new ConcurrentHashMap<>();
private final Marker grpcMarker = MarkerFactory.getMarker("GRPC");
private static final Logger logger = LoggerFactory.getLogger(GrpcSingleNodeChannelPool.class);
private final ServiceRegistryIf serviceRegistryIf;
@@ -59,10 +62,10 @@ public class GrpcSingleNodeChannelPool<STUB> extends ServiceChangeMonitor {
for (var route : Sets.symmetricDifference(oldRoutes, newRoutes)) {
ConnectionHolder oldChannel;
if (newRoutes.contains(route)) {
logger.info("Adding route {}", route);
logger.info(grpcMarker, "Adding route {} => {}", serviceKey, route);
oldChannel = channels.put(route, new ConnectionHolder(route));
} else {
logger.info("Expelling route {}", route);
logger.info(grpcMarker, "Expelling route {} => {}", serviceKey, route);
oldChannel = channels.remove(route);
}
if (oldChannel != null) {
@@ -100,7 +103,7 @@ public class GrpcSingleNodeChannelPool<STUB> extends ServiceChangeMonitor {
}
try {
logger.info("Creating channel for {}:{}", serviceKey, address);
logger.info(grpcMarker, "Creating channel for {} => {}", serviceKey, address);
value = channelConstructor.apply(address);
if (channel.compareAndSet(null, value)) {
return value;
@@ -111,7 +114,7 @@ public class GrpcSingleNodeChannelPool<STUB> extends ServiceChangeMonitor {
}
}
catch (Exception e) {
logger.error("Failed to get channel for " + address, e);
logger.error(grpcMarker, "Failed to get channel for " + address, e);
return null;
}
}
@@ -203,7 +206,7 @@ public class GrpcSingleNodeChannelPool<STUB> extends ServiceChangeMonitor {
}
for (var e : exceptions) {
logger.error("Failed to call service {}", serviceKey, e);
logger.error(grpcMarker, "Failed to call service {}", serviceKey, e);
}
throw new ServiceNotAvailableException(serviceKey);

View File

@@ -4,6 +4,11 @@ import nu.marginalia.service.discovery.property.ServiceKey;
public class ServiceNotAvailableException extends RuntimeException {
public ServiceNotAvailableException(ServiceKey<?> key) {
super("Service " + key + " not available");
super(key.toString());
}
/**
 * Reports an empty stack trace for this exception.
 *
 * <p>Anything that reads the trace through this accessor sees zero frames.
 * NOTE(review): code that bypasses this accessor (for example the JDK's own
 * {@code printStackTrace()}) may still print frames; confirm whether the
 * logging backend in use goes through {@code getStackTrace()}.
 *
 * @return a newly allocated, zero-length array on every call
 */
@Override
public StackTraceElement[] getStackTrace() {
    // Suppress stack trace
    final StackTraceElement[] noFrames = {};
    return noFrames;
}
}

View File

@@ -48,5 +48,10 @@ public record ServiceEndpoint(String host, int port) {
public int port() {
return endpoint.port();
}
/**
 * Renders this address as {@code host:port [instance]}.
 *
 * @return the endpoint's host and port joined by a colon, followed by the
 *         instance value in square brackets
 */
@Override
public String toString() {
    final var host = endpoint().host();
    final var port = endpoint.port();

    return host + ":" + port + " [" + instance + "]";
}
}
}

View File

@@ -48,6 +48,19 @@ public sealed interface ServiceKey<P extends ServicePartition> {
{
throw new UnsupportedOperationException();
}
/**
 * Renders this key as {@code rest:<shortName>}, where the short name is the
 * part of {@code name} after its last period (or all of {@code name} when it
 * contains no period).
 *
 * @return the compact, log-friendly form of this key
 */
@Override
public String toString() {
    // lastIndexOf yields -1 when there is no '.', so the +1 makes the
    // substring start at 0 and keep the whole name in that case.
    final String shortName = name.substring(name.lastIndexOf('.') + 1);

    return "rest:" + shortName;
}
}
record Grpc<P extends ServicePartition>(String name, P partition) implements ServiceKey<P> {
public String baseName() {
@@ -64,6 +77,18 @@ public sealed interface ServiceKey<P extends ServicePartition> {
{
return new Grpc<>(name, partition);
}
/**
 * Renders this key as {@code grpc:<shortName>[<partition identifier>]}, where
 * the short name is the part of {@code name} after its last period (or all of
 * {@code name} when it contains no period).
 *
 * @return the compact, log-friendly form of this key
 */
@Override
public String toString() {
    // lastIndexOf yields -1 when there is no '.', so the +1 makes the
    // substring start at 0 and keep the whole name in that case.
    final String shortName = name.substring(name.lastIndexOf('.') + 1);
    final var partitionId = partition.identifier();

    return "grpc:" + shortName + "[" + partitionId + "]";
}
}
}

View File

@@ -5,6 +5,7 @@ import com.google.inject.Singleton;
import nu.marginalia.api.livecapture.LiveCaptureApiGrpc.LiveCaptureApiBlockingStub;
import nu.marginalia.service.client.GrpcChannelPoolFactory;
import nu.marginalia.service.client.GrpcSingleNodeChannelPool;
import nu.marginalia.service.client.ServiceNotAvailableException;
import nu.marginalia.service.discovery.property.ServiceKey;
import nu.marginalia.service.discovery.property.ServicePartition;
import org.slf4j.Logger;
@@ -29,6 +30,9 @@ public class LiveCaptureClient {
channelPool.call(LiveCaptureApiBlockingStub::requestScreengrab)
.run(RpcDomainId.newBuilder().setDomainId(domainId).build());
}
catch (ServiceNotAvailableException e) {
logger.info("requestScreengrab() failed since the service is not available");
}
catch (Exception e) {
logger.error("API Exception", e);
}

View File

@@ -27,7 +27,7 @@ public class SentenceSegmentSplitter {
else {
// If we flatten unicode, we do this...
// FIXME: This can almost definitely be cleaned up and simplified.
wordBreakPattern = Pattern.compile("([^/_#@.a-zA-Z'+\\-0-9\\u00C0-\\u00D6\\u00D8-\\u00f6\\u00f8-\\u00ff]+)|[|]|(\\.(\\s+|$))");
wordBreakPattern = Pattern.compile("([^/<>$:_#@.a-zA-Z'+\\-0-9\\u00C0-\\u00D6\\u00D8-\\u00f6\\u00f8-\\u00ff]+)|[|]|(\\.(\\s+|$))");
}
}
@@ -90,12 +90,16 @@ public class SentenceSegmentSplitter {
for (int i = 0; i < ret.size(); i++) {
String part = ret.get(i);
if (part.startsWith("'") && part.length() > 1) {
if (part.startsWith("<") && part.endsWith(">") && part.length() > 2) {
ret.set(i, part.substring(1, part.length() - 1));
}
else if (part.startsWith("'") && part.length() > 1) {
ret.set(i, part.substring(1));
}
if (part.endsWith("'") && part.length() > 1) {
else if (part.endsWith("'") && part.length() > 1) {
ret.set(i, part.substring(0, part.length()-1));
}
while (part.endsWith(".")) {
part = part.substring(0, part.length()-1);
ret.set(i, part);

View File

@@ -28,6 +28,20 @@ class SentenceExtractorTest {
System.out.println(dld);
}
/** A C++ scope-qualified name must come out as one lower-cased word. */
@Test
void testCplusplus() {
    final String input = "std::vector";

    var sentence = sentenceExtractor.extractSentence(input, EnumSet.noneOf(HtmlTag.class));

    // Exactly one token: the "::" must not split the identifier.
    assertEquals(1, sentence.length());
    assertEquals("std::vector", sentence.wordsLowerCase[0]);
}
/** A PHP superglobal must come out as one lower-cased word, sigil included. */
@Test
void testPHP() {
    final String input = "$_GET";

    var sentence = sentenceExtractor.extractSentence(input, EnumSet.noneOf(HtmlTag.class));

    // Exactly one token: neither '$' nor '_' may act as a word break.
    assertEquals(1, sentence.length());
    assertEquals("$_get", sentence.wordsLowerCase[0]);
}
@Test
void testPolishArtist() {
var dld = sentenceExtractor.extractSentence("Uklański", EnumSet.noneOf(HtmlTag.class));