mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-06 17:32:39 +02:00
Compare commits
21 Commits
deploy-014
...
deploy-015
Author | SHA1 | Date | |
---|---|---|---|
|
251006d4f9 | ||
|
c3e99dc12a | ||
|
aaaa2de022 | ||
|
fc1388422a | ||
|
b07080db16 | ||
|
e9d86dca4a | ||
|
1d693f0efa | ||
|
5874a163dc | ||
|
5ec7a1deab | ||
|
7fea2808ed | ||
|
8da74484f0 | ||
|
923d5a7234 | ||
|
58f88749b8 | ||
|
77f727a5ba | ||
|
667cfb53dc | ||
|
fe36d4ed20 | ||
|
acf4bef98d | ||
|
2a737c34bb | ||
|
90a577af82 | ||
|
f0c9b935d8 | ||
|
7b5493dd51 |
@@ -5,7 +5,7 @@ plugins {
|
|||||||
|
|
||||||
// This is a workaround for a bug in the Jib plugin that causes it to stall randomly
|
// This is a workaround for a bug in the Jib plugin that causes it to stall randomly
|
||||||
// https://github.com/GoogleContainerTools/jib/issues/3347
|
// https://github.com/GoogleContainerTools/jib/issues/3347
|
||||||
id 'com.google.cloud.tools.jib' version '3.4.4' apply(false)
|
id 'com.google.cloud.tools.jib' version '3.4.5' apply(false)
|
||||||
}
|
}
|
||||||
|
|
||||||
group 'marginalia'
|
group 'marginalia'
|
||||||
@@ -47,7 +47,7 @@ ext {
|
|||||||
dockerImageBase='container-registry.oracle.com/graalvm/jdk:24'
|
dockerImageBase='container-registry.oracle.com/graalvm/jdk:24'
|
||||||
dockerImageTag='latest'
|
dockerImageTag='latest'
|
||||||
dockerImageRegistry='marginalia'
|
dockerImageRegistry='marginalia'
|
||||||
jibVersion = '3.4.4'
|
jibVersion = '3.4.5'
|
||||||
}
|
}
|
||||||
|
|
||||||
idea {
|
idea {
|
||||||
|
@@ -1,16 +1,14 @@
|
|||||||
package nu.marginalia.model;
|
package nu.marginalia.model;
|
||||||
|
|
||||||
import nu.marginalia.util.QueryParams;
|
import nu.marginalia.util.QueryParams;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
import java.net.MalformedURLException;
|
import java.net.*;
|
||||||
import java.net.URI;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.net.URISyntaxException;
|
|
||||||
import java.net.URL;
|
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
public class EdgeUrl implements Serializable {
|
public class EdgeUrl implements Serializable {
|
||||||
public final String proto;
|
public final String proto;
|
||||||
@@ -33,7 +31,7 @@ public class EdgeUrl implements Serializable {
|
|||||||
|
|
||||||
private static URI parseURI(String url) throws URISyntaxException {
|
private static URI parseURI(String url) throws URISyntaxException {
|
||||||
try {
|
try {
|
||||||
return new URI(urlencodeFixer(url));
|
return EdgeUriFactory.uriFromString(url);
|
||||||
} catch (URISyntaxException ex) {
|
} catch (URISyntaxException ex) {
|
||||||
throw new URISyntaxException("Failed to parse URI '" + url + "'", ex.getMessage());
|
throw new URISyntaxException("Failed to parse URI '" + url + "'", ex.getMessage());
|
||||||
}
|
}
|
||||||
@@ -51,58 +49,6 @@ public class EdgeUrl implements Serializable {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Pattern badCharPattern = Pattern.compile("[ \t\n\"<>\\[\\]()',|]");
|
|
||||||
|
|
||||||
/* Java's URI parser is a bit too strict in throwing exceptions when there's an error.
|
|
||||||
|
|
||||||
Here on the Internet, standards are like the picture on the box of the frozen pizza,
|
|
||||||
and what you get is more like what's on the inside, we try to patch things instead,
|
|
||||||
just give it a best-effort attempt at cleaning out broken or unnecessary constructions
|
|
||||||
like bad or missing URLEncoding
|
|
||||||
*/
|
|
||||||
public static String urlencodeFixer(String url) throws URISyntaxException {
|
|
||||||
var s = new StringBuilder();
|
|
||||||
String goodChars = "&.?:/-;+$#";
|
|
||||||
String hexChars = "0123456789abcdefABCDEF";
|
|
||||||
|
|
||||||
int pathIdx = findPathIdx(url);
|
|
||||||
if (pathIdx < 0) { // url looks like http://marginalia.nu
|
|
||||||
return url + "/";
|
|
||||||
}
|
|
||||||
s.append(url, 0, pathIdx);
|
|
||||||
|
|
||||||
// We don't want the fragment, and multiple fragments breaks the Java URIParser for some reason
|
|
||||||
int end = url.indexOf("#");
|
|
||||||
if (end < 0) end = url.length();
|
|
||||||
|
|
||||||
for (int i = pathIdx; i < end; i++) {
|
|
||||||
int c = url.charAt(i);
|
|
||||||
|
|
||||||
if (goodChars.indexOf(c) >= 0 || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')) {
|
|
||||||
s.appendCodePoint(c);
|
|
||||||
} else if (c == '%' && i + 2 < end) {
|
|
||||||
int cn = url.charAt(i + 1);
|
|
||||||
int cnn = url.charAt(i + 2);
|
|
||||||
if (hexChars.indexOf(cn) >= 0 && hexChars.indexOf(cnn) >= 0) {
|
|
||||||
s.appendCodePoint(c);
|
|
||||||
} else {
|
|
||||||
s.append("%25");
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
s.append(String.format("%%%02X", c));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return s.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
private static int findPathIdx(String url) throws URISyntaxException {
|
|
||||||
int colonIdx = url.indexOf(':');
|
|
||||||
if (colonIdx < 0 || colonIdx + 2 >= url.length()) {
|
|
||||||
throw new URISyntaxException(url, "Lacking protocol");
|
|
||||||
}
|
|
||||||
return url.indexOf('/', colonIdx + 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
public EdgeUrl(URI URI) {
|
public EdgeUrl(URI URI) {
|
||||||
try {
|
try {
|
||||||
@@ -247,3 +193,123 @@ public class EdgeUrl implements Serializable {
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Java's URI parser is a bit too strict in throwing exceptions when there's an error.
|
||||||
|
|
||||||
|
Here on the Internet, standards are like the picture on the box of the frozen pizza,
|
||||||
|
and what you get is more like what's on the inside, we try to patch things instead,
|
||||||
|
just give it a best-effort attempt att cleaning out broken or unnecessary constructions
|
||||||
|
like bad or missing URLEncoding
|
||||||
|
*/
|
||||||
|
class EdgeUriFactory {
|
||||||
|
public static URI uriFromString(String url) throws URISyntaxException {
|
||||||
|
var s = new StringBuilder();
|
||||||
|
|
||||||
|
int pathIdx = findPathIdx(url);
|
||||||
|
if (pathIdx < 0) { // url looks like http://marginalia.nu
|
||||||
|
return new URI(url + "/");
|
||||||
|
}
|
||||||
|
s.append(url, 0, pathIdx);
|
||||||
|
|
||||||
|
// We don't want the fragment, and multiple fragments breaks the Java URIParser for some reason
|
||||||
|
int end = url.indexOf("#");
|
||||||
|
if (end < 0) end = url.length();
|
||||||
|
|
||||||
|
int queryIdx = url.indexOf('?');
|
||||||
|
if (queryIdx < 0) queryIdx = end;
|
||||||
|
|
||||||
|
recombinePaths(s, url.substring(pathIdx, queryIdx));
|
||||||
|
if (queryIdx < end) {
|
||||||
|
recombineQueryString(s, url.substring(queryIdx + 1, end));
|
||||||
|
}
|
||||||
|
return new URI(s.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void recombinePaths(StringBuilder sb, String path) {
|
||||||
|
if (path == null || path.isEmpty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
String[] pathParts = StringUtils.split(path, '/');
|
||||||
|
if (pathParts.length == 0) {
|
||||||
|
sb.append('/');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (String pathPart : pathParts) {
|
||||||
|
if (pathPart.isEmpty()) continue;
|
||||||
|
|
||||||
|
if (needsUrlEncode(pathPart)) {
|
||||||
|
sb.append('/');
|
||||||
|
sb.append(URLEncoder.encode(pathPart, StandardCharsets.UTF_8));
|
||||||
|
} else {
|
||||||
|
sb.append('/');
|
||||||
|
sb.append(pathPart);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void recombineQueryString(StringBuilder sb, String param) {
|
||||||
|
if (param == null || param.isEmpty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
sb.append('?');
|
||||||
|
String[] pathParts = StringUtils.split(param, '&');
|
||||||
|
boolean first = true;
|
||||||
|
for (String pathPart : pathParts) {
|
||||||
|
if (pathPart.isEmpty()) continue;
|
||||||
|
|
||||||
|
if (first) {
|
||||||
|
first = false;
|
||||||
|
} else {
|
||||||
|
sb.append('&');
|
||||||
|
}
|
||||||
|
if (needsUrlEncode(pathPart)) {
|
||||||
|
sb.append(URLEncoder.encode(pathPart, StandardCharsets.UTF_8));
|
||||||
|
} else {
|
||||||
|
sb.append(pathPart);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Test if the url element needs URL encoding.
|
||||||
|
* <p></p>
|
||||||
|
* Note we may have been given an already encoded path element,
|
||||||
|
* so we include % and + in the list of good characters
|
||||||
|
*/
|
||||||
|
private static boolean needsUrlEncode(String urlElement) {
|
||||||
|
for (int i = 0; i < urlElement.length(); i++) {
|
||||||
|
char c = urlElement.charAt(i);
|
||||||
|
|
||||||
|
if (c >= 'a' && c <= 'z') continue;
|
||||||
|
if (c >= 'A' && c <= 'Z') continue;
|
||||||
|
if (c >= '0' && c <= '9') continue;
|
||||||
|
if ("-_.~+?=&".indexOf(c) >= 0) continue;
|
||||||
|
if (c == '%' && i + 2 < urlElement.length()) {
|
||||||
|
char c1 = urlElement.charAt(i + 1);
|
||||||
|
char c2 = urlElement.charAt(i + 2);
|
||||||
|
if (c1 >= '0' && c1 <= '9' && c2 >= '0' && c2 <= '9') {
|
||||||
|
i += 2;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int findPathIdx(String url) throws URISyntaxException {
|
||||||
|
int colonIdx = url.indexOf(':');
|
||||||
|
if (colonIdx < 0 || colonIdx + 3 >= url.length()) {
|
||||||
|
throw new URISyntaxException(url, "Lacking protocol");
|
||||||
|
}
|
||||||
|
return url.indexOf('/', colonIdx + 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
@@ -1,6 +1,6 @@
|
|||||||
package nu.marginalia.model;
|
package nu.marginalia.model;
|
||||||
|
|
||||||
import nu.marginalia.model.EdgeUrl;
|
import org.junit.jupiter.api.Assertions;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
import java.net.URISyntaxException;
|
import java.net.URISyntaxException;
|
||||||
@@ -21,25 +21,25 @@ class EdgeUrlTest {
|
|||||||
new EdgeUrl("https://memex.marginalia.nu/#here")
|
new EdgeUrl("https://memex.marginalia.nu/#here")
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testParam() throws URISyntaxException {
|
void testUriFromString() throws URISyntaxException {
|
||||||
System.out.println(new EdgeUrl("https://memex.marginalia.nu/index.php?id=1").toString());
|
Assertions.assertEquals("https://www.example.com/", EdgeUriFactory.uriFromString("https://www.example.com/#heredoc").toString());
|
||||||
System.out.println(new EdgeUrl("https://memex.marginalia.nu/showthread.php?id=1&count=5&tracking=123").toString());
|
Assertions.assertEquals("https://www.example.com/%25-sign", EdgeUriFactory.uriFromString("https://www.example.com/%-sign").toString());
|
||||||
}
|
Assertions.assertEquals("https://www.example.com/%22-sign", EdgeUriFactory.uriFromString("https://www.example.com/%22-sign").toString());
|
||||||
@Test
|
Assertions.assertEquals("https://www.example.com/%0A+%22huh%22", EdgeUriFactory.uriFromString("https://www.example.com/\n \"huh\"").toString());
|
||||||
void urlencodeFixer() throws URISyntaxException {
|
Assertions.assertEquals("https://en.wikipedia.org/wiki/S%C3%A1mi", EdgeUriFactory.uriFromString("https://en.wikipedia.org/wiki/Sámi").toString());
|
||||||
System.out.println(EdgeUrl.urlencodeFixer("https://www.example.com/#heredoc"));
|
|
||||||
System.out.println(EdgeUrl.urlencodeFixer("https://www.example.com/%-sign"));
|
|
||||||
System.out.println(EdgeUrl.urlencodeFixer("https://www.example.com/%22-sign"));
|
|
||||||
System.out.println(EdgeUrl.urlencodeFixer("https://www.example.com/\n \"huh\""));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void testParms() throws URISyntaxException {
|
void testParms() throws URISyntaxException {
|
||||||
System.out.println(new EdgeUrl("https://search.marginalia.nu/?id=123"));
|
Assertions.assertEquals("id=123", new EdgeUrl("https://search.marginalia.nu/?id=123").param);
|
||||||
System.out.println(new EdgeUrl("https://search.marginalia.nu/?t=123"));
|
Assertions.assertEquals("t=123", new EdgeUrl("https://search.marginalia.nu/?t=123").param);
|
||||||
System.out.println(new EdgeUrl("https://search.marginalia.nu/?v=123"));
|
Assertions.assertEquals("v=123", new EdgeUrl("https://search.marginalia.nu/?v=123").param);
|
||||||
System.out.println(new EdgeUrl("https://search.marginalia.nu/?m=123"));
|
Assertions.assertEquals("id=1", new EdgeUrl("https://memex.marginalia.nu/showthread.php?id=1&count=5&tracking=123").param);
|
||||||
System.out.println(new EdgeUrl("https://search.marginalia.nu/?follow=123"));
|
Assertions.assertEquals("id=1&t=5", new EdgeUrl("https://memex.marginalia.nu/shöwthrëad.php?id=1&t=5&tracking=123").param);
|
||||||
|
Assertions.assertEquals("id=1&t=5", new EdgeUrl("https://memex.marginalia.nu/shöwthrëad.php?trëaking=123&id=1&t=5&").param);
|
||||||
|
Assertions.assertNull(new EdgeUrl("https://search.marginalia.nu/?m=123").param);
|
||||||
|
Assertions.assertNull(new EdgeUrl("https://search.marginalia.nu/?follow=123").param);
|
||||||
}
|
}
|
||||||
}
|
}
|
@@ -59,16 +59,13 @@ public class ProcessAdHocTaskHeartbeatImpl implements AutoCloseable, ProcessAdHo
|
|||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void progress(String step, int stepProgress, int stepCount) {
|
public void progress(String step, int stepProgress, int stepCount) {
|
||||||
|
int lastProgress = this.progress;
|
||||||
this.step = step;
|
this.step = step;
|
||||||
|
|
||||||
|
|
||||||
// off by one since we calculate the progress based on the number of steps,
|
|
||||||
// and Enum.ordinal() is zero-based (so the 5th step in a 5 step task is 4, not 5; resulting in the
|
|
||||||
// final progress being 80% and not 100%)
|
|
||||||
|
|
||||||
this.progress = (int) Math.round(100. * stepProgress / (double) stepCount);
|
this.progress = (int) Math.round(100. * stepProgress / (double) stepCount);
|
||||||
|
|
||||||
logger.info("ProcessTask {} progress: {}%", taskBase, progress);
|
if (this.progress / 10 != lastProgress / 10) {
|
||||||
|
logger.info("ProcessTask {} progress: {}%", taskBase, progress);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Wrap a collection to provide heartbeat progress updates as it's iterated through */
|
/** Wrap a collection to provide heartbeat progress updates as it's iterated through */
|
||||||
|
@@ -57,16 +57,13 @@ public class ServiceAdHocTaskHeartbeatImpl implements AutoCloseable, ServiceAdHo
|
|||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void progress(String step, int stepProgress, int stepCount) {
|
public void progress(String step, int stepProgress, int stepCount) {
|
||||||
|
int lastProgress = this.progress;
|
||||||
this.step = step;
|
this.step = step;
|
||||||
|
|
||||||
|
|
||||||
// off by one since we calculate the progress based on the number of steps,
|
|
||||||
// and Enum.ordinal() is zero-based (so the 5th step in a 5 step task is 4, not 5; resulting in the
|
|
||||||
// final progress being 80% and not 100%)
|
|
||||||
|
|
||||||
this.progress = (int) Math.round(100. * stepProgress / (double) stepCount);
|
this.progress = (int) Math.round(100. * stepProgress / (double) stepCount);
|
||||||
|
|
||||||
logger.info("ServiceTask {} progress: {}%", taskBase, progress);
|
if (this.progress / 10 != lastProgress / 10) {
|
||||||
|
logger.info("ProcessTask {} progress: {}%", taskBase, progress);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void shutDown() {
|
public void shutDown() {
|
||||||
|
@@ -48,12 +48,13 @@ public class ExecutorExportClient {
|
|||||||
return msgId;
|
return msgId;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void exportSampleData(int node, FileStorageId fid, int size, String name) {
|
public void exportSampleData(int node, FileStorageId fid, int size, String ctFilter, String name) {
|
||||||
channelPool.call(ExecutorExportApiBlockingStub::exportSampleData)
|
channelPool.call(ExecutorExportApiBlockingStub::exportSampleData)
|
||||||
.forNode(node)
|
.forNode(node)
|
||||||
.run(RpcExportSampleData.newBuilder()
|
.run(RpcExportSampleData.newBuilder()
|
||||||
.setFileStorageId(fid.id())
|
.setFileStorageId(fid.id())
|
||||||
.setSize(size)
|
.setSize(size)
|
||||||
|
.setCtFilter(ctFilter)
|
||||||
.setName(name)
|
.setName(name)
|
||||||
.build());
|
.build());
|
||||||
}
|
}
|
||||||
|
@@ -100,6 +100,7 @@ message RpcExportSampleData {
|
|||||||
int64 fileStorageId = 1;
|
int64 fileStorageId = 1;
|
||||||
int32 size = 2;
|
int32 size = 2;
|
||||||
string name = 3;
|
string name = 3;
|
||||||
|
string ctFilter = 4;
|
||||||
}
|
}
|
||||||
message RpcDownloadSampleData {
|
message RpcDownloadSampleData {
|
||||||
string sampleSet = 1;
|
string sampleSet = 1;
|
||||||
|
@@ -26,32 +26,32 @@ public class ExportSampleDataActor extends RecordActorPrototype {
|
|||||||
private final MqOutbox exportTasksOutbox;
|
private final MqOutbox exportTasksOutbox;
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
|
|
||||||
public record Export(FileStorageId crawlId, int size, String name) implements ActorStep {}
|
public record Export(FileStorageId crawlId, int size, String ctFilter, String name) implements ActorStep {}
|
||||||
public record Run(FileStorageId crawlId, FileStorageId destId, int size, String name, long msgId) implements ActorStep {
|
public record Run(FileStorageId crawlId, FileStorageId destId, int size, String ctFilter, String name, long msgId) implements ActorStep {
|
||||||
public Run(FileStorageId crawlId, FileStorageId destId, int size, String name) {
|
public Run(FileStorageId crawlId, FileStorageId destId, int size, String name, String ctFilter) {
|
||||||
this(crawlId, destId, size, name, -1);
|
this(crawlId, destId, size, name, ctFilter,-1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ActorStep transition(ActorStep self) throws Exception {
|
public ActorStep transition(ActorStep self) throws Exception {
|
||||||
return switch(self) {
|
return switch(self) {
|
||||||
case Export(FileStorageId crawlId, int size, String name) -> {
|
case Export(FileStorageId crawlId, int size, String ctFilter, String name) -> {
|
||||||
var storage = storageService.allocateStorage(FileStorageType.EXPORT,
|
var storage = storageService.allocateStorage(FileStorageType.EXPORT,
|
||||||
"crawl-sample-export",
|
"crawl-sample-export",
|
||||||
"Crawl Data Sample " + name + "/" + size + " " + LocalDateTime.now()
|
"Crawl Data Sample " + name + "/" + size + " " + LocalDateTime.now()
|
||||||
);
|
);
|
||||||
|
|
||||||
if (storage == null) yield new Error("Bad storage id");
|
if (storage == null) yield new Error("Bad storage id");
|
||||||
yield new Run(crawlId, storage.id(), size, name);
|
yield new Run(crawlId, storage.id(), size, ctFilter, name);
|
||||||
}
|
}
|
||||||
case Run(FileStorageId crawlId, FileStorageId destId, int size, String name, long msgId) when msgId < 0 -> {
|
case Run(FileStorageId crawlId, FileStorageId destId, int size, String ctFilter, String name, long msgId) when msgId < 0 -> {
|
||||||
storageService.setFileStorageState(destId, FileStorageState.NEW);
|
storageService.setFileStorageState(destId, FileStorageState.NEW);
|
||||||
|
|
||||||
long newMsgId = exportTasksOutbox.sendAsync(ExportTaskRequest.sampleData(crawlId, destId, size, name));
|
long newMsgId = exportTasksOutbox.sendAsync(ExportTaskRequest.sampleData(crawlId, destId, ctFilter, size, name));
|
||||||
yield new Run(crawlId, destId, size, name, newMsgId);
|
yield new Run(crawlId, destId, size, ctFilter, name, newMsgId);
|
||||||
}
|
}
|
||||||
case Run(_, FileStorageId destId, _, _, long msgId) -> {
|
case Run(_, FileStorageId destId, _, _, _, long msgId) -> {
|
||||||
var rsp = processWatcher.waitResponse(exportTasksOutbox, ProcessService.ProcessId.EXPORT_TASKS, msgId);
|
var rsp = processWatcher.waitResponse(exportTasksOutbox, ProcessService.ProcessId.EXPORT_TASKS, msgId);
|
||||||
|
|
||||||
if (rsp.state() != MqMessageState.OK) {
|
if (rsp.state() != MqMessageState.OK) {
|
||||||
@@ -70,7 +70,7 @@ public class ExportSampleDataActor extends RecordActorPrototype {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String describe() {
|
public String describe() {
|
||||||
return "Export RSS/Atom feeds from crawl data";
|
return "Export sample crawl data";
|
||||||
}
|
}
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
|
@@ -49,6 +49,7 @@ public class ExecutorExportGrpcService
|
|||||||
new ExportSampleDataActor.Export(
|
new ExportSampleDataActor.Export(
|
||||||
FileStorageId.of(request.getFileStorageId()),
|
FileStorageId.of(request.getFileStorageId()),
|
||||||
request.getSize(),
|
request.getSize(),
|
||||||
|
request.getCtFilter(),
|
||||||
request.getName()
|
request.getName()
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
@@ -229,13 +229,15 @@ public class FeedFetcherService {
|
|||||||
.timeout(Duration.ofSeconds(15))
|
.timeout(Duration.ofSeconds(15))
|
||||||
;
|
;
|
||||||
|
|
||||||
if (ifModifiedSinceDate != null) {
|
// Set the If-Modified-Since or If-None-Match headers if we have them
|
||||||
|
// though since there are certain idiosyncrasies in server implementations,
|
||||||
|
// we avoid setting both at the same time as that may turn a 304 into a 200.
|
||||||
|
if (ifNoneMatchTag != null) {
|
||||||
|
requestBuilder.header("If-None-Match", ifNoneMatchTag);
|
||||||
|
} else if (ifModifiedSinceDate != null) {
|
||||||
requestBuilder.header("If-Modified-Since", ifModifiedSinceDate);
|
requestBuilder.header("If-Modified-Since", ifModifiedSinceDate);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ifNoneMatchTag != null) {
|
|
||||||
requestBuilder.header("If-None-Match", ifNoneMatchTag);
|
|
||||||
}
|
|
||||||
|
|
||||||
HttpRequest getRequest = requestBuilder.build();
|
HttpRequest getRequest = requestBuilder.build();
|
||||||
|
|
||||||
|
@@ -264,17 +264,16 @@ public class CrawlerMain extends ProcessMainClass {
|
|||||||
if (workLog.isJobFinished(crawlSpec.domain))
|
if (workLog.isJobFinished(crawlSpec.domain))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
var task = new CrawlTask(
|
var task = new CrawlTask(crawlSpec, anchorTagsSource, outputDir, warcArchiver, domainStateDb, workLog);
|
||||||
crawlSpec,
|
|
||||||
anchorTagsSource,
|
|
||||||
outputDir,
|
|
||||||
warcArchiver,
|
|
||||||
domainStateDb,
|
|
||||||
workLog);
|
|
||||||
|
|
||||||
// Try to run immediately, to avoid unnecessarily keeping the entire work set in RAM
|
// Try to run immediately, to avoid unnecessarily keeping the entire work set in RAM
|
||||||
if (!trySubmitDeferredTask(task)) {
|
if (!trySubmitDeferredTask(task)) {
|
||||||
// Otherwise add to the taskList for deferred execution
|
|
||||||
|
// Drain the retry queue to the taskList, and try to submit any tasks that are in the retry queue
|
||||||
|
retryQueue.drainTo(taskList);
|
||||||
|
taskList.removeIf(this::trySubmitDeferredTask);
|
||||||
|
|
||||||
|
// Then add this new task to the retry queue
|
||||||
taskList.add(task);
|
taskList.add(task);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -19,11 +19,13 @@ public record ContentTags(String etag, String lastMod) {
|
|||||||
/** Paints the tags onto the request builder. */
|
/** Paints the tags onto the request builder. */
|
||||||
public void paint(HttpGet request) {
|
public void paint(HttpGet request) {
|
||||||
|
|
||||||
|
// Paint the ETag header if present,
|
||||||
|
// otherwise paint the Last-Modified header
|
||||||
|
// (but not both at the same time due to some servers not liking it)
|
||||||
|
|
||||||
if (etag != null) {
|
if (etag != null) {
|
||||||
request.addHeader("If-None-Match", etag);
|
request.addHeader("If-None-Match", etag);
|
||||||
}
|
} else if (lastMod != null) {
|
||||||
|
|
||||||
if (lastMod != null) {
|
|
||||||
request.addHeader("If-Modified-Since", lastMod);
|
request.addHeader("If-Modified-Since", lastMod);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -51,6 +51,7 @@ import javax.net.ssl.SSLException;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.SocketTimeoutException;
|
import java.net.SocketTimeoutException;
|
||||||
import java.net.URISyntaxException;
|
import java.net.URISyntaxException;
|
||||||
|
import java.net.UnknownHostException;
|
||||||
import java.security.NoSuchAlgorithmException;
|
import java.security.NoSuchAlgorithmException;
|
||||||
import java.time.Duration;
|
import java.time.Duration;
|
||||||
import java.time.Instant;
|
import java.time.Instant;
|
||||||
@@ -635,14 +636,12 @@ public class HttpFetcherImpl implements HttpFetcher, HttpRequestRetryStrategy {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean retryRequest(HttpRequest request, IOException exception, int executionCount, HttpContext context) {
|
public boolean retryRequest(HttpRequest request, IOException exception, int executionCount, HttpContext context) {
|
||||||
if (exception instanceof SocketTimeoutException) { // Timeouts are not recoverable
|
return switch (exception) {
|
||||||
return false;
|
case SocketTimeoutException ste -> false;
|
||||||
}
|
case SSLException ssle -> false;
|
||||||
if (exception instanceof SSLException) { // SSL exceptions are unlikely to be recoverable
|
case UnknownHostException uhe -> false;
|
||||||
return false;
|
default -> executionCount <= 3;
|
||||||
}
|
};
|
||||||
|
|
||||||
return executionCount <= 3;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@@ -57,6 +57,7 @@ public abstract class WarcInputBuffer implements AutoCloseable {
|
|||||||
return new ErrorBuffer();
|
return new ErrorBuffer();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Instant start = Instant.now();
|
||||||
InputStream is = null;
|
InputStream is = null;
|
||||||
try {
|
try {
|
||||||
is = entity.getContent();
|
is = entity.getContent();
|
||||||
@@ -71,8 +72,25 @@ public abstract class WarcInputBuffer implements AutoCloseable {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
finally {
|
finally {
|
||||||
|
// We're required to consume the stream to avoid leaking connections,
|
||||||
|
// but we also don't want to get stuck on slow or malicious connections
|
||||||
|
// forever, so we set a time limit on this phase and call abort() if it's exceeded.
|
||||||
try {
|
try {
|
||||||
is.skip(Long.MAX_VALUE);
|
while (is != null) {
|
||||||
|
// Consume some data
|
||||||
|
if (is.skip(65536) == 0) {
|
||||||
|
// Note that skip may return 0 if the stream is empty
|
||||||
|
// or for other unspecified reasons, so we need to check
|
||||||
|
// with read() as well to determine if the stream is done
|
||||||
|
if (is.read() == -1)
|
||||||
|
is = null;
|
||||||
|
}
|
||||||
|
// Check if the time limit has been exceeded
|
||||||
|
else if (Duration.between(start, Instant.now()).compareTo(timeLimit) > 0) {
|
||||||
|
request.abort();
|
||||||
|
is = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
catch (IOException e) {
|
catch (IOException e) {
|
||||||
// Ignore the exception
|
// Ignore the exception
|
||||||
|
@@ -53,6 +53,8 @@ dependencies {
|
|||||||
implementation libs.commons.compress
|
implementation libs.commons.compress
|
||||||
implementation libs.commons.codec
|
implementation libs.commons.codec
|
||||||
implementation libs.jsoup
|
implementation libs.jsoup
|
||||||
|
implementation libs.slop
|
||||||
|
implementation libs.jwarc
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@@ -3,11 +3,15 @@ package nu.marginalia.extractor;
|
|||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import nu.marginalia.process.log.WorkLog;
|
import nu.marginalia.process.log.WorkLog;
|
||||||
import nu.marginalia.process.log.WorkLogEntry;
|
import nu.marginalia.process.log.WorkLogEntry;
|
||||||
|
import nu.marginalia.slop.SlopCrawlDataRecord;
|
||||||
|
import nu.marginalia.slop.SlopTablePacker;
|
||||||
import nu.marginalia.storage.FileStorageService;
|
import nu.marginalia.storage.FileStorageService;
|
||||||
import nu.marginalia.storage.model.FileStorage;
|
import nu.marginalia.storage.model.FileStorage;
|
||||||
import nu.marginalia.storage.model.FileStorageId;
|
import nu.marginalia.storage.model.FileStorageId;
|
||||||
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
|
import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
|
||||||
import org.apache.commons.compress.utils.IOUtils;
|
import org.apache.commons.compress.utils.IOUtils;
|
||||||
|
import org.apache.commons.io.FileUtils;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
@@ -27,7 +31,7 @@ public class SampleDataExporter {
|
|||||||
public SampleDataExporter(FileStorageService storageService) {
|
public SampleDataExporter(FileStorageService storageService) {
|
||||||
this.storageService = storageService;
|
this.storageService = storageService;
|
||||||
}
|
}
|
||||||
public void export(FileStorageId crawlId, FileStorageId destId, int size, String name) throws SQLException, IOException {
|
public void export(FileStorageId crawlId, FileStorageId destId, int size, String ctFilter, String name) throws SQLException, IOException {
|
||||||
FileStorage destStorage = storageService.getStorage(destId);
|
FileStorage destStorage = storageService.getStorage(destId);
|
||||||
Path inputDir = storageService.getStorage(crawlId).asPath();
|
Path inputDir = storageService.getStorage(crawlId).asPath();
|
||||||
|
|
||||||
@@ -54,6 +58,7 @@ public class SampleDataExporter {
|
|||||||
|
|
||||||
Path newCrawlerLogFile = Files.createTempFile(destStorage.asPath(), "crawler", ".log",
|
Path newCrawlerLogFile = Files.createTempFile(destStorage.asPath(), "crawler", ".log",
|
||||||
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rw-r--r--")));
|
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rw-r--r--")));
|
||||||
|
|
||||||
try (var bw = Files.newBufferedWriter(newCrawlerLogFile, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)) {
|
try (var bw = Files.newBufferedWriter(newCrawlerLogFile, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)) {
|
||||||
for (var item : entriesAll) {
|
for (var item : entriesAll) {
|
||||||
bw.write(item.id() + " " + item.ts() + " " + item.relPath() + " " + item.cnt() + "\n");
|
bw.write(item.id() + " " + item.ts() + " " + item.relPath() + " " + item.cnt() + "\n");
|
||||||
@@ -72,7 +77,22 @@ public class SampleDataExporter {
|
|||||||
Path crawlDataPath = inputDir.resolve(item.relPath());
|
Path crawlDataPath = inputDir.resolve(item.relPath());
|
||||||
if (!Files.exists(crawlDataPath)) continue;
|
if (!Files.exists(crawlDataPath)) continue;
|
||||||
|
|
||||||
addFileToTar(stream, crawlDataPath, item.relPath());
|
if (StringUtils.isBlank(ctFilter)) {
|
||||||
|
addFileToTar(stream, crawlDataPath, item.relPath());
|
||||||
|
}
|
||||||
|
else /* filter != null */ {
|
||||||
|
boolean didFilterData = false;
|
||||||
|
try {
|
||||||
|
crawlDataPath = filterEntries(crawlDataPath, ctFilter);
|
||||||
|
didFilterData = true;
|
||||||
|
addFileToTar(stream, crawlDataPath, item.relPath());
|
||||||
|
}
|
||||||
|
finally {
|
||||||
|
if (didFilterData) {
|
||||||
|
Files.deleteIfExists(crawlDataPath);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
addFileToTar(stream, newCrawlerLogFile, "crawler.log");
|
addFileToTar(stream, newCrawlerLogFile, "crawler.log");
|
||||||
@@ -86,6 +106,46 @@ public class SampleDataExporter {
|
|||||||
Files.move(tmpTarFile, destStorage.asPath().resolve("crawl-data.tar"), StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING);
|
Files.move(tmpTarFile, destStorage.asPath().resolve("crawl-data.tar"), StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Filters the entries in the crawl data file based on the content type.
|
||||||
|
* @param crawlDataPath The path to the crawl data file.
|
||||||
|
* @param contentTypeFilter The content type to filter by.
|
||||||
|
* @return The path to the filtered crawl data file, or null if an error occurred.
|
||||||
|
*/
|
||||||
|
private Path filterEntries(Path crawlDataPath, String contentTypeFilter) throws IOException {
|
||||||
|
Path tempDir = crawlDataPath.resolveSibling(crawlDataPath.getFileName() + ".filtered");
|
||||||
|
Path tempFile = crawlDataPath.resolveSibling(crawlDataPath.getFileName() + ".filtered.slop.zip");
|
||||||
|
|
||||||
|
Files.createDirectory(tempDir);
|
||||||
|
|
||||||
|
try (var writer = new SlopCrawlDataRecord.Writer(tempDir);
|
||||||
|
var reader = new SlopCrawlDataRecord.FilteringReader(crawlDataPath) {
|
||||||
|
@Override
|
||||||
|
public boolean filter(String url, int status, String contentType) {
|
||||||
|
if (contentTypeFilter.equals(contentType))
|
||||||
|
return true;
|
||||||
|
else if (contentType.startsWith("x-marginalia/"))
|
||||||
|
// This is a metadata entry, typically domain or redirect information
|
||||||
|
// let's keep those to not confuse the consumer of the data, which might
|
||||||
|
// expect at least the domain summary
|
||||||
|
return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
) {
|
||||||
|
while (reader.hasRemaining()) {
|
||||||
|
writer.write(reader.get());
|
||||||
|
}
|
||||||
|
|
||||||
|
SlopTablePacker.packToSlopZip(tempDir, tempFile);
|
||||||
|
}
|
||||||
|
finally {
|
||||||
|
FileUtils.deleteDirectory(tempDir.toFile());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
return tempFile;
|
||||||
|
}
|
||||||
|
|
||||||
private void addFileToTar(TarArchiveOutputStream outputStream, Path file, String fileName) throws IOException {
|
private void addFileToTar(TarArchiveOutputStream outputStream, Path file, String fileName) throws IOException {
|
||||||
var entry = outputStream.createArchiveEntry(file.toFile(), fileName);
|
var entry = outputStream.createArchiveEntry(file.toFile(), fileName);
|
||||||
entry.setSize(Files.size(file));
|
entry.setSize(Files.size(file));
|
||||||
|
@@ -92,7 +92,7 @@ public class ExportTasksMain extends ProcessMainClass {
|
|||||||
termFrequencyExporter.export(request.crawlId, request.destId);
|
termFrequencyExporter.export(request.crawlId, request.destId);
|
||||||
break;
|
break;
|
||||||
case SAMPLE_DATA:
|
case SAMPLE_DATA:
|
||||||
sampleDataExporter.export(request.crawlId, request.destId, request.size, request.name);
|
sampleDataExporter.export(request.crawlId, request.destId, request.size, request.ctFilter, request.name);
|
||||||
break;
|
break;
|
||||||
case ADJACENCIES:
|
case ADJACENCIES:
|
||||||
websiteAdjacenciesCalculator.export();
|
websiteAdjacenciesCalculator.export();
|
||||||
|
@@ -16,6 +16,7 @@ public class ExportTaskRequest {
|
|||||||
public FileStorageId destId;
|
public FileStorageId destId;
|
||||||
public int size;
|
public int size;
|
||||||
public String name;
|
public String name;
|
||||||
|
public String ctFilter;
|
||||||
|
|
||||||
public ExportTaskRequest(Task task) {
|
public ExportTaskRequest(Task task) {
|
||||||
this.task = task;
|
this.task = task;
|
||||||
@@ -42,12 +43,13 @@ public class ExportTaskRequest {
|
|||||||
return request;
|
return request;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static ExportTaskRequest sampleData(FileStorageId crawlId, FileStorageId destId, int size, String name) {
|
public static ExportTaskRequest sampleData(FileStorageId crawlId, FileStorageId destId, String ctFilter, int size, String name) {
|
||||||
ExportTaskRequest request = new ExportTaskRequest(Task.SAMPLE_DATA);
|
ExportTaskRequest request = new ExportTaskRequest(Task.SAMPLE_DATA);
|
||||||
request.crawlId = crawlId;
|
request.crawlId = crawlId;
|
||||||
request.destId = destId;
|
request.destId = destId;
|
||||||
request.size = size;
|
request.size = size;
|
||||||
request.name = name;
|
request.name = name;
|
||||||
|
request.ctFilter = ctFilter;
|
||||||
return request;
|
return request;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -3,7 +3,7 @@ plugins {
|
|||||||
|
|
||||||
id 'application'
|
id 'application'
|
||||||
id 'jvm-test-suite'
|
id 'jvm-test-suite'
|
||||||
id 'com.google.cloud.tools.jib' version '3.4.4'
|
id 'com.google.cloud.tools.jib' version '3.4.5'
|
||||||
}
|
}
|
||||||
|
|
||||||
java {
|
java {
|
||||||
|
@@ -3,7 +3,7 @@ plugins {
|
|||||||
|
|
||||||
id 'application'
|
id 'application'
|
||||||
id 'jvm-test-suite'
|
id 'jvm-test-suite'
|
||||||
id 'com.google.cloud.tools.jib' version '3.4.4'
|
id 'com.google.cloud.tools.jib' version '3.4.5'
|
||||||
}
|
}
|
||||||
|
|
||||||
application {
|
application {
|
||||||
|
@@ -3,7 +3,7 @@ plugins {
|
|||||||
|
|
||||||
id 'application'
|
id 'application'
|
||||||
id 'jvm-test-suite'
|
id 'jvm-test-suite'
|
||||||
id 'com.google.cloud.tools.jib' version '3.4.4'
|
id 'com.google.cloud.tools.jib' version '3.4.5'
|
||||||
}
|
}
|
||||||
|
|
||||||
application {
|
application {
|
||||||
|
@@ -5,7 +5,7 @@ plugins {
|
|||||||
id 'application'
|
id 'application'
|
||||||
id 'jvm-test-suite'
|
id 'jvm-test-suite'
|
||||||
|
|
||||||
id 'com.google.cloud.tools.jib' version '3.4.4'
|
id 'com.google.cloud.tools.jib' version '3.4.5'
|
||||||
}
|
}
|
||||||
|
|
||||||
application {
|
application {
|
||||||
|
@@ -3,7 +3,7 @@ plugins {
|
|||||||
id 'application'
|
id 'application'
|
||||||
id 'jvm-test-suite'
|
id 'jvm-test-suite'
|
||||||
id 'gg.jte.gradle' version '3.1.15'
|
id 'gg.jte.gradle' version '3.1.15'
|
||||||
id 'com.google.cloud.tools.jib' version '3.4.4'
|
id 'com.google.cloud.tools.jib' version '3.4.5'
|
||||||
}
|
}
|
||||||
|
|
||||||
application {
|
application {
|
||||||
|
@@ -26,4 +26,10 @@
|
|||||||
|
|
||||||
<link rel="search" type="application/opensearchdescription+xml" href="/opensearch.xml" title="Marginalia">
|
<link rel="search" type="application/opensearchdescription+xml" href="/opensearch.xml" title="Marginalia">
|
||||||
|
|
||||||
</head>
|
</head>
|
||||||
|
<noscript>
|
||||||
|
<h1>Users of text-based browsers</h1>
|
||||||
|
<p>Consider using the old interface at <a href="https://old-search.marginalia.nu/">https://old-search.marginalia.nu/</a>,
|
||||||
|
as it uses fewer modern CSS tricks, and should work better than the new UI. It's functionally nearly identical, but just renders it using a different layout.</p>
|
||||||
|
<hr>
|
||||||
|
</noscript>
|
@@ -1,9 +1,16 @@
|
|||||||
This is a bit of a hack!
|
This is a bit of a hack!
|
||||||
|
|
||||||
This class exists to let tailwind know we're using these classes even though they aren't visible in the code,
|
This class exists to let tailwind know we're using these classes even though they aren't visible in the code,
|
||||||
as we sometimes generate classes from Java code!
|
as we sometimes generate classes from Java code or javascript!
|
||||||
|
|
||||||
<i class="text-blue-800 bg-blue-50 dark:text-blue-200 dark:bg-blue-950"></i>
|
<i class="text-blue-800 bg-blue-50 dark:text-blue-200 dark:bg-blue-950"></i>
|
||||||
<i class="text-green-800 bg-green-50 dark:text-green-200 dark:bg-green-950"></i>
|
<i class="text-green-800 bg-green-50 dark:text-green-200 dark:bg-green-950"></i>
|
||||||
<i class="text-purple-800 bg-purple-50 dark:text-purple-200 dark:bg-purple-950"></i>
|
<i class="text-purple-800 bg-purple-50 dark:text-purple-200 dark:bg-purple-950"></i>
|
||||||
<i class="text-blue-950 bg-gray-100 dark:text-blue-50 dark:bg-gray-900"></i>
|
<i class="text-blue-950 bg-gray-100 dark:text-blue-50 dark:bg-gray-900"></i>
|
||||||
|
<span class="hover:bg-gray-300 "></span>
|
||||||
|
|
||||||
|
<label class="suggestion group block relative">
|
||||||
|
<input type="radio" name="suggestion" class="peer hidden" checked>
|
||||||
|
<div class="px-4 py-2 cursor-pointer dark:peer-checked:bg-gray-700 dark:hover:bg-gray-700 peer-checked:bg-gray-300 hover:bg-gray-300 w-full">
|
||||||
|
</div>
|
||||||
|
</label>
|
@@ -26,7 +26,7 @@
|
|||||||
|
|
||||||
<!-- Main content -->
|
<!-- Main content -->
|
||||||
<main class="flex-1 p-4 max-w-2xl space-y-4">
|
<main class="flex-1 p-4 max-w-2xl space-y-4">
|
||||||
<div class="border dark:border-gray-600 rounded bg-white text-black dark:bg-gray-800 dark:text-white text-m p-4">
|
<div class="border border-gray-300 dark:border-gray-600 rounded bg-white text-black dark:bg-gray-800 dark:text-white text-m p-4">
|
||||||
<div class="flex space-x-3 place-items-baseline">
|
<div class="flex space-x-3 place-items-baseline">
|
||||||
<i class="fa fa-circle-exclamation text-red-800"></i>
|
<i class="fa fa-circle-exclamation text-red-800"></i>
|
||||||
<div class="grow">${model.errorTitle()}</div>
|
<div class="grow">${model.errorTitle()}</div>
|
||||||
|
@@ -80,10 +80,6 @@
|
|||||||
<tr><td>rank>50</td><td>The ranking of the website is at least 50 in a span of 1 - 255</td></tr>
|
<tr><td>rank>50</td><td>The ranking of the website is at least 50 in a span of 1 - 255</td></tr>
|
||||||
<tr><td>rank<50</td><td>The ranking of the website is at most 50 in a span of 1 - 255</td></tr>
|
<tr><td>rank<50</td><td>The ranking of the website is at most 50 in a span of 1 - 255</td></tr>
|
||||||
|
|
||||||
<tr><td>count>10</td><td> The search term must appear in at least 10 results form the domain</td></tr>
|
|
||||||
<tr><td>count<10</td><td> The search term must appear in at most 10 results from the domain</td></tr>
|
|
||||||
|
|
||||||
|
|
||||||
<tr><td>format:html5</td><td>Filter documents using the HTML5 standard. This is typically modern websites.</td></tr>
|
<tr><td>format:html5</td><td>Filter documents using the HTML5 standard. This is typically modern websites.</td></tr>
|
||||||
<tr><td>format:xhtml</td><td>Filter documents using the XHTML standard</td></tr>
|
<tr><td>format:xhtml</td><td>Filter documents using the XHTML standard</td></tr>
|
||||||
<tr><td>format:html123</td><td>Filter documents using the HTML standards 1, 2, and 3. This is typically very old websites. </td></tr>
|
<tr><td>format:html123</td><td>Filter documents using the HTML standards 1, 2, and 3. This is typically very old websites. </td></tr>
|
||||||
|
@@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
<form class="flex-1 max-w-2xl" action="/search">
|
<form class="flex-1 max-w-2xl" action="/search">
|
||||||
<div class="flex">
|
<div class="flex">
|
||||||
@if (query.isBlank())
|
@if (query != null && query.isBlank())
|
||||||
<%-- Add autofocus if the query is blank --%>
|
<%-- Add autofocus if the query is blank --%>
|
||||||
<input type="text"
|
<input type="text"
|
||||||
class="shadow-inner flex-1 dark:bg-black dark:text-gray-100 bg-gray-50 border dark:border-gray-600 border-gray-300 text-gray-900 text-sm rounded-sm block w-full p-2.5"
|
class="shadow-inner flex-1 dark:bg-black dark:text-gray-100 bg-gray-50 border dark:border-gray-600 border-gray-300 text-gray-900 text-sm rounded-sm block w-full p-2.5"
|
||||||
@@ -27,7 +27,7 @@
|
|||||||
id="searchInput" />
|
id="searchInput" />
|
||||||
@endif
|
@endif
|
||||||
|
|
||||||
<div id="searchSuggestions" class="text-sm absolute top-2 mt-10 w-96 bg-white dark:bg-black border dark:border-gray-600 border-gray-200 rounded-lg shadow-lg hidden"></div>
|
<div aria-hidden="true" id="searchSuggestions" class="text-sm absolute top-3 mt-10 w-96 bg-white dark:bg-black border dark:border-gray-600 border-gray-300 rounded-lg shadow-lg hidden"></div>
|
||||||
|
|
||||||
<button class="px-4 py-2 bg-margeblue text-white ml-2 rounded whitespace-nowrap active:text-slate-200">
|
<button class="px-4 py-2 bg-margeblue text-white ml-2 rounded whitespace-nowrap active:text-slate-200">
|
||||||
<i class="fas fa-search text-sm sm:mr-3"></i>
|
<i class="fas fa-search text-sm sm:mr-3"></i>
|
||||||
|
@@ -43,13 +43,13 @@ function displaySuggestions(suggestions) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
suggestionsContainer.innerHTML = suggestions.map((suggestion, index) => `
|
suggestionsContainer.innerHTML = suggestions.map((suggestion, index) => `
|
||||||
<div
|
<label class="suggestion group block relative">
|
||||||
class="suggestion px-4 py-2 cursor-pointer hover:bg-gray-100 ${index === selectedIndex ? 'bg-blue-50' : ''}"
|
<input type="radio" name="suggestion" class="peer hidden" ${index === selectedIndex ? 'checked' : ''}>
|
||||||
data-index="${index}"
|
<div class="px-4 py-2 cursor-pointer dark:peer-checked:bg-gray-700 dark:hover:bg-gray-700 peer-checked:bg-gray-300 hover:bg-gray-300 w-full" data-index="${index}">
|
||||||
>
|
${suggestion}
|
||||||
${suggestion}
|
</div>
|
||||||
</div>
|
</label>
|
||||||
`).join('');
|
`).join('');
|
||||||
|
|
||||||
suggestionsContainer.classList.remove('hidden');
|
suggestionsContainer.classList.remove('hidden');
|
||||||
|
|
||||||
|
@@ -2,7 +2,7 @@ plugins {
|
|||||||
id 'java'
|
id 'java'
|
||||||
id 'application'
|
id 'application'
|
||||||
id 'jvm-test-suite'
|
id 'jvm-test-suite'
|
||||||
id 'com.google.cloud.tools.jib' version '3.4.4'
|
id 'com.google.cloud.tools.jib' version '3.4.5'
|
||||||
}
|
}
|
||||||
|
|
||||||
java {
|
java {
|
||||||
|
@@ -3,7 +3,7 @@ plugins {
|
|||||||
|
|
||||||
id 'application'
|
id 'application'
|
||||||
id 'jvm-test-suite'
|
id 'jvm-test-suite'
|
||||||
id 'com.google.cloud.tools.jib' version '3.4.4'
|
id 'com.google.cloud.tools.jib' version '3.4.5'
|
||||||
}
|
}
|
||||||
|
|
||||||
application {
|
application {
|
||||||
|
@@ -10,7 +10,8 @@ import static com.google.inject.name.Names.named;
|
|||||||
|
|
||||||
public class AssistantModule extends AbstractModule {
|
public class AssistantModule extends AbstractModule {
|
||||||
public void configure() {
|
public void configure() {
|
||||||
bind(Path.class).annotatedWith(named("suggestions-file")).toInstance(WmsaHome.getHomePath().resolve("data/suggestions.txt"));
|
bind(Path.class).annotatedWith(named("suggestions-file1")).toInstance(WmsaHome.getHomePath().resolve("data/suggestions2.txt.gz"));
|
||||||
|
bind(Path.class).annotatedWith(named("suggestions-file2")).toInstance(WmsaHome.getHomePath().resolve("data/suggestions3.txt.gz"));
|
||||||
|
|
||||||
bind(LanguageModels.class).toInstance(WmsaHome.getLanguageModels());
|
bind(LanguageModels.class).toInstance(WmsaHome.getLanguageModels());
|
||||||
}
|
}
|
||||||
|
@@ -0,0 +1,465 @@
|
|||||||
|
package nu.marginalia.assistant.suggest;
|
||||||
|
|
||||||
|
import gnu.trove.list.array.TIntArrayList;
|
||||||
|
import org.jetbrains.annotations.NotNull;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/** Unhinged data structure for fast prefix searching.
|
||||||
|
*/
|
||||||
|
public class PrefixSearchStructure {
|
||||||
|
// Core data structures
|
||||||
|
private final HashMap<String, TIntArrayList> prefixIndex; // Short prefix index (up to 8 chars)
|
||||||
|
private final HashMap<String, TIntArrayList> longPrefixIndex; // Long prefix index (9-16 chars)
|
||||||
|
private final ArrayList<String> words; // All words by ID
|
||||||
|
private final TIntArrayList wordScores; // Scores for all words
|
||||||
|
|
||||||
|
// Configuration
|
||||||
|
private static final int SHORT_PREFIX_LENGTH = 8;
|
||||||
|
private static final int MAX_INDEXED_PREFIX_LENGTH = 16;
|
||||||
|
|
||||||
|
public int size() {
|
||||||
|
return words.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
// For sorting efficiency
|
||||||
|
private static class WordScorePair {
|
||||||
|
final String word;
|
||||||
|
final int score;
|
||||||
|
|
||||||
|
WordScorePair(String word, int score) {
|
||||||
|
this.word = word;
|
||||||
|
this.score = score;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new PrefixTrie for typeahead search.
|
||||||
|
*/
|
||||||
|
public PrefixSearchStructure() {
|
||||||
|
prefixIndex = new HashMap<>(1024);
|
||||||
|
longPrefixIndex = new HashMap<>(1024);
|
||||||
|
words = new ArrayList<>(1024);
|
||||||
|
wordScores = new TIntArrayList(1024);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds a prefix to the index.
|
||||||
|
*/
|
||||||
|
private void indexPrefix(String word, int wordId) {
|
||||||
|
// Index short prefixes
|
||||||
|
for (int i = 1; i <= Math.min(word.length(), SHORT_PREFIX_LENGTH); i++) {
|
||||||
|
String prefix = word.substring(0, i);
|
||||||
|
TIntArrayList wordIds = prefixIndex.computeIfAbsent(
|
||||||
|
prefix, k -> new TIntArrayList(16));
|
||||||
|
wordIds.add(wordId);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Index longer prefixes
|
||||||
|
for (int i = SHORT_PREFIX_LENGTH + 1; i <= Math.min(word.length(), MAX_INDEXED_PREFIX_LENGTH); i++) {
|
||||||
|
String prefix = word.substring(0, i);
|
||||||
|
TIntArrayList wordIds = longPrefixIndex.computeIfAbsent(
|
||||||
|
prefix, k -> new TIntArrayList(8));
|
||||||
|
wordIds.add(wordId);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the word contains spaces, also index by each term for multi-word queries
|
||||||
|
if (word.contains(" ")) {
|
||||||
|
String[] terms = word.split("\\s+");
|
||||||
|
for (String term : terms) {
|
||||||
|
if (term.length() >= 2) {
|
||||||
|
for (int i = 1; i <= Math.min(term.length(), SHORT_PREFIX_LENGTH); i++) {
|
||||||
|
String termPrefix = "t:" + term.substring(0, i);
|
||||||
|
TIntArrayList wordIds = prefixIndex.computeIfAbsent(
|
||||||
|
termPrefix, k -> new TIntArrayList(16));
|
||||||
|
wordIds.add(wordId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Inserts a word with its associated score.
|
||||||
|
*/
|
||||||
|
public void insert(String word, int score) {
|
||||||
|
if (word == null || word.isEmpty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add to the word list and index
|
||||||
|
int wordId = words.size();
|
||||||
|
words.add(word);
|
||||||
|
wordScores.add(score);
|
||||||
|
indexPrefix(word, wordId);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the top k completions for a given prefix.
|
||||||
|
*/
|
||||||
|
public List<ScoredSuggestion> getTopCompletions(String prefix, int k) {
|
||||||
|
if (prefix == null || prefix.isEmpty()) {
|
||||||
|
// Return top k words by score
|
||||||
|
return getTopKWords(k);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if this is a term search (t:) - for searching within multi-word items
|
||||||
|
boolean isTermSearch = false;
|
||||||
|
if (prefix.startsWith("t:") && prefix.length() > 2) {
|
||||||
|
isTermSearch = true;
|
||||||
|
prefix = prefix.substring(2);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 1. Fast path for short prefixes
|
||||||
|
if (prefix.length() <= SHORT_PREFIX_LENGTH) {
|
||||||
|
String lookupPrefix = isTermSearch ? "t:" + prefix : prefix;
|
||||||
|
TIntArrayList wordIds = prefixIndex.get(lookupPrefix);
|
||||||
|
if (wordIds != null) {
|
||||||
|
return getTopKFromWordIds(wordIds, k);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2. Fast path for long prefixes (truncate to MAX_INDEXED_PREFIX_LENGTH)
|
||||||
|
if (prefix.length() > SHORT_PREFIX_LENGTH) {
|
||||||
|
// Try exact match in longPrefixIndex first
|
||||||
|
if (prefix.length() <= MAX_INDEXED_PREFIX_LENGTH) {
|
||||||
|
TIntArrayList wordIds = longPrefixIndex.get(prefix);
|
||||||
|
if (wordIds != null) {
|
||||||
|
return getTopKFromWordIds(wordIds, k);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If prefix is longer than MAX_INDEXED_PREFIX_LENGTH, truncate and filter
|
||||||
|
if (prefix.length() > MAX_INDEXED_PREFIX_LENGTH) {
|
||||||
|
String truncatedPrefix = prefix.substring(0, MAX_INDEXED_PREFIX_LENGTH);
|
||||||
|
TIntArrayList candidateIds = longPrefixIndex.get(truncatedPrefix);
|
||||||
|
if (candidateIds != null) {
|
||||||
|
// Filter candidates by the full prefix
|
||||||
|
return getFilteredTopKFromWordIds(candidateIds, prefix, k);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3. Optimized fallback for long prefixes - use prefix tree for segments
|
||||||
|
List<ScoredSuggestion> results = new ArrayList<>();
|
||||||
|
|
||||||
|
// Handle multi-segment queries by finding candidates from first 8 chars
|
||||||
|
if (prefix.length() > SHORT_PREFIX_LENGTH) {
|
||||||
|
String shortPrefix = prefix.substring(0, Math.min(prefix.length(), SHORT_PREFIX_LENGTH));
|
||||||
|
TIntArrayList candidates = prefixIndex.get(shortPrefix);
|
||||||
|
|
||||||
|
if (candidates != null) {
|
||||||
|
return getFilteredTopKFromWordIds(candidates, prefix, k);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4. Last resort - optimized binary search in sorted segments
|
||||||
|
return findByBinarySearchPrefix(prefix, k);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper to get the top k words by score.
|
||||||
|
*/
|
||||||
|
private List<ScoredSuggestion> getTopKWords(int k) {
|
||||||
|
// Create pairs of (score, wordId)
|
||||||
|
int[][] pairs = new int[words.size()][2];
|
||||||
|
for (int i = 0; i < words.size(); i++) {
|
||||||
|
pairs[i][0] = wordScores.get(i);
|
||||||
|
pairs[i][1] = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort by score (descending)
|
||||||
|
Arrays.sort(pairs, (a, b) -> Integer.compare(b[0], a[0]));
|
||||||
|
|
||||||
|
// Take top k
|
||||||
|
List<ScoredSuggestion> results = new ArrayList<>();
|
||||||
|
for (int i = 0; i < Math.min(k, pairs.length); i++) {
|
||||||
|
String word = words.get(pairs[i][1]);
|
||||||
|
int score = pairs[i][0];
|
||||||
|
results.add(new ScoredSuggestion(word, score));
|
||||||
|
}
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper to get the top k words from a list of word IDs.
|
||||||
|
*/
|
||||||
|
private List<ScoredSuggestion> getTopKFromWordIds(TIntArrayList wordIds, int k) {
|
||||||
|
if (wordIds == null || wordIds.isEmpty()) {
|
||||||
|
return Collections.emptyList();
|
||||||
|
}
|
||||||
|
|
||||||
|
// For small lists, avoid sorting
|
||||||
|
if (wordIds.size() <= k) {
|
||||||
|
List<ScoredSuggestion> results = new ArrayList<>(wordIds.size());
|
||||||
|
int[] ids = wordIds.toArray();
|
||||||
|
for (int wordId : ids) {
|
||||||
|
if (wordId >= 0 && wordId < words.size()) {
|
||||||
|
results.add(new ScoredSuggestion(words.get(wordId), wordScores.get(wordId)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
results.sort((a, b) -> Integer.compare(b.getScore(), a.getScore()));
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
// For larger lists, use an array-based approach for better performance
|
||||||
|
// Find top k without full sorting
|
||||||
|
int[] topScores = new int[k];
|
||||||
|
int[] topWordIds = new int[k];
|
||||||
|
int[] ids = wordIds.toArray();
|
||||||
|
|
||||||
|
// Initialize with first k elements
|
||||||
|
int filledCount = Math.min(k, ids.length);
|
||||||
|
for (int i = 0; i < filledCount; i++) {
|
||||||
|
int wordId = ids[i];
|
||||||
|
if (wordId >= 0 && wordId < words.size()) {
|
||||||
|
topWordIds[i] = wordId;
|
||||||
|
topScores[i] = wordScores.get(wordId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort initial elements
|
||||||
|
for (int i = 0; i < filledCount; i++) {
|
||||||
|
for (int j = i + 1; j < filledCount; j++) {
|
||||||
|
if (topScores[j] > topScores[i]) {
|
||||||
|
// Swap scores
|
||||||
|
int tempScore = topScores[i];
|
||||||
|
topScores[i] = topScores[j];
|
||||||
|
topScores[j] = tempScore;
|
||||||
|
|
||||||
|
// Swap word IDs
|
||||||
|
int tempId = topWordIds[i];
|
||||||
|
topWordIds[i] = topWordIds[j];
|
||||||
|
topWordIds[j] = tempId;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process remaining elements
|
||||||
|
int minScore = filledCount > 0 ? topScores[filledCount - 1] : Integer.MIN_VALUE;
|
||||||
|
|
||||||
|
for (int i = k; i < ids.length; i++) {
|
||||||
|
int wordId = ids[i];
|
||||||
|
if (wordId >= 0 && wordId < words.size()) {
|
||||||
|
int score = wordScores.get(wordId);
|
||||||
|
|
||||||
|
if (score > minScore) {
|
||||||
|
// Replace the lowest element
|
||||||
|
topScores[filledCount - 1] = score;
|
||||||
|
topWordIds[filledCount - 1] = wordId;
|
||||||
|
|
||||||
|
// Bubble up the new element
|
||||||
|
for (int j = filledCount - 1; j > 0; j--) {
|
||||||
|
if (topScores[j] > topScores[j - 1]) {
|
||||||
|
// Swap scores
|
||||||
|
int tempScore = topScores[j];
|
||||||
|
topScores[j] = topScores[j - 1];
|
||||||
|
topScores[j - 1] = tempScore;
|
||||||
|
|
||||||
|
// Swap word IDs
|
||||||
|
int tempId = topWordIds[j];
|
||||||
|
topWordIds[j] = topWordIds[j - 1];
|
||||||
|
topWordIds[j - 1] = tempId;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update min score
|
||||||
|
minScore = topScores[filledCount - 1];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create result list
|
||||||
|
List<ScoredSuggestion> results = new ArrayList<>(filledCount);
|
||||||
|
for (int i = 0; i < filledCount; i++) {
|
||||||
|
results.add(new ScoredSuggestion(words.get(topWordIds[i]), topScores[i]));
|
||||||
|
}
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use binary search on sorted word segments to efficiently find matches.
|
||||||
|
*/
|
||||||
|
private List<ScoredSuggestion> findByBinarySearchPrefix(String prefix, int k) {
|
||||||
|
// If we have a lot of words, use an optimized segment approach
|
||||||
|
if (words.size() > 1000) {
|
||||||
|
// Divide words into segments for better locality
|
||||||
|
int segmentSize = 1000;
|
||||||
|
int numSegments = (words.size() + segmentSize - 1) / segmentSize;
|
||||||
|
|
||||||
|
// Find matches using binary search within each segment
|
||||||
|
List<WordScorePair> allMatches = new ArrayList<>();
|
||||||
|
for (int segment = 0; segment < numSegments; segment++) {
|
||||||
|
int start = segment * segmentSize;
|
||||||
|
int end = Math.min(start + segmentSize, words.size());
|
||||||
|
|
||||||
|
// Binary search for first potential match
|
||||||
|
int pos = Collections.binarySearch(
|
||||||
|
words.subList(start, end),
|
||||||
|
prefix,
|
||||||
|
(a, b) -> a.compareTo(b)
|
||||||
|
);
|
||||||
|
|
||||||
|
if (pos < 0) {
|
||||||
|
pos = -pos - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Collect all matches
|
||||||
|
for (int i = start + pos; i < end && i < words.size(); i++) {
|
||||||
|
String word = words.get(i);
|
||||||
|
if (word.startsWith(prefix)) {
|
||||||
|
allMatches.add(new WordScorePair(word, wordScores.get(i)));
|
||||||
|
} else if (word.compareTo(prefix) > 0) {
|
||||||
|
break; // Past potential matches
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort by score and take top k
|
||||||
|
allMatches.sort((a, b) -> Integer.compare(b.score, a.score));
|
||||||
|
List<ScoredSuggestion> results = new ArrayList<>(Math.min(k, allMatches.size()));
|
||||||
|
for (int i = 0; i < Math.min(k, allMatches.size()); i++) {
|
||||||
|
WordScorePair pair = allMatches.get(i);
|
||||||
|
results.add(new ScoredSuggestion(pair.word, pair.score));
|
||||||
|
}
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback for small dictionaries - linear scan but optimized
|
||||||
|
return simpleSearchFallback(prefix, k);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Optimized linear scan - only used for small dictionaries.
|
||||||
|
*/
|
||||||
|
private List<ScoredSuggestion> simpleSearchFallback(String prefix, int k) {
|
||||||
|
// Use primitive arrays for better cache locality
|
||||||
|
int[] matchScores = new int[Math.min(words.size(), 100)]; // Assume we won't find more than 100 matches
|
||||||
|
String[] matchWords = new String[matchScores.length];
|
||||||
|
int matchCount = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < words.size() && matchCount < matchScores.length; i++) {
|
||||||
|
String word = words.get(i);
|
||||||
|
if (word.startsWith(prefix)) {
|
||||||
|
matchWords[matchCount] = word;
|
||||||
|
matchScores[matchCount] = wordScores.get(i);
|
||||||
|
matchCount++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort matches by score (in-place for small arrays)
|
||||||
|
for (int i = 0; i < matchCount; i++) {
|
||||||
|
for (int j = i + 1; j < matchCount; j++) {
|
||||||
|
if (matchScores[j] > matchScores[i]) {
|
||||||
|
// Swap scores
|
||||||
|
int tempScore = matchScores[i];
|
||||||
|
matchScores[i] = matchScores[j];
|
||||||
|
matchScores[j] = tempScore;
|
||||||
|
|
||||||
|
// Swap words
|
||||||
|
String tempWord = matchWords[i];
|
||||||
|
matchWords[i] = matchWords[j];
|
||||||
|
matchWords[j] = tempWord;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create results
|
||||||
|
List<ScoredSuggestion> results = new ArrayList<>(Math.min(k, matchCount));
|
||||||
|
for (int i = 0; i < Math.min(k, matchCount); i++) {
|
||||||
|
results.add(new ScoredSuggestion(matchWords[i], matchScores[i]));
|
||||||
|
}
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get top k words from candidate IDs, filtering by the full prefix.
|
||||||
|
*/
|
||||||
|
private List<ScoredSuggestion> getFilteredTopKFromWordIds(TIntArrayList wordIds, String fullPrefix, int k) {
|
||||||
|
if (wordIds == null || wordIds.isEmpty()) {
|
||||||
|
return Collections.emptyList();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make primitive arrays for better performance
|
||||||
|
String[] matchWords = new String[Math.min(wordIds.size(), 1000)];
|
||||||
|
int[] matchScores = new int[matchWords.length];
|
||||||
|
int matchCount = 0;
|
||||||
|
|
||||||
|
int[] ids = wordIds.toArray();
|
||||||
|
for (int i = 0; i < ids.length && matchCount < matchWords.length; i++) {
|
||||||
|
int wordId = ids[i];
|
||||||
|
if (wordId >= 0 && wordId < words.size()) {
|
||||||
|
String word = words.get(wordId);
|
||||||
|
if (word.startsWith(fullPrefix)) {
|
||||||
|
matchWords[matchCount] = word;
|
||||||
|
matchScores[matchCount] = wordScores.get(wordId);
|
||||||
|
matchCount++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort by score (efficient insertion sort for small k)
|
||||||
|
for (int i = 0; i < Math.min(matchCount, k); i++) {
|
||||||
|
int maxPos = i;
|
||||||
|
for (int j = i + 1; j < matchCount; j++) {
|
||||||
|
if (matchScores[j] > matchScores[maxPos]) {
|
||||||
|
maxPos = j;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (maxPos != i) {
|
||||||
|
// Swap
|
||||||
|
int tempScore = matchScores[i];
|
||||||
|
matchScores[i] = matchScores[maxPos];
|
||||||
|
matchScores[maxPos] = tempScore;
|
||||||
|
|
||||||
|
String tempWord = matchWords[i];
|
||||||
|
matchWords[i] = matchWords[maxPos];
|
||||||
|
matchWords[maxPos] = tempWord;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create result list (only up to k elements)
|
||||||
|
List<ScoredSuggestion> results = new ArrayList<>(Math.min(k, matchCount));
|
||||||
|
for (int i = 0; i < Math.min(k, matchCount); i++) {
|
||||||
|
results.add(new ScoredSuggestion(matchWords[i], matchScores[i]));
|
||||||
|
}
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class representing a suggested completion.
|
||||||
|
*/
|
||||||
|
public static class ScoredSuggestion implements Comparable<ScoredSuggestion> {
|
||||||
|
private final String word;
|
||||||
|
private final int score;
|
||||||
|
|
||||||
|
public ScoredSuggestion(String word, int score) {
|
||||||
|
this.word = word;
|
||||||
|
this.score = score;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getWord() {
|
||||||
|
return word;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getScore() {
|
||||||
|
return score;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return word + " (" + score + ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compareTo(@NotNull PrefixSearchStructure.ScoredSuggestion o) {
|
||||||
|
return Integer.compare(this.score, o.score);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@@ -2,74 +2,89 @@ package nu.marginalia.assistant.suggest;
|
|||||||
|
|
||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import com.google.inject.name.Named;
|
import com.google.inject.name.Named;
|
||||||
import nu.marginalia.functions.math.dict.SpellChecker;
|
|
||||||
import nu.marginalia.term_frequency_dict.TermFrequencyDict;
|
|
||||||
import nu.marginalia.model.crawl.HtmlFeature;
|
|
||||||
import org.apache.commons.collections4.trie.PatriciaTrie;
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.BufferedInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.StandardOpenOption;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.function.Supplier;
|
import java.util.zip.GZIPInputStream;
|
||||||
import java.util.regex.Pattern;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
import java.util.stream.Stream;
|
|
||||||
|
|
||||||
public class Suggestions {
|
public class Suggestions {
|
||||||
private PatriciaTrie<String> suggestionsTrie = null;
|
List<PrefixSearchStructure> searchStructures = new ArrayList<>();
|
||||||
private TermFrequencyDict termFrequencyDict = null;
|
|
||||||
private volatile boolean ready = false;
|
private volatile boolean ready = false;
|
||||||
private final SpellChecker spellChecker;
|
|
||||||
|
|
||||||
private static final Pattern suggestionPattern = Pattern.compile("^[a-zA-Z0-9]+( [a-zA-Z0-9]+)*$");
|
|
||||||
private static final Logger logger = LoggerFactory.getLogger(Suggestions.class);
|
private static final Logger logger = LoggerFactory.getLogger(Suggestions.class);
|
||||||
|
|
||||||
private static final int MIN_SUGGEST_LENGTH = 3;
|
private static final int MIN_SUGGEST_LENGTH = 3;
|
||||||
@Inject
|
@Inject
|
||||||
public Suggestions(@Named("suggestions-file") Path suggestionsFile,
|
public Suggestions(@Named("suggestions-file1") Path suggestionsFile1,
|
||||||
SpellChecker spellChecker,
|
@Named("suggestions-file2") Path suggestionsFile2
|
||||||
TermFrequencyDict dict
|
|
||||||
) {
|
) {
|
||||||
this.spellChecker = spellChecker;
|
|
||||||
|
|
||||||
Thread.ofPlatform().start(() -> {
|
Thread.ofPlatform().start(() -> {
|
||||||
suggestionsTrie = loadSuggestions(suggestionsFile);
|
searchStructures.add(loadSuggestions(suggestionsFile1));
|
||||||
termFrequencyDict = dict;
|
searchStructures.add(loadSuggestions(suggestionsFile2));
|
||||||
ready = true;
|
ready = true;
|
||||||
logger.info("Loaded {} suggestions", suggestionsTrie.size());
|
logger.info("Loaded suggestions");
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
private static PatriciaTrie<String> loadSuggestions(Path file) {
|
private static PrefixSearchStructure loadSuggestions(Path file) {
|
||||||
|
PrefixSearchStructure ret = new PrefixSearchStructure();
|
||||||
|
|
||||||
if (!Files.exists(file)) {
|
if (!Files.exists(file)) {
|
||||||
logger.error("Suggestions file {} absent, loading empty suggestions db", file);
|
logger.error("Suggestions file {} absent, loading empty suggestions db", file);
|
||||||
return new PatriciaTrie<>();
|
return ret;
|
||||||
}
|
}
|
||||||
try (var lines = Files.lines(file)) {
|
|
||||||
var ret = new PatriciaTrie<String>();
|
|
||||||
|
|
||||||
lines.filter(suggestionPattern.asPredicate())
|
try (var scanner = new Scanner(new GZIPInputStream(new BufferedInputStream(Files.newInputStream(file, StandardOpenOption.READ))))) {
|
||||||
.filter(line -> line.length()<32)
|
while (scanner.hasNextLine()) {
|
||||||
.map(String::toLowerCase)
|
String line = scanner.nextLine().trim();
|
||||||
.forEach(w -> ret.put(w, w));
|
String[] parts = StringUtils.split(line, " ,", 2);
|
||||||
|
if (parts.length != 2) {
|
||||||
|
logger.warn("Invalid suggestion line: {}", line);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
int cnt = Integer.parseInt(parts[0]);
|
||||||
|
if (cnt > 1) {
|
||||||
|
String word = parts[1];
|
||||||
|
|
||||||
// Add special keywords to the suggestions
|
// Remove quotes and trailing periods if this is a CSV
|
||||||
for (var feature : HtmlFeature.values()) {
|
if (word.startsWith("\"") && word.endsWith("\"")) {
|
||||||
String keyword = feature.getKeyword();
|
word = word.substring(1, word.length() - 1);
|
||||||
|
}
|
||||||
|
|
||||||
ret.put(keyword, keyword);
|
// Remove trailing periods
|
||||||
ret.put("-" + keyword, "-" + keyword);
|
while (word.endsWith(".")) {
|
||||||
|
word = word.substring(0, word.length() - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove junk items we may have gotten from link extraction
|
||||||
|
if (word.startsWith("click here"))
|
||||||
|
continue;
|
||||||
|
if (word.contains("new window"))
|
||||||
|
continue;
|
||||||
|
if (word.contains("click to"))
|
||||||
|
continue;
|
||||||
|
if (word.startsWith("share "))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (word.length() > 3) {
|
||||||
|
ret.insert(word, cnt);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
catch (IOException ex) {
|
catch (IOException ex) {
|
||||||
logger.error("Failed to load suggestions file", ex);
|
logger.error("Failed to load suggestions file", ex);
|
||||||
return new PatriciaTrie<>();
|
return new PrefixSearchStructure();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -83,96 +98,36 @@ public class Suggestions {
|
|||||||
|
|
||||||
searchWord = StringUtils.stripStart(searchWord.toLowerCase(), " ");
|
searchWord = StringUtils.stripStart(searchWord.toLowerCase(), " ");
|
||||||
|
|
||||||
return Stream.of(
|
return getSuggestionsForKeyword(count, searchWord);
|
||||||
new SuggestionStream("", getSuggestionsForKeyword(count, searchWord)),
|
|
||||||
suggestionsForLastWord(count, searchWord),
|
|
||||||
spellCheckStream(searchWord)
|
|
||||||
)
|
|
||||||
.flatMap(SuggestionsStreamable::stream)
|
|
||||||
.limit(count)
|
|
||||||
.collect(Collectors.toList());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private SuggestionsStreamable suggestionsForLastWord(int count, String searchWord) {
|
public List<String> getSuggestionsForKeyword(int count, String prefix) {
|
||||||
int sp = searchWord.lastIndexOf(' ');
|
|
||||||
|
|
||||||
if (sp < 0) {
|
|
||||||
return Stream::empty;
|
|
||||||
}
|
|
||||||
|
|
||||||
String prefixString = searchWord.substring(0, sp+1);
|
|
||||||
String suggestString = searchWord.substring(sp+1);
|
|
||||||
|
|
||||||
return new SuggestionStream(prefixString, getSuggestionsForKeyword(count, suggestString));
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private SuggestionsStreamable spellCheckStream(String word) {
|
|
||||||
int start = word.lastIndexOf(' ');
|
|
||||||
String prefix;
|
|
||||||
String corrWord;
|
|
||||||
|
|
||||||
if (start < 0) {
|
|
||||||
corrWord = word;
|
|
||||||
prefix = "";
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
prefix = word.substring(0, start + 1);
|
|
||||||
corrWord = word.substring(start + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (corrWord.length() >= MIN_SUGGEST_LENGTH) {
|
|
||||||
Supplier<Stream<String>> suggestionsLazyEval = () -> spellChecker.correct(corrWord).stream();
|
|
||||||
return new SuggestionStream(prefix, Stream.of(suggestionsLazyEval).flatMap(Supplier::get));
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
return Stream::empty;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public Stream<String> getSuggestionsForKeyword(int count, String prefix) {
|
|
||||||
if (!ready)
|
if (!ready)
|
||||||
return Stream.empty();
|
return List.of();
|
||||||
|
|
||||||
if (prefix.length() < MIN_SUGGEST_LENGTH) {
|
if (prefix.length() < MIN_SUGGEST_LENGTH) {
|
||||||
return Stream.empty();
|
return List.of();
|
||||||
}
|
}
|
||||||
|
|
||||||
var start = suggestionsTrie.select(prefix);
|
List<PrefixSearchStructure.ScoredSuggestion> resultsAll = new ArrayList<>();
|
||||||
|
|
||||||
if (start == null) {
|
for (var searchStructure : searchStructures) {
|
||||||
return Stream.empty();
|
resultsAll.addAll(searchStructure.getTopCompletions(prefix, count));
|
||||||
|
}
|
||||||
|
resultsAll.sort(Comparator.reverseOrder());
|
||||||
|
List<String> ret = new ArrayList<>(count);
|
||||||
|
|
||||||
|
Set<String> seen = new HashSet<>();
|
||||||
|
for (var result : resultsAll) {
|
||||||
|
if (seen.add(result.getWord())) {
|
||||||
|
ret.add(result.getWord());
|
||||||
|
}
|
||||||
|
if (ret.size() >= count) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!start.getKey().startsWith(prefix)) {
|
return ret;
|
||||||
return Stream.empty();
|
|
||||||
}
|
|
||||||
|
|
||||||
SuggestionsValueCalculator sv = new SuggestionsValueCalculator();
|
|
||||||
|
|
||||||
return Stream.iterate(start.getKey(), Objects::nonNull, suggestionsTrie::nextKey)
|
|
||||||
.takeWhile(s -> s.startsWith(prefix))
|
|
||||||
.limit(256)
|
|
||||||
.sorted(Comparator.comparing(sv::get).thenComparing(String::length).thenComparing(Comparator.naturalOrder()))
|
|
||||||
.limit(count);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private record SuggestionStream(String prefix, Stream<String> suggestionStream) implements SuggestionsStreamable {
|
|
||||||
public Stream<String> stream() {
|
|
||||||
return suggestionStream.map(s -> prefix + s);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
interface SuggestionsStreamable { Stream<String> stream(); }
|
|
||||||
|
|
||||||
private class SuggestionsValueCalculator {
|
|
||||||
|
|
||||||
private final Map<String, Long> hashCache = new HashMap<>(512);
|
|
||||||
|
|
||||||
public int get(String s) {
|
|
||||||
long hash = hashCache.computeIfAbsent(s, TermFrequencyDict::getStringHash);
|
|
||||||
return -termFrequencyDict.getTermFreqHash(hash);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@@ -2,7 +2,7 @@ plugins {
|
|||||||
id 'java'
|
id 'java'
|
||||||
id 'application'
|
id 'application'
|
||||||
id 'jvm-test-suite'
|
id 'jvm-test-suite'
|
||||||
id 'com.google.cloud.tools.jib' version '3.4.4'
|
id 'com.google.cloud.tools.jib' version '3.4.5'
|
||||||
}
|
}
|
||||||
|
|
||||||
java {
|
java {
|
||||||
|
@@ -59,9 +59,14 @@ public class ControlMain extends MainClass {
|
|||||||
download(adblockFile, new URI("https://downloads.marginalia.nu/data/adblock.txt"));
|
download(adblockFile, new URI("https://downloads.marginalia.nu/data/adblock.txt"));
|
||||||
}
|
}
|
||||||
|
|
||||||
Path suggestionsFile = dataPath.resolve("suggestions.txt");
|
Path suggestionsFile = dataPath.resolve("suggestions2.txt.gz");
|
||||||
if (!Files.exists(suggestionsFile)) {
|
if (!Files.exists(suggestionsFile)) {
|
||||||
downloadGzipped(suggestionsFile, new URI("https://downloads.marginalia.nu/data/suggestions.txt.gz"));
|
download(suggestionsFile, new URI("https://downloads.marginalia.nu/data/suggestions2.txt.gz"));
|
||||||
|
}
|
||||||
|
|
||||||
|
Path altSuggestionsFile = dataPath.resolve("suggestions3.txt.gz");
|
||||||
|
if (!Files.exists(altSuggestionsFile)) {
|
||||||
|
download(altSuggestionsFile, new URI("https://downloads.marginalia.nu/data/suggestions3.txt.gz"));
|
||||||
}
|
}
|
||||||
|
|
||||||
Path asnRawData = dataPath.resolve("asn-data-raw-table");
|
Path asnRawData = dataPath.resolve("asn-data-raw-table");
|
||||||
|
@@ -321,9 +321,10 @@ public class ControlNodeActionsService {
|
|||||||
private Object exportSampleData(Request req, Response rsp) {
|
private Object exportSampleData(Request req, Response rsp) {
|
||||||
FileStorageId source = parseSourceFileStorageId(req.queryParams("source"));
|
FileStorageId source = parseSourceFileStorageId(req.queryParams("source"));
|
||||||
int size = Integer.parseInt(req.queryParams("size"));
|
int size = Integer.parseInt(req.queryParams("size"));
|
||||||
|
String ctFilter = req.queryParams("ctFilter");
|
||||||
String name = req.queryParams("name");
|
String name = req.queryParams("name");
|
||||||
|
|
||||||
exportClient.exportSampleData(Integer.parseInt(req.params("id")), source, size, name);
|
exportClient.exportSampleData(Integer.parseInt(req.params("id")), source, size, ctFilter, name);
|
||||||
|
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
@@ -35,6 +35,11 @@
|
|||||||
<div><input type="text" name="size" id="size" pattern="\d+" /></div>
|
<div><input type="text" name="size" id="size" pattern="\d+" /></div>
|
||||||
<small class="text-muted">How many domains to include in the sample set</small>
|
<small class="text-muted">How many domains to include in the sample set</small>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="mb-3">
|
||||||
|
<label for="ctFilter">Content Type Filter</label>
|
||||||
|
<div><input type="text" name="ctFilter" id="ctFilter" /></div>
|
||||||
|
<small class="text-muted">If set, includes only documents with the specified content type value</small>
|
||||||
|
</div>
|
||||||
<div class="mb-3">
|
<div class="mb-3">
|
||||||
<label for="name">Name</label>
|
<label for="name">Name</label>
|
||||||
<div><input type="text" name="name" id="name" /></div>
|
<div><input type="text" name="name" id="name" /></div>
|
||||||
|
@@ -3,7 +3,7 @@ plugins {
|
|||||||
|
|
||||||
id 'application'
|
id 'application'
|
||||||
id 'jvm-test-suite'
|
id 'jvm-test-suite'
|
||||||
id 'com.google.cloud.tools.jib' version '3.4.4'
|
id 'com.google.cloud.tools.jib' version '3.4.5'
|
||||||
}
|
}
|
||||||
|
|
||||||
application {
|
application {
|
||||||
|
@@ -3,7 +3,7 @@ plugins {
|
|||||||
|
|
||||||
id 'application'
|
id 'application'
|
||||||
id 'jvm-test-suite'
|
id 'jvm-test-suite'
|
||||||
id 'com.google.cloud.tools.jib' version '3.4.4'
|
id 'com.google.cloud.tools.jib' version '3.4.5'
|
||||||
}
|
}
|
||||||
|
|
||||||
application {
|
application {
|
||||||
|
@@ -3,7 +3,7 @@ plugins {
|
|||||||
|
|
||||||
id 'application'
|
id 'application'
|
||||||
id 'jvm-test-suite'
|
id 'jvm-test-suite'
|
||||||
id 'com.google.cloud.tools.jib' version '3.4.4'
|
id 'com.google.cloud.tools.jib' version '3.4.5'
|
||||||
}
|
}
|
||||||
|
|
||||||
application {
|
application {
|
||||||
|
@@ -3,7 +3,7 @@ plugins {
|
|||||||
|
|
||||||
id 'application'
|
id 'application'
|
||||||
id 'jvm-test-suite'
|
id 'jvm-test-suite'
|
||||||
id 'com.google.cloud.tools.jib' version '3.4.4'
|
id 'com.google.cloud.tools.jib' version '3.4.5'
|
||||||
}
|
}
|
||||||
|
|
||||||
java {
|
java {
|
||||||
|
@@ -1,4 +1,6 @@
|
|||||||
## This is a token file for automatic deployment
|
## This is a token file for triggering automatic deployment when no commit is made.
|
||||||
|
|
||||||
2025-01-08: Deploy executor.
|
2025-01-08: Deploy executor.
|
||||||
2025-01-07: Deploy executor.
|
2025-01-07: Deploy executor.
|
||||||
|
2025-04-24: Deploy executor.
|
||||||
|
2025-04-24: Deploy assistant.
|
2
gradle/wrapper/gradle-wrapper.properties
vendored
2
gradle/wrapper/gradle-wrapper.properties
vendored
@@ -1,5 +1,5 @@
|
|||||||
distributionBase=GRADLE_USER_HOME
|
distributionBase=GRADLE_USER_HOME
|
||||||
distributionPath=wrapper/dists
|
distributionPath=wrapper/dists
|
||||||
distributionUrl=https\://services.gradle.org/distributions/gradle-8.10-bin.zip
|
distributionUrl=https\://services.gradle.org/distributions/gradle-8.14-bin.zip
|
||||||
zipStoreBase=GRADLE_USER_HOME
|
zipStoreBase=GRADLE_USER_HOME
|
||||||
zipStorePath=wrapper/dists
|
zipStorePath=wrapper/dists
|
||||||
|
Reference in New Issue
Block a user