mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-06 07:32:38 +02:00
Compare commits
10 Commits
deploy-016
...
deploy-017
Author | SHA1 | Date | |
---|---|---|---|
|
83967e3305 | ||
|
4db980a291 | ||
|
089b177868 | ||
|
9c8e9a68d5 | ||
|
413d5cc788 | ||
|
58539b92ac | ||
|
fe72f16df1 | ||
|
b49a244a2e | ||
|
3f0b4c010f | ||
|
c6e0cd93f7 |
@@ -121,6 +121,28 @@ public class EdgeUrl implements Serializable {
|
|||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public String toDisplayString() {
|
||||||
|
StringBuilder sb = new StringBuilder(256);
|
||||||
|
|
||||||
|
sb.append(proto);
|
||||||
|
sb.append("://");
|
||||||
|
sb.append(domain);
|
||||||
|
|
||||||
|
if (port != null) {
|
||||||
|
sb.append(':');
|
||||||
|
sb.append(port);
|
||||||
|
}
|
||||||
|
|
||||||
|
sb.append(path);
|
||||||
|
|
||||||
|
if (param != null) {
|
||||||
|
sb.append('?').append(param);
|
||||||
|
}
|
||||||
|
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
public String dir() {
|
public String dir() {
|
||||||
return path.replaceAll("/[^/]+$", "/");
|
return path.replaceAll("/[^/]+$", "/");
|
||||||
}
|
}
|
||||||
@@ -303,7 +325,14 @@ class EdgeUriFactory {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (shouldUrlEncode) {
|
if (shouldUrlEncode) {
|
||||||
sb.append(URLEncoder.encode(queryPart, StandardCharsets.UTF_8));
|
int idx = queryPart.indexOf('=');
|
||||||
|
if (idx < 0) {
|
||||||
|
sb.append(URLEncoder.encode(queryPart, StandardCharsets.UTF_8));
|
||||||
|
} else {
|
||||||
|
sb.append(URLEncoder.encode(queryPart.substring(0, idx), StandardCharsets.UTF_8));
|
||||||
|
sb.append('=');
|
||||||
|
sb.append(URLEncoder.encode(queryPart.substring(idx + 1), StandardCharsets.UTF_8));
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
sb.append(queryPart);
|
sb.append(queryPart);
|
||||||
}
|
}
|
||||||
|
@@ -58,16 +58,32 @@ class EdgeUrlTest {
|
|||||||
Assertions.assertEquals("/wiki/Sámi", EdgeUriFactory.parseURILenient("https://en.wikipedia.org/wiki/Sámi").getPath());
|
Assertions.assertEquals("/wiki/Sámi", EdgeUriFactory.parseURILenient("https://en.wikipedia.org/wiki/Sámi").getPath());
|
||||||
Assertions.assertEquals("https://en.wikipedia.org/wiki/S%C3%A1mi", EdgeUriFactory.parseURILenient("https://en.wikipedia.org/wiki/Sámi").toString());
|
Assertions.assertEquals("https://en.wikipedia.org/wiki/S%C3%A1mi", EdgeUriFactory.parseURILenient("https://en.wikipedia.org/wiki/Sámi").toString());
|
||||||
Assertions.assertEquals("https://en.wikipedia.org/wiki/S%C3%A1mi", new EdgeUrl("https://en.wikipedia.org/wiki/Sámi").toString());
|
Assertions.assertEquals("https://en.wikipedia.org/wiki/S%C3%A1mi", new EdgeUrl("https://en.wikipedia.org/wiki/Sámi").toString());
|
||||||
|
|
||||||
|
Assertions.assertEquals("https://www.prijatelji-zivotinja.hr/index.en.php?id=2301k", new EdgeUrl("https://www.prijatelji-zivotinja.hr/index.en.php?id=2301k").toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void testParms() throws URISyntaxException {
|
void testParms() throws URISyntaxException {
|
||||||
Assertions.assertEquals("id=123", new EdgeUrl("https://search.marginalia.nu/?id=123").param);
|
Assertions.assertEquals("id=123", new EdgeUrl("https://search.marginalia.nu/?id=123").param);
|
||||||
|
Assertions.assertEquals("https://search.marginalia.nu/?id=123", new EdgeUrl("https://search.marginalia.nu/?id=123").toString());
|
||||||
|
|
||||||
Assertions.assertEquals("t=123", new EdgeUrl("https://search.marginalia.nu/?t=123").param);
|
Assertions.assertEquals("t=123", new EdgeUrl("https://search.marginalia.nu/?t=123").param);
|
||||||
|
Assertions.assertEquals("https://search.marginalia.nu/?t=123", new EdgeUrl("https://search.marginalia.nu/?t=123").toString());
|
||||||
|
|
||||||
Assertions.assertEquals("v=123", new EdgeUrl("https://search.marginalia.nu/?v=123").param);
|
Assertions.assertEquals("v=123", new EdgeUrl("https://search.marginalia.nu/?v=123").param);
|
||||||
|
Assertions.assertEquals("https://search.marginalia.nu/?v=123", new EdgeUrl("https://search.marginalia.nu/?v=123").toString());
|
||||||
|
|
||||||
Assertions.assertEquals("id=1", new EdgeUrl("https://memex.marginalia.nu/showthread.php?id=1&count=5&tracking=123").param);
|
Assertions.assertEquals("id=1", new EdgeUrl("https://memex.marginalia.nu/showthread.php?id=1&count=5&tracking=123").param);
|
||||||
|
Assertions.assertEquals("https://memex.marginalia.nu/showthread.php?id=1",
|
||||||
|
new EdgeUrl("https://memex.marginalia.nu/showthread.php?id=1&count=5&tracking=123").toString());
|
||||||
|
|
||||||
|
|
||||||
Assertions.assertEquals("id=1&t=5", new EdgeUrl("https://memex.marginalia.nu/shöwthrëad.php?id=1&t=5&tracking=123").param);
|
Assertions.assertEquals("id=1&t=5", new EdgeUrl("https://memex.marginalia.nu/shöwthrëad.php?id=1&t=5&tracking=123").param);
|
||||||
|
Assertions.assertEquals("https://memex.marginalia.nu/sh%C3%B6wthr%C3%ABad.php?id=1&t=5", new EdgeUrl("https://memex.marginalia.nu/shöwthrëad.php?id=1&t=5&tracking=123").toString());
|
||||||
|
|
||||||
Assertions.assertEquals("id=1&t=5", new EdgeUrl("https://memex.marginalia.nu/shöwthrëad.php?trëaking=123&id=1&t=5&").param);
|
Assertions.assertEquals("id=1&t=5", new EdgeUrl("https://memex.marginalia.nu/shöwthrëad.php?trëaking=123&id=1&t=5&").param);
|
||||||
|
Assertions.assertEquals("https://memex.marginalia.nu/sh%C3%B6wthr%C3%ABad.php?id=1&t=5", new EdgeUrl("https://memex.marginalia.nu/shöwthrëad.php?trëaking=123&id=1&t=5&").toString());
|
||||||
|
|
||||||
Assertions.assertNull(new EdgeUrl("https://search.marginalia.nu/?m=123").param);
|
Assertions.assertNull(new EdgeUrl("https://search.marginalia.nu/?m=123").param);
|
||||||
Assertions.assertNull(new EdgeUrl("https://search.marginalia.nu/?follow=123").param);
|
Assertions.assertNull(new EdgeUrl("https://search.marginalia.nu/?follow=123").param);
|
||||||
}
|
}
|
||||||
|
@@ -122,6 +122,11 @@ public class JoobyService {
|
|||||||
// single digit percentage difference since HTML already compresses very well with level = 1.
|
// single digit percentage difference since HTML already compresses very well with level = 1.
|
||||||
options.setCompressionLevel(1);
|
options.setCompressionLevel(1);
|
||||||
|
|
||||||
|
// Set a cap on the number of worker threads, as Jooby's default value does not seem to consider
|
||||||
|
// multi-tenant servers with high thread counts, and spins up an exorbitant number of threads in that
|
||||||
|
// scenario
|
||||||
|
options.setWorkerThreads(Math.min(128, options.getWorkerThreads()));
|
||||||
|
|
||||||
|
|
||||||
jooby.setServerOptions(options);
|
jooby.setServerOptions(options);
|
||||||
|
|
||||||
|
@@ -59,12 +59,6 @@ public class SampleDataExporter {
|
|||||||
Path newCrawlerLogFile = Files.createTempFile(destStorage.asPath(), "crawler", ".log",
|
Path newCrawlerLogFile = Files.createTempFile(destStorage.asPath(), "crawler", ".log",
|
||||||
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rw-r--r--")));
|
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rw-r--r--")));
|
||||||
|
|
||||||
try (var bw = Files.newBufferedWriter(newCrawlerLogFile, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)) {
|
|
||||||
for (var item : entriesAll) {
|
|
||||||
bw.write(item.id() + " " + item.ts() + " " + item.relPath() + " " + item.cnt() + "\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Path newManifestJsonFile = Files.createTempFile(destStorage.asPath(), "manifest", ".json",
|
Path newManifestJsonFile = Files.createTempFile(destStorage.asPath(), "manifest", ".json",
|
||||||
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rw-r--r--")));
|
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rw-r--r--")));
|
||||||
Files.writeString(newManifestJsonFile, " { \"description\": \"" + name.replace("[\"\\]", "_") + "\",\n \"type\": \"CRAWL_DATA\" }\n");
|
Files.writeString(newManifestJsonFile, " { \"description\": \"" + name.replace("[\"\\]", "_") + "\",\n \"type\": \"CRAWL_DATA\" }\n");
|
||||||
@@ -72,24 +66,27 @@ public class SampleDataExporter {
|
|||||||
var tmpTarFile = Files.createTempFile(destStorage.asPath(), "data", ".tar",
|
var tmpTarFile = Files.createTempFile(destStorage.asPath(), "data", ".tar",
|
||||||
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rw-r--r--")));
|
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rw-r--r--")));
|
||||||
|
|
||||||
try (var stream = new TarArchiveOutputStream(Files.newOutputStream(tmpTarFile, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING))) {
|
try (var stream = new TarArchiveOutputStream(Files.newOutputStream(tmpTarFile, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING));
|
||||||
|
var logWriter = Files.newBufferedWriter(newCrawlerLogFile, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING)
|
||||||
|
) {
|
||||||
for (var item : entriesAll) {
|
for (var item : entriesAll) {
|
||||||
Path crawlDataPath = inputDir.resolve(item.relPath());
|
Path crawlDataPath = inputDir.resolve(item.relPath());
|
||||||
if (!Files.exists(crawlDataPath)) continue;
|
if (!Files.exists(crawlDataPath)) continue;
|
||||||
|
|
||||||
if (StringUtils.isBlank(ctFilter)) {
|
if (StringUtils.isBlank(ctFilter)) {
|
||||||
addFileToTar(stream, crawlDataPath, item.relPath());
|
addFileToTar(stream, crawlDataPath, item.relPath());
|
||||||
|
logWriter.write(item.id() + " " + item.ts() + " " + item.relPath() + " " + item.cnt() + "\n");
|
||||||
}
|
}
|
||||||
else /* filter != null */ {
|
else /* filter != null */ {
|
||||||
boolean didFilterData = false;
|
Path filteredData = null;
|
||||||
try {
|
try {
|
||||||
crawlDataPath = filterEntries(crawlDataPath, ctFilter);
|
filteredData = filterEntries(crawlDataPath, ctFilter);
|
||||||
didFilterData = true;
|
addFileToTar(stream, filteredData, item.relPath());
|
||||||
addFileToTar(stream, crawlDataPath, item.relPath());
|
logWriter.write(item.id() + " " + item.ts() + " " + item.relPath() + " " + item.cnt() + "\n");
|
||||||
}
|
}
|
||||||
finally {
|
finally {
|
||||||
if (didFilterData) {
|
if (filteredData != null) {
|
||||||
Files.deleteIfExists(crawlDataPath);
|
Files.deleteIfExists(filteredData);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -106,11 +103,7 @@ public class SampleDataExporter {
|
|||||||
Files.move(tmpTarFile, destStorage.asPath().resolve("crawl-data.tar"), StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING);
|
Files.move(tmpTarFile, destStorage.asPath().resolve("crawl-data.tar"), StandardCopyOption.ATOMIC_MOVE, StandardCopyOption.REPLACE_EXISTING);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Filters the entries in the crawl data file based on the content type.
|
/** Filters the entries in the crawl data file based on the content type. */
|
||||||
* @param crawlDataPath The path to the crawl data file.
|
|
||||||
* @param contentTypeFilter The content type to filter by.
|
|
||||||
* @return The path to the filtered crawl data file, or null if an error occurred.
|
|
||||||
*/
|
|
||||||
private Path filterEntries(Path crawlDataPath, String contentTypeFilter) throws IOException {
|
private Path filterEntries(Path crawlDataPath, String contentTypeFilter) throws IOException {
|
||||||
Path tempDir = crawlDataPath.resolveSibling(crawlDataPath.getFileName() + ".filtered");
|
Path tempDir = crawlDataPath.resolveSibling(crawlDataPath.getFileName() + ".filtered");
|
||||||
Path tempFile = crawlDataPath.resolveSibling(crawlDataPath.getFileName() + ".filtered.slop.zip");
|
Path tempFile = crawlDataPath.resolveSibling(crawlDataPath.getFileName() + ".filtered.slop.zip");
|
||||||
@@ -132,8 +125,16 @@ public class SampleDataExporter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
) {
|
) {
|
||||||
|
boolean wroteEntry = false;
|
||||||
while (reader.hasRemaining()) {
|
while (reader.hasRemaining()) {
|
||||||
writer.write(reader.get());
|
var entry = reader.get();
|
||||||
|
writer.write(entry);
|
||||||
|
|
||||||
|
wroteEntry = wroteEntry || contentTypeFilter.equals(entry.contentType());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!wroteEntry) {
|
||||||
|
throw new IOException("No relevant entries found");
|
||||||
}
|
}
|
||||||
|
|
||||||
SlopTablePacker.packToSlopZip(tempDir, tempFile);
|
SlopTablePacker.packToSlopZip(tempDir, tempFile);
|
||||||
|
@@ -180,7 +180,7 @@ public class UrlDetails implements Comparable<UrlDetails> {
|
|||||||
* semantically meaningful codepoints into entity codes */
|
* semantically meaningful codepoints into entity codes */
|
||||||
public String displayUrl() {
|
public String displayUrl() {
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
String urlStr = url.toString();
|
String urlStr = url.toDisplayString();
|
||||||
for (int i = 0; i < urlStr.length(); i++) {
|
for (int i = 0; i < urlStr.length(); i++) {
|
||||||
char c = urlStr.charAt(i);
|
char c = urlStr.charAt(i);
|
||||||
|
|
||||||
|
@@ -20,6 +20,6 @@ public class StatusModule extends AbstractModule {
|
|||||||
bind(String.class)
|
bind(String.class)
|
||||||
.annotatedWith(Names.named("searchEngineTestQuery"))
|
.annotatedWith(Names.named("searchEngineTestQuery"))
|
||||||
.toInstance(System.getProperty("status-service.public-query",
|
.toInstance(System.getProperty("status-service.public-query",
|
||||||
"https://search.marginalia.nu/search?query=plato&ref=marginalia-automatic-metrics"));
|
"https://marginalia-search.com/search?query=plato&ref=marginalia-automatic-metrics"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -4,4 +4,6 @@
|
|||||||
2025-01-07: Deploy executor.
|
2025-01-07: Deploy executor.
|
||||||
2025-04-24: Deploy executor.
|
2025-04-24: Deploy executor.
|
||||||
2025-04-24: Deploy assistant.
|
2025-04-24: Deploy assistant.
|
||||||
2025-04-24: Deploy qs, search and api-services.
|
2025-05-04: Deploy qs, search and api-services.
|
||||||
|
2025-05-05: Deploy executor partition 4.
|
||||||
|
2025-05-05: Deploy control.
|
||||||
|
@@ -314,6 +314,13 @@ if __name__ == '__main__':
|
|||||||
deploy_tier=0,
|
deploy_tier=0,
|
||||||
groups={"all", "core"}
|
groups={"all", "core"}
|
||||||
),
|
),
|
||||||
|
'status': ServiceConfig(
|
||||||
|
gradle_target=':code:services-application:status-service:docker',
|
||||||
|
docker_name='status-service',
|
||||||
|
instances=None,
|
||||||
|
deploy_tier=4,
|
||||||
|
groups={"all"}
|
||||||
|
),
|
||||||
'query': ServiceConfig(
|
'query': ServiceConfig(
|
||||||
gradle_target=':code:services-core:query-service:docker',
|
gradle_target=':code:services-core:query-service:docker',
|
||||||
docker_name='query-service',
|
docker_name='query-service',
|
||||||
|
Reference in New Issue
Block a user