Mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git
Synced 2025-10-05 21:22:39 +02:00

Compare commits: v24.10.0...deploy-005 (274 commits)
SHA1
b245cc9f38
6614d05bdf
55aeb03c4a
faa589962f
c7edd6b39f
79da622e3b
3da8337ba6
a32d230f0a
3772bfd387
02a7900d1a
a1fb92468f
b7f0a2a98e
5fb76b2e79
ad8c97f342
dc1b6373eb
983d6d067c
a84a06975c
d2864c13ec
03ba53ce51
d4a6684931
6f0485287a
59e2dd4c26
ca1807caae
26c20e18ac
7c90b6b414
b63c54c4ce
fecd2f4ec3
39e420de88
dc83619861
87d1c89701
a42a7769e2
202bda884f
2315fdc731
b5469bd8a1
6a6318d04c
55933f8d40
be6382e0d0
45e771f96b
8dde502cc9
3e66767af3
9ec9d1b338
dcad0d7863
94e1aa0baf
b62f043910
6ea22d0d21
8c69dc31b8
00734ea87f
3009713db4
9b2ceaf37c
8019c2ce18
a9e312b8b1
4da3563d8a
48d0a3089a
594df64b20
06efb5abfc
78eb1417a7
8c8f2ad5ee
f71e79d10f
1b27c5cf06
67edc8f90d
5f576b7d0c
8b05c788fd
236f033bc9
510fc75121
0376f2e6e3
0b65164f60
9be477de33
84f55b84ff
ab5c30ad51
0c839453c5
5e4c5d03ae
710af4999a
a5b0a1ae62
e9f71ee39b
baeb4a46cd
5e2a8e9f27
cc1a5bdf90
7f7b1ffaba
0ea8092350
483d29497e
bae44497fe
0d59202aca
0ca43f0c9c
3bc99639a0
927bc0b63c
d968801dc1
89db69d360
895cee7004
4bb71b8439
e4a41f7dd1
69ad6287b1
81cdd6385d
e76c42329f
e6ef4734ea
41a59dcf45
df4bc1d7e9
2b222efa75
94d4d2edb7
7ae19a92ba
56d14e56d7
a557c7ae7f
b66879ccb1
f1b7157ca2
7622335e84
0da2047eae
5ee4321110
9459b9933b
87fb564f89
5ca8523220
1118657ffd
b1f970152d
e1783891ab
64d32471dd
232cc465d9
8c963bd4ba
6a079c1c75
2dc9f2e639
b66fb9caf6
6d18e6d840
2a3c63f209
9f70cecaef
c08203e2ed
86497fd32f
3b998573fd
e161882ec7
357f349e30
e4769f541d
2a173e2861
a6a900266c
bdba53f055
eb2fe18867
a7468c8d23
fb2beb1eac
0fb03e3d62
67db3f295e
dafaab3ef7
3f11ca409f
694eed79ef
4220169119
bbdde789e7
0a53ac68a0
eab61cd48a
e65d75a0f9
3b99cffb3d
a97c05107e
5002870d1f
73861e613f
0ce2ba9ad9
3ddcebaa36
b91463383e
7444a2f36c
461bc3eb1a
cf7f84f033
fdee07048d
2fbf201761
4018e4c434
f3382b5bd8
9fc82574f0
589f4dafb9
c5d657ef98
3c2bb566da
9287ee0141
2769c8f869
ddb66f33ba
79500b8fbc
187eea43a4
a89ed6fa9f
e0c0ed27bc
20abb91657
291ca8daf1
8d168be138
6e1aa7b391
deab9b9516
39d99a906a
6f72e6e0d3
d786d79483
01510f6c2e
7ba43e9e3f
97bfcd1353
aa3c85c196
ee2d5496d0
5c858a2b94
fb75a3827d
7d546d0e2a
8fcb6ffd7a
f97de0c15a
be9e192b78
75ae1c9526
33761a0236
19b69b1764
8b804359a9
f050bf5c4c
fdc3efa250
5fdd2c71f8
c97c66a41c
7b64377fd6
e11ebf18e5
ba47d72bf4
52bc0272f8
d4bce13a03
b9842b57e0
95776e9bee
077d8dcd11
9ec41e27c6
200743c84f
6d7998e349
7d1ef08a0f
ea6b148df2
3ec9c4c5fa
0b6b5dab07
ff17473105
dc5f97e737
d919179ba3
f09669a5b0
b3b0f6fed3
88caca60f9
923ebbac81
df298df852
552b246099
80e6d0069c
b941604135
52eb5bc84f
4d23fe6261
14519294d2
51e46ad2b0
665c8831a3
47dfbacb00
f94911541a
89d8af640d
6e4252cf4c
79ce4de2ab
d6575dfee4
a91ab4c203
6a3079a167
c728a1e2f2
d874d76a09
70bc8831f5
41c11be075
163ce19846
9eb16cb667
af40fa327b
cf6d28e71e
3791ea1e18
34258b92d1
e5db3f11e1
9f47ce8d15
a5b4951f23
8b8bf0748f
5cc71ae586
33fcfe4b63
a31a3b53c4
a456ec9599
a2bc9a98c0
e24a98390c
6f858cd627
a293266ccd
b8e0dc93d7
d774c39031
ab17af99da
b0ac3c586f
139fa85b18
bfeb9a4538
3d6c79ae5f
c9e9f73ea9
80e482b155
9351593495
d74436f546
76e9053dd0
dbb8bcdd8e
7305afa0f8
481f999b70
4b16022556
89dd201a7b
ab486323f2
.github/FUNDING.yml (vendored): 1 change
@@ -1,5 +1,6 @@
 # These are supported funding model platforms

+polar: marginalia-search
 github: MarginaliaSearch
 patreon: marginalia_nu
 open_collective: # Replace with a single Open Collective username
.gitignore (vendored): 1 change
@@ -7,3 +7,4 @@ build/
 lombok.config
 Dockerfile
 run
+jte-classes
ROADMAP.md: 56 changes
@@ -8,20 +8,10 @@ be implemented as well.
 Major goals:

 * Reach 1 billion pages indexed
-* Improve technical ability of indexing and search. Although this area has improved a bit, the
-search engine is still not very good at dealing with longer queries.
-
-## Proper Position Index (COMPLETED 2024-09)
-
-The search engine uses a fixed width bit mask to indicate word positions. It has the benefit
-of being very fast to evaluate and works well for what it is, but is inaccurate and has the
-drawback of making support for quoted search terms inaccurate and largely reliant on indexing
-word n-grams known beforehand. This limits the ability to interpret longer queries.
-
-The positions mask should be supplemented or replaced with a more accurate (e.g.) gamma coded positions
-list, as is the civilized way of doing this.
-
-Completed with PR https://github.com/MarginaliaSearch/MarginaliaSearch/pull/99
+* Improve technical ability of indexing and search. ~~Although this area has improved a bit, the
+search engine is still not very good at dealing with longer queries.~~ (As of PR [#129](https://github.com/MarginaliaSearch/MarginaliaSearch/pull/129), this has improved significantly. There is still more work to be done )

 ## Hybridize crawler w/ Common Crawl data

@@ -37,10 +27,15 @@ Retaining the ability to independently crawl the web is still strongly desirable

 ## Safe Search

-The search engine has a bit of a problem showing spicy content mixed in with the results. It would be desirable
-to have a way to filter this out. It's likely something like a URL blacklist (e.g. [UT1](https://dsi.ut-capitole.fr/blacklists/index_en.php) )
+The search engine has a bit of a problem showing spicy content mixed in with the results. It would be desirable to have a way to filter this out. It's likely something like a URL blacklist (e.g. [UT1](https://dsi.ut-capitole.fr/blacklists/index_en.php) )
 combined with naive bayesian filter would go a long way, or something more sophisticated...?

 ## Web Design Overhaul

 The design is kinda clunky and hard to maintain, and needlessly outdated-looking.

+In progress: PR [#127](https://github.com/MarginaliaSearch/MarginaliaSearch/pull/127) -- demo available at https://test.marginalia.nu/
+
 ## Additional Language Support

 It would be desirable if the search engine supported more languages than English. This is partially about
@@ -49,13 +44,6 @@ associated with each language added, at least a models file or two, as well as s

 It would be very helpful to find a speaker of a large language other than English to help in the fine tuning.

-## Finalize RSS support
-
-Marginalia has experimental RSS preview support for a few domains. This works well and
-it should be extended to all domains. It would also be interesting to offer search of the
-RSS data itself, or use the RSS set to feed a special live index that updates faster than the
-main dataset.
-
 ## Support for binary formats like PDF

 The crawler needs to be modified to retain them, and the conversion logic needs to parse them.
@@ -72,5 +60,27 @@ This looks like a good idea that wouldn't just help clean up the search filters
 website, but might be cheap enough we might go as far as to offer a number of ad-hoc custom search
 filter for any API consumer.

-I've talked to the stract dev and he does not think it's a good idea to mimic their optics language,
-which is quite ad-hoc, but instead to work together to find some new common description language for this.
+I've talked to the stract dev and he does not think it's a good idea to mimic their optics language, which is quite ad-hoc, but instead to work together to find some new common description language for this.
+
+# Completed
+
+## Proper Position Index (COMPLETED 2024-09)
+
+The search engine uses a fixed width bit mask to indicate word positions. It has the benefit
+of being very fast to evaluate and works well for what it is, but is inaccurate and has the
+drawback of making support for quoted search terms inaccurate and largely reliant on indexing
+word n-grams known beforehand. This limits the ability to interpret longer queries.
+
+The positions mask should be supplemented or replaced with a more accurate (e.g.) gamma coded positions
+list, as is the civilized way of doing this.
+
+Completed with PR [#99](https://github.com/MarginaliaSearch/MarginaliaSearch/pull/99)
+
+## Finalize RSS support (COMPLETED 2024-11)
+
+Marginalia has experimental RSS preview support for a few domains. This works well and
+it should be extended to all domains. It would also be interesting to offer search of the
+RSS data itself, or use the RSS set to feed a special live index that updates faster than the
+main dataset.
+
+Completed with PR [#122](https://github.com/MarginaliaSearch/MarginaliaSearch/pull/122) and PR [#125](https://github.com/MarginaliaSearch/MarginaliaSearch/pull/125)
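For context on the "gamma coded positions list" mentioned above: Elias gamma coding stores each gap between successive word positions as a variable-length bit string, so runs of nearby positions compress to very few bits. A minimal illustrative sketch in Java — not the actual PR #99 implementation:

    import java.util.List;

    class GammaPositionsSketch {
        // Elias gamma code for a positive integer: (bitLength - 1) zero bits,
        // followed by the integer's binary representation.
        static String gamma(int value) {
            String bits = Integer.toBinaryString(value);
            return "0".repeat(bits.length() - 1) + bits;
        }

        // Delta-code the sorted positions, then gamma-code each gap; small
        // gaps between nearby words cost only a few bits each.
        static String encode(List<Integer> positions) {
            StringBuilder out = new StringBuilder();
            int prev = 0;
            for (int pos : positions) {
                out.append(gamma(pos - prev));
                prev = pos;
            }
            return out.toString();
        }

        public static void main(String[] args) {
            // positions 3, 4, 7 -> gaps 3, 1, 3 -> "011" + "1" + "011"
            System.out.println(encode(List.of(3, 4, 7))); // prints 0111011
        }
    }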
build.gradle
@@ -1,7 +1,6 @@
 plugins {
     id 'java'
     id("org.jetbrains.gradle.plugin.idea-ext") version "1.0"
-    id "io.freefair.lombok" version "8.3"
     id "me.champeau.jmh" version "0.6.6"

     // This is a workaround for a bug in the Jib plugin that causes it to stall randomly
@@ -44,11 +43,12 @@ subprojects.forEach {it ->
 }

 ext {
-    jvmVersion=22
-    dockerImageBase='container-registry.oracle.com/graalvm/jdk:22'
+    jvmVersion=23
+    dockerImageBase='container-registry.oracle.com/graalvm/jdk:23'
     dockerImageTag='latest'
     dockerImageRegistry='marginalia'
     jibVersion = '3.4.3'

 }

 idea {
LanguageModels.java
@@ -1,17 +1,13 @@
 package nu.marginalia;

-import lombok.Builder;
-
 import java.nio.file.Path;

-@Builder
 public class LanguageModels {
     public final Path termFrequencies;

     public final Path openNLPSentenceDetectionData;
     public final Path posRules;
     public final Path posDict;
-    public final Path openNLPTokenData;
     public final Path fasttextLanguageModel;
     public final Path segments;

@@ -19,15 +15,67 @@ public class LanguageModels {
             Path openNLPSentenceDetectionData,
             Path posRules,
             Path posDict,
-            Path openNLPTokenData,
             Path fasttextLanguageModel,
             Path segments) {
         this.termFrequencies = termFrequencies;
         this.openNLPSentenceDetectionData = openNLPSentenceDetectionData;
         this.posRules = posRules;
         this.posDict = posDict;
-        this.openNLPTokenData = openNLPTokenData;
         this.fasttextLanguageModel = fasttextLanguageModel;
         this.segments = segments;
     }
+
+    public static LanguageModelsBuilder builder() {
+        return new LanguageModelsBuilder();
+    }
+
+    public static class LanguageModelsBuilder {
+        private Path termFrequencies;
+        private Path openNLPSentenceDetectionData;
+        private Path posRules;
+        private Path posDict;
+        private Path fasttextLanguageModel;
+        private Path segments;
+
+        LanguageModelsBuilder() {
+        }
+
+        public LanguageModelsBuilder termFrequencies(Path termFrequencies) {
+            this.termFrequencies = termFrequencies;
+            return this;
+        }
+
+        public LanguageModelsBuilder openNLPSentenceDetectionData(Path openNLPSentenceDetectionData) {
+            this.openNLPSentenceDetectionData = openNLPSentenceDetectionData;
+            return this;
+        }
+
+        public LanguageModelsBuilder posRules(Path posRules) {
+            this.posRules = posRules;
+            return this;
+        }
+
+        public LanguageModelsBuilder posDict(Path posDict) {
+            this.posDict = posDict;
+            return this;
+        }
+
+        public LanguageModelsBuilder fasttextLanguageModel(Path fasttextLanguageModel) {
+            this.fasttextLanguageModel = fasttextLanguageModel;
+            return this;
+        }
+
+        public LanguageModelsBuilder segments(Path segments) {
+            this.segments = segments;
+            return this;
+        }
+
+        public LanguageModels build() {
+            return new LanguageModels(this.termFrequencies, this.openNLPSentenceDetectionData, this.posRules, this.posDict, this.fasttextLanguageModel, this.segments);
+        }
+
+        public String toString() {
+            return "LanguageModels.LanguageModelsBuilder(termFrequencies=" + this.termFrequencies + ", openNLPSentenceDetectionData=" + this.openNLPSentenceDetectionData + ", posRules=" + this.posRules + ", posDict=" + this.posDict + ", fasttextLanguageModel=" + this.fasttextLanguageModel + ", segments=" + this.segments + ")";
+        }
+    }
 }
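With Lombok's @Builder replaced by the hand-written builder above, call sites keep the same fluent shape. A usage sketch — the term-frequency file name is hypothetical, the other paths mirror the WmsaHome defaults in the next diff:

    Path home = Path.of("/var/lib/wmsa");  // illustrative install directory

    LanguageModels models = LanguageModels.builder()
            .termFrequencies(home.resolve("model/tfreq.bin"))  // hypothetical file name
            .openNLPSentenceDetectionData(home.resolve("model/opennlp-sentence.bin"))
            .posRules(home.resolve("model/English.RDR"))
            .posDict(home.resolve("model/English.DICT"))
            .fasttextLanguageModel(home.resolve("model/lid.176.ftz"))
            .segments(home.resolve("model/segments.bin"))
            .build();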
WmsaHome.java
@@ -75,6 +75,10 @@ public class WmsaHome {
         return ret;
     }

+    public static Path getDataPath() {
+        return getHomePath().resolve("data");
+    }
+
     public static Path getAdsDefinition() {
         return getHomePath().resolve("data").resolve("adblock.txt");
     }
@@ -100,7 +104,6 @@ public class WmsaHome {
                 home.resolve("model/opennlp-sentence.bin"),
                 home.resolve("model/English.RDR"),
                 home.resolve("model/English.DICT"),
-                home.resolve("model/opennlp-tok.bin"),
                 home.resolve("model/lid.176.ftz"),
                 home.resolve("model/segments.bin")
         );
NodeConfigurationService.java
@@ -3,6 +3,7 @@ package nu.marginalia.nodecfg;
 import com.google.inject.Inject;
 import com.zaxxer.hikari.HikariDataSource;
 import nu.marginalia.nodecfg.model.NodeConfiguration;
+import nu.marginalia.nodecfg.model.NodeProfile;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

@@ -20,10 +21,10 @@ public class NodeConfigurationService {
         this.dataSource = dataSource;
     }

-    public NodeConfiguration create(int id, String description, boolean acceptQueries, boolean keepWarcs) throws SQLException {
+    public NodeConfiguration create(int id, String description, boolean acceptQueries, boolean keepWarcs, NodeProfile nodeProfile) throws SQLException {
         try (var conn = dataSource.getConnection();
              var is = conn.prepareStatement("""
-                     INSERT IGNORE INTO NODE_CONFIGURATION(ID, DESCRIPTION, ACCEPT_QUERIES, KEEP_WARCS) VALUES(?, ?, ?, ?)
+                     INSERT IGNORE INTO NODE_CONFIGURATION(ID, DESCRIPTION, ACCEPT_QUERIES, KEEP_WARCS, NODE_PROFILE) VALUES(?, ?, ?, ?, ?)
                      """)
         )
         {
@@ -31,6 +32,7 @@ public class NodeConfigurationService {
             is.setString(2, description);
             is.setBoolean(3, acceptQueries);
             is.setBoolean(4, keepWarcs);
+            is.setString(5, nodeProfile.name());

             if (is.executeUpdate() <= 0) {
                 throw new IllegalStateException("Failed to insert configuration");
@@ -43,7 +45,7 @@ public class NodeConfigurationService {
     public List<NodeConfiguration> getAll() {
         try (var conn = dataSource.getConnection();
              var qs = conn.prepareStatement("""
-                     SELECT ID, DESCRIPTION, ACCEPT_QUERIES, AUTO_CLEAN, PRECESSION, KEEP_WARCS, DISABLED
+                     SELECT ID, DESCRIPTION, ACCEPT_QUERIES, AUTO_CLEAN, PRECESSION, KEEP_WARCS, NODE_PROFILE, DISABLED
                      FROM NODE_CONFIGURATION
                      """)) {
             var rs = qs.executeQuery();
@@ -58,6 +60,7 @@ public class NodeConfigurationService {
                         rs.getBoolean("AUTO_CLEAN"),
                         rs.getBoolean("PRECESSION"),
                         rs.getBoolean("KEEP_WARCS"),
+                        NodeProfile.valueOf(rs.getString("NODE_PROFILE")),
                         rs.getBoolean("DISABLED")
                 ));
             }
@@ -72,7 +75,7 @@ public class NodeConfigurationService {
     public NodeConfiguration get(int nodeId) throws SQLException {
         try (var conn = dataSource.getConnection();
              var qs = conn.prepareStatement("""
-                     SELECT ID, DESCRIPTION, ACCEPT_QUERIES, AUTO_CLEAN, PRECESSION, KEEP_WARCS, DISABLED
+                     SELECT ID, DESCRIPTION, ACCEPT_QUERIES, AUTO_CLEAN, PRECESSION, KEEP_WARCS, NODE_PROFILE, DISABLED
                      FROM NODE_CONFIGURATION
                      WHERE ID=?
                      """)) {
@@ -86,6 +89,7 @@ public class NodeConfigurationService {
                     rs.getBoolean("AUTO_CLEAN"),
                     rs.getBoolean("PRECESSION"),
                     rs.getBoolean("KEEP_WARCS"),
+                    NodeProfile.valueOf(rs.getString("NODE_PROFILE")),
                     rs.getBoolean("DISABLED")
             );
         }
@@ -98,7 +102,7 @@ public class NodeConfigurationService {
         try (var conn = dataSource.getConnection();
              var us = conn.prepareStatement("""
                      UPDATE NODE_CONFIGURATION
-                     SET DESCRIPTION=?, ACCEPT_QUERIES=?, AUTO_CLEAN=?, PRECESSION=?, KEEP_WARCS=?, DISABLED=?
+                     SET DESCRIPTION=?, ACCEPT_QUERIES=?, AUTO_CLEAN=?, PRECESSION=?, KEEP_WARCS=?, DISABLED=?, NODE_PROFILE=?
                      WHERE ID=?
                      """))
         {
@@ -108,7 +112,8 @@ public class NodeConfigurationService {
             us.setBoolean(4, config.includeInPrecession());
             us.setBoolean(5, config.keepWarcs());
             us.setBoolean(6, config.disabled());
-            us.setInt(7, config.node());
+            us.setString(7, config.profile().name());
+            us.setInt(8, config.node());

             if (us.executeUpdate() <= 0)
                 throw new IllegalStateException("Failed to update configuration");
NodeConfiguration.java
@@ -6,6 +6,7 @@ public record NodeConfiguration(int node,
                                 boolean autoClean,
                                 boolean includeInPrecession,
                                 boolean keepWarcs,
+                                NodeProfile profile,
                                 boolean disabled
 )
 {
NodeProfile.java (new file)
@@ -0,0 +1,28 @@
+package nu.marginalia.nodecfg.model;
+
+public enum NodeProfile {
+    BATCH_CRAWL,
+    REALTIME,
+    MIXED,
+    SIDELOAD;
+
+    public boolean isBatchCrawl() {
+        return this == BATCH_CRAWL;
+    }
+    public boolean isRealtime() {
+        return this == REALTIME;
+    }
+    public boolean isMixed() {
+        return this == MIXED;
+    }
+    public boolean isSideload() {
+        return this == SIDELOAD;
+    }
+
+    public boolean permitBatchCrawl() {
+        return isBatchCrawl() ||isMixed();
+    }
+    public boolean permitSideload() {
+        return isMixed() || isSideload();
+    }
+}
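A hypothetical call site for the permit* helpers, sketching how a node's profile might gate batch crawling — scheduleCrawl is invented for illustration:

    NodeConfiguration config = nodeConfigurationService.get(nodeId);

    // Only BATCH_CRAWL and MIXED profiles may take part in batch crawling
    if (config.profile().permitBatchCrawl()) {
        scheduleCrawl(config.node());  // hypothetical scheduler call
    }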
NodeConfigurationServiceTest.java
@@ -2,6 +2,7 @@ package nu.marginalia.nodecfg;

 import com.zaxxer.hikari.HikariConfig;
 import com.zaxxer.hikari.HikariDataSource;
+import nu.marginalia.nodecfg.model.NodeProfile;
 import nu.marginalia.test.TestMigrationLoader;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Tag;
@@ -46,8 +47,8 @@ public class NodeConfigurationServiceTest {

     @Test
     public void test() throws SQLException {
-        var a = nodeConfigurationService.create(1, "Test", false, false);
-        var b = nodeConfigurationService.create(2, "Foo", true, false);
+        var a = nodeConfigurationService.create(1, "Test", false, false, NodeProfile.MIXED);
+        var b = nodeConfigurationService.create(2, "Foo", true, false, NodeProfile.MIXED);

         assertEquals(1, a.node());
         assertEquals("Test", a.description());
DbDomainQueries.java
@@ -7,17 +7,19 @@ import com.google.common.util.concurrent.UncheckedExecutionException;
 import com.google.inject.Inject;
 import com.google.inject.Singleton;
 import com.zaxxer.hikari.HikariDataSource;
-import lombok.SneakyThrows;
 import nu.marginalia.model.EdgeDomain;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

-import java.util.NoSuchElementException;
-import java.util.Optional;
-import java.util.OptionalInt;
+import java.sql.SQLException;
+import java.util.*;
 import java.util.concurrent.ExecutionException;

 @Singleton
 public class DbDomainQueries {
     private final HikariDataSource dataSource;
+
+    private static final Logger logger = LoggerFactory.getLogger(DbDomainQueries.class);
     private final Cache<EdgeDomain, Integer> domainIdCache = CacheBuilder.newBuilder().maximumSize(10_000).build();

     @Inject
@@ -27,8 +29,7 @@ public class DbDomainQueries {
     }


-    @SneakyThrows
-    public Integer getDomainId(EdgeDomain domain) {
+    public Integer getDomainId(EdgeDomain domain) throws NoSuchElementException {
         try (var connection = dataSource.getConnection()) {

             return domainIdCache.get(domain, () -> {
@@ -43,11 +44,16 @@ public class DbDomainQueries {
             });
         }
         catch (UncheckedExecutionException ex) {
-            throw ex.getCause();
+            throw new NoSuchElementException();
+        }
+        catch (ExecutionException ex) {
+            throw new RuntimeException(ex.getCause());
+        }
+        catch (SQLException ex) {
+            throw new RuntimeException(ex);
         }
     }

-    @SneakyThrows
     public OptionalInt tryGetDomainId(EdgeDomain domain) {

         Integer maybeId = domainIdCache.getIfPresent(domain);
@@ -70,11 +76,13 @@ public class DbDomainQueries {
             return OptionalInt.empty();
         }
         catch (UncheckedExecutionException ex) {
-            return OptionalInt.empty();
+            throw new RuntimeException(ex.getCause());
+        }
+        catch (SQLException ex) {
+            throw new RuntimeException(ex);
         }
     }

-    @SneakyThrows
     public Optional<EdgeDomain> getDomain(int id) {
         try (var connection = dataSource.getConnection()) {

@@ -87,5 +95,41 @@ public class DbDomainQueries {
             return Optional.empty();
         }
         }
+        catch (UncheckedExecutionException ex) {
+            throw new RuntimeException(ex.getCause());
+        }
+        catch (SQLException ex) {
+            throw new RuntimeException(ex);
+        }
+    }
+
+    public List<DomainWithNode> otherSubdomains(EdgeDomain domain, int cnt) {
+        List<DomainWithNode> ret = new ArrayList<>();
+
+        try (var conn = dataSource.getConnection();
+             var stmt = conn.prepareStatement("SELECT DOMAIN_NAME, NODE_AFFINITY FROM EC_DOMAIN WHERE DOMAIN_TOP = ? LIMIT ?")) {
+            stmt.setString(1, domain.topDomain);
+            stmt.setInt(2, cnt);
+
+            var rs = stmt.executeQuery();
+            while (rs.next()) {
+                var sibling = new EdgeDomain(rs.getString(1));
+
+                if (sibling.equals(domain))
+                    continue;
+
+                ret.add(new DomainWithNode(sibling, rs.getInt(2)));
+            }
+        } catch (SQLException e) {
+            logger.error("Failed to get domain neighbors");
+        }
+
+        return ret;
+    }
+
+    public record DomainWithNode (EdgeDomain domain, int nodeAffinity) {
+        public boolean isIndexed() {
+            return nodeAffinity > 0;
+        }
+    }
 }
DomainRankingSetsService.java
@@ -2,7 +2,6 @@ package nu.marginalia.db;

 import com.google.inject.Inject;
 import com.zaxxer.hikari.HikariDataSource;
-import lombok.With;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

@@ -115,23 +114,23 @@ public class DomainRankingSetsService {
         }
     }

-    /** Defines a domain ranking set, parameters for the ranking algorithms.
+    /**
+     * Defines a domain ranking set, parameters for the ranking algorithms.
+     *
-     * @param name Key and name of the set
+     * @param name Key and name of the set
      * @param description Human-readable description
-     * @param depth Depth of the algorithm
-     * @param definition Definition of the set, typically a list of domains or globs for domain-names
-     * */
-    @With
+     * @param depth Depth of the algorithm
+     * @param definition Definition of the set, typically a list of domains or globs for domain-names
+     */
     public record DomainRankingSet(String name,
                                    String description,
                                    int depth,
-                                   String definition)
-    {
+                                   String definition) {

         public Path fileName(Path base) {
             return base.resolve(name().toLowerCase() + ".dat");
         }

         public String[] domains() {
             return Arrays.stream(definition().split("\n+"))
                     .map(String::trim)
@@ -144,5 +143,20 @@ public class DomainRankingSetsService {
             return name().equals("BLOGS") || name().equals("NONE") || name().equals("RANK");
         }

+        public DomainRankingSet withName(String name) {
+            return this.name == name ? this : new DomainRankingSet(name, description, depth, definition);
+        }
+
+        public DomainRankingSet withDescription(String description) {
+            return this.description == description ? this : new DomainRankingSet(name, description, depth, definition);
+        }
+
+        public DomainRankingSet withDepth(int depth) {
+            return this.depth == depth ? this : new DomainRankingSet(name, description, depth, definition);
+        }
+
+        public DomainRankingSet withDefinition(String definition) {
+            return this.definition == definition ? this : new DomainRankingSet(name, description, depth, definition);
+        }
     }
 }
Database migration (new file)
@@ -0,0 +1 @@
+ALTER TABLE WMSA_prod.NODE_CONFIGURATION ADD COLUMN NODE_PROFILE VARCHAR(255) DEFAULT 'MIXED';
EdgeDomain.java
@@ -1,15 +1,12 @@
 package nu.marginalia.model;

-import lombok.*;
-
 import javax.annotation.Nonnull;
 import java.io.Serializable;
 import java.util.Objects;
 import java.util.Optional;
 import java.util.function.Predicate;
 import java.util.regex.Pattern;

-@AllArgsConstructor
-@Getter @Setter @Builder
 public class EdgeDomain implements Serializable {

     @Nonnull
@@ -17,7 +14,6 @@ public class EdgeDomain implements Serializable {
     @Nonnull
     public final String topDomain;

-    @SneakyThrows
     public EdgeDomain(String host) {
         Objects.requireNonNull(host, "domain name must not be null");

@@ -34,28 +30,23 @@ public class EdgeDomain implements Serializable {
         if (dot < 0 || looksLikeAnIp(host)) { // IPV6 >.>
             subDomain = "";
             topDomain = host;
-        }
-        else {
+        } else {
             int dot2 = host.substring(0, dot).lastIndexOf('.');
             if (dot2 < 0) {
                 subDomain = "";
                 topDomain = host;
-            }
-            else {
-                if (looksLikeGovTld(host))
-                { // Capture .ac.jp, .co.uk
+            } else {
+                if (looksLikeGovTld(host)) { // Capture .ac.jp, .co.uk
                     int dot3 = host.substring(0, dot2).lastIndexOf('.');
                     if (dot3 >= 0) {
                         dot2 = dot3;
                         subDomain = host.substring(0, dot2);
                         topDomain = host.substring(dot2 + 1);
-                    }
-                    else {
+                    } else {
                         subDomain = "";
                         topDomain = host;
                     }
-                }
-                else {
+                } else {
                     subDomain = host.substring(0, dot2);
                     topDomain = host.substring(dot2 + 1);
                 }
@@ -64,6 +55,12 @@ public class EdgeDomain implements Serializable {
     }

     private static final Predicate<String> govListTest = Pattern.compile(".*\\.(id|ac|co|org|gov|edu|com)\\.[a-z]{2}").asMatchPredicate();

+    public EdgeDomain(@Nonnull String subDomain, @Nonnull String topDomain) {
+        this.subDomain = subDomain;
+        this.topDomain = topDomain;
+    }
+
     private boolean looksLikeGovTld(String host) {
         if (host.length() < 8)
             return false;
@@ -91,11 +88,11 @@ public class EdgeDomain implements Serializable {
     }


     public EdgeUrl toRootUrlHttp() {
         // Set default protocol to http, as most https websites redirect http->https, but few http websites redirect https->http
         return new EdgeUrl("http", this, null, "/", null);
     }

     public EdgeUrl toRootUrlHttps() {
         return new EdgeUrl("https", this, null, "/", null);
     }
@@ -125,8 +122,7 @@ public class EdgeDomain implements Serializable {
         int cutPoint = topDomain.indexOf('.');
         if (cutPoint < 0) {
             ret.append(topDomain);
-        }
-        else {
+        } else {
             ret.append(topDomain, 0, cutPoint);
         }

@@ -138,6 +134,18 @@ public class EdgeDomain implements Serializable {
         return ret.toString().toLowerCase();
     }

+    /** If possible, try to provide an alias domain,
+     * i.e. a domain name that is very likely to link to this one
+     * */
+    public Optional<EdgeDomain> aliasDomain() {
+        if (subDomain.equals("www")) {
+            return Optional.of(new EdgeDomain("", topDomain));
+        } else if (subDomain.isBlank()){
+            return Optional.of(new EdgeDomain("www", topDomain));
+        }
+        else return Optional.empty();
+    }
+

     public boolean hasSameTopDomain(EdgeDomain other) {
         if (other == null) return false;
@@ -155,16 +163,14 @@ public class EdgeDomain implements Serializable {

         if (govListTest.test(topDomain)) {
             dot = topDomain.indexOf('.', Math.max(0, length - ".edu.uk".length()));
-        }
-        else {
+        } else {
             dot = topDomain.lastIndexOf('.');
         }


         if (dot < 0 || dot == topDomain.length() - 1) {
             return "-";
-        }
-        else {
+        } else {
             return topDomain.substring(dot + 1);
         }
     }
@@ -174,10 +180,10 @@ public class EdgeDomain implements Serializable {
         if (!(o instanceof EdgeDomain other)) return false;
         final String this$subDomain = this.getSubDomain();
         final String other$subDomain = other.getSubDomain();
-        if (!Objects.equals(this$subDomain,other$subDomain)) return false;
+        if (!Objects.equals(this$subDomain, other$subDomain)) return false;
         final String this$domain = this.getTopDomain();
         final String other$domain = other.getTopDomain();
-        if (!Objects.equals(this$domain,other$domain)) return false;
+        if (!Objects.equals(this$domain, other$domain)) return false;
         return true;
     }

@@ -191,4 +197,13 @@ public class EdgeDomain implements Serializable {
         return result;
     }

+    @Nonnull
+    public String getSubDomain() {
+        return this.subDomain;
+    }
+
+    @Nonnull
+    public String getTopDomain() {
+        return this.topDomain;
+    }
 }
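Reading the new aliasDomain() off the code above, it pairs a bare apex domain with its www form and vice versa:

    new EdgeDomain("www.example.com").aliasDomain();   // Optional[example.com]
    new EdgeDomain("example.com").aliasDomain();       // Optional[www.example.com]
    new EdgeDomain("blog.example.com").aliasDomain();  // Optional.empty()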
EdgeUrl.java
@@ -1,8 +1,5 @@
 package nu.marginalia.model;

-import lombok.Builder;
-import lombok.Getter;
-import lombok.Setter;
 import nu.marginalia.util.QueryParams;

 import javax.annotation.Nullable;
@@ -15,7 +12,6 @@ import java.util.Objects;
 import java.util.Optional;
 import java.util.regex.Pattern;

-@Getter @Setter @Builder
 public class EdgeUrl implements Serializable {
     public final String proto;
     public final EdgeDomain domain;
@@ -38,9 +34,8 @@ public class EdgeUrl implements Serializable {
     private static URI parseURI(String url) throws URISyntaxException {
         try {
             return new URI(urlencodeFixer(url));
-        }
-        catch (URISyntaxException ex) {
-            throw new URISyntaxException(STR."Failed to parse URI '\{url}'", ex.getMessage());
+        } catch (URISyntaxException ex) {
+            throw new URISyntaxException("Failed to parse URI '" + url + "'", ex.getMessage());
         }
     }

@@ -83,20 +78,17 @@ public class EdgeUrl implements Serializable {
         for (int i = pathIdx; i < end; i++) {
             int c = url.charAt(i);

-            if (goodChars.indexOf(c) >= 0 || (c >= 'A' && c <='Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')) {
+            if (goodChars.indexOf(c) >= 0 || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')) {
                 s.appendCodePoint(c);
-            }
-            else if (c == '%' && i+2<end) {
-                int cn = url.charAt(i+1);
-                int cnn = url.charAt(i+2);
+            } else if (c == '%' && i + 2 < end) {
+                int cn = url.charAt(i + 1);
+                int cnn = url.charAt(i + 2);
                 if (hexChars.indexOf(cn) >= 0 && hexChars.indexOf(cnn) >= 0) {
                     s.appendCodePoint(c);
-                }
-                else {
+                } else {
                     s.append("%25");
                 }
-            }
-            else {
+            } else {
                 s.append(String.format("%%%02X", c));
             }
         }
@@ -109,7 +101,7 @@ public class EdgeUrl implements Serializable {
         if (colonIdx < 0 || colonIdx + 2 >= url.length()) {
             throw new URISyntaxException(url, "Lacking protocol");
         }
-        return url.indexOf('/', colonIdx+2);
+        return url.indexOf('/', colonIdx + 2);
     }

     public EdgeUrl(URI URI) {
@@ -125,8 +117,7 @@ public class EdgeUrl implements Serializable {
             this.proto = URI.getScheme().toLowerCase();
             this.port = port(URI.getPort(), proto);
             this.param = QueryParams.queryParamsSanitizer(this.path, URI.getQuery());
-        }
-        catch (Exception ex) {
+        } catch (Exception ex) {
             System.err.println("Failed to parse " + URI);
             throw ex;
         }
@@ -145,8 +136,7 @@ public class EdgeUrl implements Serializable {
             this.proto = URL.getProtocol().toLowerCase();
             this.port = port(URL.getPort(), proto);
             this.param = QueryParams.queryParamsSanitizer(this.path, URL.getQuery());
-        }
-        catch (Exception ex) {
+        } catch (Exception ex) {
             System.err.println("Failed to parse " + URL);
             throw ex;
         }
@@ -158,8 +148,7 @@ public class EdgeUrl implements Serializable {
         }
         if (protocol.equals("http") && port == 80) {
             return null;
-        }
-        else if (protocol.equals("https") && port == 443) {
+        } else if (protocol.equals("https") && port == 443) {
             return null;
         }
         return port;
@@ -190,12 +179,13 @@ public class EdgeUrl implements Serializable {
     public String dir() {
         return path.replaceAll("/[^/]+$", "/");
     }

     public String fileName() {
         return path.replaceAll(".*/", "");
     }

     public int depth() {
-        return (int) path.chars().filter(c -> c=='/').count();
+        return (int) path.chars().filter(c -> c == '/').count();
     }

     public EdgeUrl withPathAndParam(String path, String param) {
@@ -207,8 +197,8 @@ public class EdgeUrl implements Serializable {
         if (other == this) return true;
         if (other instanceof EdgeUrl e) {
             return Objects.equals(e.domain, domain)
-                && Objects.equals(e.path, path)
-                && Objects.equals(e.param, param);
+                    && Objects.equals(e.path, path)
+                    && Objects.equals(e.param, param);
         }

         return true;
@@ -235,8 +225,7 @@ public class EdgeUrl implements Serializable {
     public URL asURL() throws MalformedURLException {
         try {
             return asURI().toURL();
-        }
-        catch (URISyntaxException e) {
+        } catch (URISyntaxException e) {
             throw new MalformedURLException(e.getMessage());
         }
     }
@@ -248,4 +237,13 @@ public class EdgeUrl implements Serializable {

         return new URI(this.proto, this.domain.toString(), this.path, this.param, null);
     }

+    public EdgeDomain getDomain() {
+        return this.domain;
+    }
+
+    public String getProto() {
+        return this.proto;
+    }
+
 }
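As a note on the reformatted urlencodeFixer loop above: valid percent-escapes are preserved, a stray '%' not followed by two hex digits is escaped as %25, and characters outside the whitelist are percent-encoded. Expected behavior read off the code, assuming the method is publicly callable (not separately tested):

    EdgeUrl.urlencodeFixer("https://example.com/100%zoom");
    // -> "https://example.com/100%25zoom"  (lone '%' escaped)

    EdgeUrl.urlencodeFixer("https://example.com/a%20b");
    // -> "https://example.com/a%20b"       (valid escape kept as-is)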
HtmlFeature.java
@@ -16,6 +16,9 @@ public enum HtmlFeature {
     KEBAB_CASE_URL("special:kcurl"), // https://www.example.com/urls-that-look-like-this/
     LONG_URL("special:longurl"),

+    CLOUDFLARE_FEATURE("special:cloudflare"),
+    CDN_FEATURE("special:cdn"),
+
     VIEWPORT("special:viewport"),

     COOKIES("special:cookies"),
@@ -60,6 +63,8 @@ public enum HtmlFeature {
     DOFOLLOW_LINK("special:dofollow"),
     APPLE_TOUCH_ICON("special:appleicon"),

+    S3_FEATURE("special:s3"),
+
     UNKNOWN("special:uncategorized");
QueryParams.java
@@ -83,6 +83,11 @@ public class QueryParams {
         if (path.endsWith("StoryView.py")) { // folklore.org is neat
             return param.startsWith("project=") || param.startsWith("story=");
         }
+
+        // www.perseus.tufts.edu:
+        if (param.startsWith("collection=")) return true;
+        if (param.startsWith("doc=")) return true;
+
         return false;
     }
 }
build.gradle (deleted file)
@@ -1,34 +0,0 @@
-plugins {
-    id 'java'
-
-    id 'jvm-test-suite'
-}
-
-java {
-    toolchain {
-        languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
-    }
-}
-
-apply from: "$rootProject.projectDir/srcsets.gradle"
-
-dependencies {
-    implementation libs.notnull
-
-    implementation libs.bundles.slf4j
-    testImplementation libs.bundles.slf4j.test
-
-    implementation libs.guava
-    implementation libs.guava
-    implementation dependencies.create(libs.guice.get()) {
-        exclude group: 'com.google.guava'
-    }
-    implementation libs.bundles.mariadb
-    implementation libs.commons.lang3
-
-    implementation libs.snakeyaml
-
-    testImplementation libs.bundles.slf4j.test
-    testImplementation libs.bundles.junit
-    testImplementation libs.mockito
-}
ProcessAdHocTaskHeartbeat.java (deleted file)
@@ -1,7 +0,0 @@
-package nu.marginalia.process.control;
-
-public interface ProcessAdHocTaskHeartbeat extends AutoCloseable {
-    void progress(String step, int progress, int total);
-
-    void close();
-}
WorkLoadIterable.java (deleted file)
@@ -1,52 +0,0 @@
-package nu.marginalia.process.log;
-
-import lombok.SneakyThrows;
-import org.jetbrains.annotations.NotNull;
-
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.Iterator;
-import java.util.Optional;
-import java.util.function.Function;
-
-class WorkLoadIterable<T> implements Iterable<T> {
-
-    private final Path logFile;
-    private final Function<WorkLogEntry, Optional<T>> mapper;
-
-    WorkLoadIterable(Path logFile, Function<WorkLogEntry, Optional<T>> mapper) {
-        this.logFile = logFile;
-        this.mapper = mapper;
-    }
-
-    @NotNull
-    @Override
-    @SneakyThrows
-    public Iterator<T> iterator() {
-        var stream = Files.lines(logFile);
-        return new Iterator<>() {
-            final Iterator<T> iter = stream
-                    .filter(WorkLogEntry::isJobId)
-                    .map(WorkLogEntry::parse)
-                    .map(mapper)
-                    .filter(Optional::isPresent)
-                    .map(Optional::get)
-                    .iterator();
-
-            @Override
-            public boolean hasNext() {
-                if (iter.hasNext()) {
-                    return true;
-                } else {
-                    stream.close();
-                    return false;
-                }
-            }
-
-            @Override
-            public T next() {
-                return iter.next();
-            }
-        };
-    }
-}
README (deleted file)
@@ -1,4 +0,0 @@
-# Process
-
-Basic functionality for a Process. Processes must include this dependency to ensure
-their loggers are configured properly!
log4j2.properties (deleted file)
@@ -1,9 +0,0 @@
-log4j2.isThreadContextMapInheritable=true
-status = info
-appender.console.type = Console
-appender.console.name = LogToConsole
-appender.console.layout.type = PatternLayout
-appender.console.layout.pattern = %highlight{%-5level}{FATAL=red, ERROR=red, WARN=yellow} %c{1}- %msg{nolookups}%n
-appender.console.filter.http.type = MarkerFilter
-rootLogger.level = info
-rootLogger.appenderRef.console.ref = LogToConsole
MustacheRenderer.java
@@ -4,12 +4,12 @@ import com.github.jknack.handlebars.*;
 import com.github.jknack.handlebars.helper.ConditionalHelpers;
 import com.github.jknack.handlebars.io.ClassPathTemplateLoader;
 import com.github.jknack.handlebars.io.TemplateLoader;
-import lombok.SneakyThrows;
 import nu.marginalia.renderer.config.HandlebarsConfigurator;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

-import java.io.*;
+import java.io.FileNotFoundException;
+import java.io.IOException;
 import java.util.List;
 import java.util.Map;

@@ -42,22 +42,35 @@ public class MustacheRenderer<T> {
         }
     }

-    @SneakyThrows
     public String render(T model) {
-        return template.apply(model);
+        try {
+            return template.apply(model);
+        }
+        catch (IOException ex) {
+            throw new RuntimeException("Failed to render template", ex);
+        }
     }

-    @SneakyThrows
     public <T2> String render(T model, String name, List<T2> children) {
         Context ctx = Context.newBuilder(model).combine(name, children).build();
-
-        return template.apply(ctx);
+        try {
+            return template.apply(ctx);
+        }
+        catch (IOException ex) {
+            throw new RuntimeException("Failed to render template", ex);
+        }
     }

-    @SneakyThrows
     public String render(T model, Map<String, ?> children) {
         Context ctx = Context.newBuilder(model).combine(children).build();
-        return template.apply(ctx);
+
+        try {
+            return template.apply(ctx);
+        }
+        catch (IOException ex) {
+            throw new RuntimeException("Failed to render template", ex);
+        }
     }

 }
build.gradle
@@ -42,6 +42,12 @@ dependencies {
     implementation libs.bundles.curator
     implementation libs.bundles.flyway

+    libs.bundles.jooby.get().each {
+        implementation dependencies.create(it) {
+            exclude group: 'org.slf4j'
+        }
+    }
+
     testImplementation libs.bundles.slf4j.test
     implementation libs.bundles.mariadb
ProcessConfiguration.java
@@ -1,4 +1,4 @@
-package nu.marginalia;
+package nu.marginalia.process;

 import java.util.UUID;
ProcessConfigurationModule.java
@@ -1,4 +1,4 @@
-package nu.marginalia;
+package nu.marginalia.process;

 import com.google.inject.AbstractModule;
 import com.google.inject.name.Names;
ProcessMainClass.java (new file)
@@ -0,0 +1,102 @@
+package nu.marginalia.process;
+
+import com.google.gson.Gson;
+import nu.marginalia.mq.MessageQueueFactory;
+import nu.marginalia.mq.MqMessage;
+import nu.marginalia.mq.inbox.MqInboxResponse;
+import nu.marginalia.mq.inbox.MqSingleShotInbox;
+import nu.marginalia.service.ConfigLoader;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.sql.SQLException;
+import java.util.Optional;
+import java.util.UUID;
+import java.util.concurrent.TimeUnit;
+
+public abstract class ProcessMainClass {
+    private static final Logger logger = LoggerFactory.getLogger(ProcessMainClass.class);
+
+    private final MessageQueueFactory messageQueueFactory;
+    private final int node;
+    private final String inboxName;
+
+    static {
+        // Load global config ASAP
+        ConfigLoader.loadConfig(
+                ConfigLoader.getConfigPath("system")
+        );
+    }
+
+    private final Gson gson;
+
+    public ProcessMainClass(MessageQueueFactory messageQueueFactory,
+                            ProcessConfiguration config,
+                            Gson gson,
+                            String inboxName
+                            ) {
+        this.gson = gson;
+        new org.mariadb.jdbc.Driver();
+        this.messageQueueFactory = messageQueueFactory;
+        this.node = config.node();
+        this.inboxName = inboxName;
+    }
+
+
+    protected <T> Instructions<T> fetchInstructions(Class<T> requestType) throws Exception {
+
+        var inbox = messageQueueFactory.createSingleShotInbox(inboxName, node, UUID.randomUUID());
+
+        logger.info("Waiting for instructions");
+
+        var msgOpt = getMessage(inbox, requestType.getSimpleName());
+        var msg = msgOpt.orElseThrow(() -> new RuntimeException("No message received"));
+
+        // for live crawl, request is empty for now
+        T request = gson.fromJson(msg.payload(), requestType);
+
+        return new Instructions<>(msg, inbox, request);
+    }
+
+
+    private Optional<MqMessage> getMessage(MqSingleShotInbox inbox, String expectedFunction) throws InterruptedException, SQLException {
+        var opt = inbox.waitForMessage(30, TimeUnit.SECONDS);
+        if (opt.isPresent()) {
+            if (!opt.get().function().equals(expectedFunction)) {
+                throw new RuntimeException("Unexpected function: " + opt.get().function());
+            }
+            return opt;
+        }
+        else {
+            var stolenMessage = inbox.stealMessage(msg -> msg.function().equals(expectedFunction));
+            stolenMessage.ifPresent(mqMessage -> logger.info("Stole message {}", mqMessage));
+            return stolenMessage;
+        }
+    }
+
+
+    protected static class Instructions<T> {
+        private final MqMessage message;
+        private final MqSingleShotInbox inbox;
+        private final T value;
+        Instructions(MqMessage message, MqSingleShotInbox inbox, T value)
+        {
+            this.message = message;
+            this.inbox = inbox;
+            this.value = value;
+        }
+
+        public T value() {
+            return value;
+        }
+
+        public void ok() {
+            inbox.sendResponse(message, MqInboxResponse.ok());
+        }
+        public void err() {
+            inbox.sendResponse(message, MqInboxResponse.err());
+        }
+
+    }
+
+}
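A sketch of how a process might use the new fetchInstructions() flow; ExampleRequest and run() are hypothetical stand-ins for a concrete process's request type and work method:

    public class ExampleProcessMain extends ProcessMainClass {
        // constructor omitted: forwards MessageQueueFactory, ProcessConfiguration,
        // Gson and the inbox name to the ProcessMainClass constructor

        void runFromInbox() throws Exception {
            Instructions<ExampleRequest> instructions = fetchInstructions(ExampleRequest.class);
            try {
                run(instructions.value());  // hypothetical work method
                instructions.ok();          // acknowledge the message on success
            }
            catch (Exception e) {
                instructions.err();         // report failure back via the inbox
            }
        }
    }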
FakeProcessHeartbeat.java
@@ -3,6 +3,8 @@ package nu.marginalia.process.control;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

+import java.util.Collection;
+
 /** Dummy implementation of ProcessHeartbeat that does nothing */
 public class FakeProcessHeartbeat implements ProcessHeartbeat {
     private static final Logger logger = LoggerFactory.getLogger(FakeProcessHeartbeat.class);
@@ -30,6 +32,11 @@ public class FakeProcessHeartbeat implements ProcessHeartbeat {
             logger.info("Progress: {}, {}/{}", step, progress, total);
         }

+        @Override
+        public <T> Iterable<T> wrap(String step, Collection<T> collection) {
+            return collection;
+        }
+
         @Override
         public void close() {}
     };
ProcessAdHocTaskHeartbeat.java (new file)
@@ -0,0 +1,12 @@
+package nu.marginalia.process.control;
+
+import java.util.Collection;
+
+public interface ProcessAdHocTaskHeartbeat extends AutoCloseable {
+    void progress(String step, int progress, int total);
+
+    /** Wrap a collection to provide heartbeat progress updates as it's iterated through */
+    <T> Iterable<T> wrap(String step, Collection<T> collection);
+
+    void close();
+}
ProcessAdHocTaskHeartbeatImpl.java
@@ -2,11 +2,13 @@ package nu.marginalia.process.control;


 import com.zaxxer.hikari.HikariDataSource;
-import nu.marginalia.ProcessConfiguration;
+import nu.marginalia.process.ProcessConfiguration;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 import java.sql.SQLException;
+import java.util.Collection;
+import java.util.Iterator;
 import java.util.UUID;
 import java.util.concurrent.TimeUnit;

@@ -69,6 +71,35 @@ public class ProcessAdHocTaskHeartbeatImpl implements AutoCloseable, ProcessAdHocTaskHeartbeat {
         logger.info("ProcessTask {} progress: {}%", taskBase, progress);
     }

+    /** Wrap a collection to provide heartbeat progress updates as it's iterated through */
+    @Override
+    public <T> Iterable<T> wrap(String step, Collection<T> collection) {
+        return () -> new Iterator<>() {
+            private final Iterator<T> base = collection.iterator();
+            private final int size = collection.size();
+            private final int updateInterval = Math.max(1, size / 100); // update every 1% of the collection, or at least once
+            private int pos = 0;
+
+            @Override
+            public boolean hasNext() {
+                boolean ret = base.hasNext();
+                if (!ret) {
+                    progress(step, size, size);
+                }
+                return ret;
+            }
+
+            @Override
+            public T next() {
+                // update every 1% of the collection, to avoid hammering the database with updates
+                if (pos++ % updateInterval == 0) {
+                    progress(step, pos, size);
+                }
+                return base.next();
+            }
+        };
+    }
+
     public void shutDown() {
         if (!running)
             return;
@@ -185,6 +216,5 @@ public class ProcessAdHocTaskHeartbeatImpl implements AutoCloseable, ProcessAdHocTaskHeartbeat {
     public void close() {
         shutDown();
     }
-
 }
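Usage-wise, wrap() turns progress reporting into a side effect of iteration. A sketch, assuming the heartbeat is obtained from the process heartbeat (the factory method name is an assumption):

    try (ProcessAdHocTaskHeartbeat heartbeat = processHeartbeat.createAdHocTaskHeartbeat("EXPORT")) {
        for (var domain : heartbeat.wrap("domains", domains)) {
            export(domain);  // hypothetical per-item work
        }
        // progress reaches size/size once the iterator is exhausted
    }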
ProcessHeartbeatImpl.java
@@ -4,17 +4,18 @@ package nu.marginalia.process.control;
 import com.google.inject.Inject;
 import com.google.inject.Singleton;
 import com.zaxxer.hikari.HikariDataSource;
-import nu.marginalia.ProcessConfiguration;
+import nu.marginalia.process.ProcessConfiguration;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

+import java.io.Closeable;
 import java.sql.SQLException;
 import java.util.concurrent.TimeUnit;

 /** This service sends a heartbeat to the database every 5 seconds.
  */
 @Singleton
-public class ProcessHeartbeatImpl implements ProcessHeartbeat {
+public class ProcessHeartbeatImpl implements ProcessHeartbeat, Closeable {
     private final Logger logger = LoggerFactory.getLogger(ProcessHeartbeatImpl.class);
     private final String processName;
     private final String processBase;
@@ -169,5 +170,9 @@ public class ProcessHeartbeatImpl implements ProcessHeartbeat, Closeable {
             }
         }
     }

+    public void close() {
+        shutDown();
+    }
 }
ProcessTaskHeartbeatImpl.java
@@ -2,7 +2,7 @@ package nu.marginalia.process.control;


 import com.zaxxer.hikari.HikariDataSource;
-import nu.marginalia.ProcessConfiguration;
+import nu.marginalia.process.ProcessConfiguration;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
WorkLoadIterable.java (new file)
@@ -0,0 +1,56 @@
+package nu.marginalia.process.log;
+
+import org.jetbrains.annotations.NotNull;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Iterator;
+import java.util.Optional;
+import java.util.function.Function;
+
+class WorkLoadIterable<T> implements Iterable<T> {
+
+    private final Path logFile;
+    private final Function<WorkLogEntry, Optional<T>> mapper;
+
+    WorkLoadIterable(Path logFile, Function<WorkLogEntry, Optional<T>> mapper) {
+        this.logFile = logFile;
+        this.mapper = mapper;
+    }
+
+    @NotNull
+    @Override
+    public Iterator<T> iterator() {
+        try {
+            var stream = Files.lines(logFile);
+            return new Iterator<>() {
+                final Iterator<T> iter = stream
+                        .filter(WorkLogEntry::isJobId)
+                        .map(WorkLogEntry::parse)
+                        .map(mapper)
+                        .filter(Optional::isPresent)
+                        .map(Optional::get)
+                        .iterator();
+
+                @Override
+                public boolean hasNext() {
+                    if (iter.hasNext()) {
+                        return true;
+                    } else {
+                        stream.close();
+                        return false;
+                    }
+                }
+
+                @Override
+                public T next() {
+                    return iter.next();
+                }
+            };
+        }
+        catch (IOException ex) {
+            throw new RuntimeException(ex);
+        }
+    }
+}
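The class is package-private, so only the work-log machinery uses it directly; within the package a mapper-driven use might look like this (path and mapper are illustrative):

    // Lazily map work-log entries to paths, skipping entries the mapper rejects;
    // hasNext() closes the underlying line stream when the log is exhausted.
    Iterable<Path> items = new WorkLoadIterable<>(
            Path.of("/var/lib/wmsa/crawler.log"),               // illustrative log path
            entry -> Optional.of(Path.of(entry.path())));

    for (Path p : items) {
        System.out.println(p);
    }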
WorkLogEntry.java
@@ -33,6 +33,6 @@ public record WorkLogEntry(String id, String ts, String path, int cnt) {

         String relPath = fileName();

-        return STR."\{relPath.substring(0, 2)}/\{relPath.substring(2, 4)}/\{relPath}";
+        return relPath.substring(0, 2) + "/" + relPath.substring(2, 4) + "/" + relPath;
     }
 }
ConfigLoader.java
@@ -9,11 +9,11 @@ import java.util.Properties;

 public class ConfigLoader {

-    static Path getConfigPath(String configName) {
+    public static Path getConfigPath(String configName) {
         return WmsaHome.getHomePath().resolve("conf/properties/" + configName + ".properties");
     }

-    static void loadConfig(Path configPath) {
+    public static void loadConfig(Path configPath) {
         if (!Files.exists(configPath)) {
             System.err.println("No config file found at " + configPath);
             return;
NodeConfigurationWatcher.java
@@ -2,7 +2,6 @@ package nu.marginalia.service;

 import com.google.inject.Inject;
 import com.zaxxer.hikari.HikariDataSource;
-import lombok.SneakyThrows;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

@@ -26,7 +25,6 @@ public class NodeConfigurationWatcher {
         watcherThread.start();
     }

-    @SneakyThrows
     private void pollConfiguration() {
         for (;;) {
             List<Integer> goodNodes = new ArrayList<>();
@@ -34,7 +32,7 @@ public class NodeConfigurationWatcher {
             try (var conn = dataSource.getConnection()) {
                 var stmt = conn.prepareStatement("""
                         SELECT ID FROM NODE_CONFIGURATION
-                        WHERE ACCEPT_QUERIES AND NOT DISABLED
+                        WHERE ACCEPT_QUERIES AND NOT DISABLED
                         """);
                 var rs = stmt.executeQuery();
                 while (rs.next()) {
@@ -47,7 +45,12 @@ public class NodeConfigurationWatcher {

             queryNodes = goodNodes;

-            TimeUnit.SECONDS.sleep(10);
+            try {
+                TimeUnit.SECONDS.sleep(10);
+            }
+            catch (InterruptedException ex) {
+                return;
+            }
         }
     }
ProcessMainClass.java (deleted file, old location nu.marginalia.service)
@@ -1,20 +0,0 @@
-package nu.marginalia.service;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public abstract class ProcessMainClass {
-    private static final Logger logger = LoggerFactory.getLogger(ProcessMainClass.class);
-
-    static {
-        // Load global config ASAP
-        ConfigLoader.loadConfig(
-                ConfigLoader.getConfigPath("system")
-        );
-    }
-
-    public ProcessMainClass() {
-        new org.mariadb.jdbc.Driver();
-    }
-
-}
ServiceId.java
@@ -12,7 +12,11 @@ public enum ServiceId {
     Control("control-service"),

     Dating("dating-service"),
-    Explorer("explorer-service");
+    Status("setatus-service"),
+    Explorer("explorer-service"),
+
+    NOT_A_SERVICE("NOT_A_SERVICE")
+    ;

     public final String serviceName;
GrpcChannelPoolFactory.java
@@ -4,13 +4,13 @@ import com.google.inject.Inject;
 import com.google.inject.Singleton;
 import io.grpc.ManagedChannel;
 import io.grpc.ManagedChannelBuilder;
-import nu.marginalia.util.NamedExecutorFactory;
 import nu.marginalia.service.NodeConfigurationWatcher;
 import nu.marginalia.service.discovery.ServiceRegistryIf;
 import nu.marginalia.service.discovery.property.PartitionTraits;
 import nu.marginalia.service.discovery.property.ServiceEndpoint.InstanceAddress;
 import nu.marginalia.service.discovery.property.ServiceKey;
 import nu.marginalia.service.discovery.property.ServicePartition;
+import nu.marginalia.util.NamedExecutorFactory;

 import java.util.concurrent.Executor;
 import java.util.function.Function;
@@ -48,7 +48,12 @@ public class GrpcChannelPoolFactory {
     public <STUB> GrpcSingleNodeChannelPool<STUB> createSingle(ServiceKey<? extends PartitionTraits.Unicast> key,
                                                                Function<ManagedChannel, STUB> stubConstructor)
     {
-        return new GrpcSingleNodeChannelPool<>(serviceRegistryIf, key, this::createChannel, stubConstructor);
+        try {
+            return new GrpcSingleNodeChannelPool<>(serviceRegistryIf, key, this::createChannel, stubConstructor);
+        }
+        catch (Exception e) {
+            throw new RuntimeException(e);
+        }
     }

     private ManagedChannel createChannel(InstanceAddress route) {
@@ -1,18 +1,18 @@
package nu.marginalia.service.client;

import io.grpc.ManagedChannel;
-import lombok.SneakyThrows;
import nu.marginalia.service.NodeConfigurationWatcher;
import nu.marginalia.service.discovery.ServiceRegistryIf;
import nu.marginalia.service.discovery.property.PartitionTraits;
import nu.marginalia.service.discovery.property.ServiceEndpoint;
import nu.marginalia.service.discovery.property.ServiceKey;
import nu.marginalia.service.discovery.property.ServicePartition;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.List;
-import java.util.concurrent.*;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.Executor;
+import java.util.concurrent.ExecutorService;
import java.util.function.BiFunction;
import java.util.function.Function;

@@ -22,14 +22,13 @@ import java.util.function.Function;
public class GrpcMultiNodeChannelPool<STUB> {
    private final ConcurrentHashMap<Integer, GrpcSingleNodeChannelPool<STUB>> pools =
            new ConcurrentHashMap<>();
    private static final Logger logger = LoggerFactory.getLogger(GrpcMultiNodeChannelPool.class);

    private final ServiceRegistryIf serviceRegistryIf;
    private final ServiceKey<? extends PartitionTraits.Multicast> serviceKey;
    private final Function<ServiceEndpoint.InstanceAddress, ManagedChannel> channelConstructor;
    private final Function<ManagedChannel, STUB> stubConstructor;
    private final NodeConfigurationWatcher nodeConfigurationWatcher;

-   @SneakyThrows
    public GrpcMultiNodeChannelPool(ServiceRegistryIf serviceRegistryIf,
                                    ServiceKey<ServicePartition.Multi> serviceKey,
                                    Function<ServiceEndpoint.InstanceAddress, ManagedChannel> channelConstructor,
@@ -52,11 +51,16 @@ public class GrpcMultiNodeChannelPool<STUB> {
    }

    private GrpcSingleNodeChannelPool<STUB> newSingleChannelPool(int node) {
-       return new GrpcSingleNodeChannelPool<>(
-               serviceRegistryIf,
-               serviceKey.forPartition(ServicePartition.partition(node)),
-               channelConstructor,
-               stubConstructor);
+       try {
+           return new GrpcSingleNodeChannelPool<>(
+                   serviceRegistryIf,
+                   serviceKey.forPartition(ServicePartition.partition(node)),
+                   channelConstructor,
+                   stubConstructor);
+       }
+       catch (Exception e) {
+           throw new RuntimeException(e);
+       }
    }

    /** Get the list of nodes that are eligible for broadcast-style requests */
@@ -2,7 +2,6 @@ package nu.marginalia.service.client;

import com.google.common.collect.Sets;
import io.grpc.ManagedChannel;
-import lombok.SneakyThrows;
import nu.marginalia.service.discovery.ServiceRegistryIf;
import nu.marginalia.service.discovery.monitor.ServiceChangeMonitor;
import nu.marginalia.service.discovery.property.PartitionTraits;
@@ -11,10 +10,14 @@ import nu.marginalia.service.discovery.property.ServiceKey;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.slf4j.Marker;
+import org.slf4j.MarkerFactory;

import java.time.Duration;
import java.util.*;
-import java.util.concurrent.*;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.Executor;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.BiFunction;
import java.util.function.Function;
@@ -25,18 +28,19 @@ import java.util.function.Function;
public class GrpcSingleNodeChannelPool<STUB> extends ServiceChangeMonitor {
    private final Map<InstanceAddress, ConnectionHolder> channels = new ConcurrentHashMap<>();

+   private final Marker grpcMarker = MarkerFactory.getMarker("GRPC");
    private static final Logger logger = LoggerFactory.getLogger(GrpcSingleNodeChannelPool.class);

    private final ServiceRegistryIf serviceRegistryIf;
    private final Function<InstanceAddress, ManagedChannel> channelConstructor;
    private final Function<ManagedChannel, STUB> stubConstructor;


-   @SneakyThrows
    public GrpcSingleNodeChannelPool(ServiceRegistryIf serviceRegistryIf,
                                     ServiceKey<? extends PartitionTraits.Unicast> serviceKey,
                                     Function<InstanceAddress, ManagedChannel> channelConstructor,
-                                    Function<ManagedChannel, STUB> stubConstructor) {
+                                    Function<ManagedChannel, STUB> stubConstructor)
+           throws Exception
+   {
        super(serviceKey);

        this.serviceRegistryIf = serviceRegistryIf;
@@ -46,8 +50,6 @@ public class GrpcSingleNodeChannelPool<STUB> extends ServiceChangeMonitor {
        serviceRegistryIf.registerMonitor(this);

        onChange();
-
-       awaitChannel(Duration.ofSeconds(5));
    }


@@ -60,10 +62,10 @@ public class GrpcSingleNodeChannelPool<STUB> extends ServiceChangeMonitor {
        for (var route : Sets.symmetricDifference(oldRoutes, newRoutes)) {
            ConnectionHolder oldChannel;
            if (newRoutes.contains(route)) {
-               logger.info("Adding route {}", route);
+               logger.info(grpcMarker, "Adding route {} => {}", serviceKey, route);
                oldChannel = channels.put(route, new ConnectionHolder(route));
            } else {
-               logger.info("Expelling route {}", route);
+               logger.info(grpcMarker, "Expelling route {} => {}", serviceKey, route);
                oldChannel = channels.remove(route);
            }
            if (oldChannel != null) {
@@ -101,7 +103,7 @@ public class GrpcSingleNodeChannelPool<STUB> extends ServiceChangeMonitor {
        }

        try {
-           logger.info("Creating channel for {}:{}", serviceKey, address);
+           logger.info(grpcMarker, "Creating channel for {} => {}", serviceKey, address);
            value = channelConstructor.apply(address);
            if (channel.compareAndSet(null, value)) {
                return value;
@@ -112,7 +114,7 @@ public class GrpcSingleNodeChannelPool<STUB> extends ServiceChangeMonitor {
            }
        }
        catch (Exception e) {
-           logger.error(STR."Failed to get channel for \{address}", e);
+           logger.error(grpcMarker, "Failed to get channel for " + address, e);
            return null;
        }
    }
@@ -204,7 +206,7 @@ public class GrpcSingleNodeChannelPool<STUB> extends ServiceChangeMonitor {
        }

        for (var e : exceptions) {
-           logger.error("Failed to call service {}", serviceKey, e);
+           logger.error(grpcMarker, "Failed to call service {}", serviceKey, e);
        }

        throw new ServiceNotAvailableException(serviceKey);
@@ -4,6 +4,11 @@ import nu.marginalia.service.discovery.property.ServiceKey;

public class ServiceNotAvailableException extends RuntimeException {
    public ServiceNotAvailableException(ServiceKey<?> key) {
-       super(STR."Service \{key} not available");
+       super(key.toString());
    }

+   @Override
+   public StackTraceElement[] getStackTrace() { // Suppress stack trace
+       return new StackTraceElement[0];
+   }
}
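Overriding getStackTrace() hides the trace from log output, but the trace is still captured when the exception is constructed. A common alternative idiom, sketched here as an assumption rather than what this changeset does, suppresses capture entirely via the RuntimeException constructor flags:

    // Sketch only: avoids the cost of filling in a stack trace at construction,
    // instead of hiding it when it is read back.
    public class ServiceNotAvailableException extends RuntimeException {
        public ServiceNotAvailableException(ServiceKey<?> key) {
            super(key.toString(), null, false, false); // no cause, no suppression, no writable stack trace
        }
    }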
@@ -11,4 +11,14 @@ public class FakeServiceHeartbeat implements ServiceHeartbeat {
            public void close() {}
        };
    }

+   @Override
+   public ServiceAdHocTaskHeartbeat createServiceAdHocTaskHeartbeat(String taskName) {
+       return new ServiceAdHocTaskHeartbeat() {
+           @Override
+           public void progress(String step, int stepProgress, int stepCount) {}
+           @Override
+           public void close() {}
+       };
+   }
}
@@ -0,0 +1,7 @@
+package nu.marginalia.service.control;
+
+public interface ServiceAdHocTaskHeartbeat extends AutoCloseable {
+    void progress(String step, int progress, int total);
+
+    void close();
+}
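A minimal usage sketch for the new ad hoc heartbeat, assuming a ServiceHeartbeat is injected as elsewhere in this changeset; the task name, shard list, and processShard helper are hypothetical:

    // Progress is reported as a step name plus index/count; close() marks the task STOPPED.
    try (ServiceAdHocTaskHeartbeat task = heartbeat.createServiceAdHocTaskHeartbeat("reindex")) {
        int total = shards.size();
        for (int i = 0; i < total; i++) {
            task.progress("shard:" + shards.get(i), i, total);
            processShard(shards.get(i));
        }
    }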
@@ -0,0 +1,190 @@
+package nu.marginalia.service.control;
+
+
+import com.zaxxer.hikari.HikariDataSource;
+import nu.marginalia.service.module.ServiceConfiguration;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.sql.SQLException;
+import java.util.UUID;
+import java.util.concurrent.TimeUnit;
+
+/** This object sends a heartbeat to the database every few seconds,
+ * updating with the progress of an ad hoc task within a service.  Progress is
+ * reported explicitly as a step name together with a step index and step count,
+ * so the caller is responsible for passing consistent values to get an
+ * accurate progress read-out.
+ */
+public class ServiceAdHocTaskHeartbeatImpl implements AutoCloseable, ServiceAdHocTaskHeartbeat {
+    private final Logger logger = LoggerFactory.getLogger(ServiceAdHocTaskHeartbeatImpl.class);
+    private final String taskName;
+    private final String taskBase;
+    private final int node;
+    private final String instanceUUID;
+    private final HikariDataSource dataSource;
+
+
+    private final Thread runnerThread;
+    private final int heartbeatInterval = Integer.getInteger("mcp.heartbeat.interval", 1);
+    private final String serviceInstanceUUID;
+    private int progress;
+
+    private volatile boolean running = false;
+    private volatile String step = "-";
+
+    ServiceAdHocTaskHeartbeatImpl(ServiceConfiguration configuration,
+                                  String taskName,
+                                  HikariDataSource dataSource)
+    {
+        this.taskName = configuration.serviceName() + "." + taskName + ":" + configuration.node();
+        this.taskBase = configuration.serviceName() + "." + taskName;
+        this.node = configuration.node();
+        this.dataSource = dataSource;
+
+        this.instanceUUID = UUID.randomUUID().toString();
+        this.serviceInstanceUUID = configuration.instanceUuid().toString();
+
+        heartbeatInit();
+
+        runnerThread = new Thread(this::run);
+        runnerThread.start();
+    }
+
+    /** Update the progress of the task.  This is a fast function that doesn't block;
+     * the actual update is done in a separate thread.
+     *
+     * @param step The current step in the task.
+     */
+    @Override
+    public void progress(String step, int stepProgress, int stepCount) {
+        this.step = step;
+
+        // Render the step counter as an integer percentage for the TASK_HEARTBEAT table
+        this.progress = (int) Math.round(100. * stepProgress / (double) stepCount);
+
+        logger.info("ServiceTask {} progress: {}%", taskBase, progress);
+    }
+
+    public void shutDown() {
+        if (!running)
+            return;
+
+        running = false;
+
+        try {
+            runnerThread.join();
+            heartbeatStop();
+        }
+        catch (InterruptedException|SQLException ex) {
+            logger.warn("ServiceHeartbeat shutdown failed", ex);
+        }
+    }
+
+    private void run() {
+        if (!running)
+            running = true;
+        else
+            return;
+
+        try {
+            while (running) {
+                try {
+                    heartbeatUpdate();
+                }
+                catch (SQLException ex) {
+                    logger.warn("ServiceHeartbeat failed to update", ex);
+                }
+
+                TimeUnit.SECONDS.sleep(heartbeatInterval);
+            }
+        }
+        catch (InterruptedException ex) {
+            logger.error("ServiceHeartbeat caught irrecoverable exception, killing service", ex);
+            System.exit(255);
+        }
+    }
+
+    private void heartbeatInit() {
+        try (var connection = dataSource.getConnection()) {
+            try (var stmt = connection.prepareStatement(
+                    """
+                        INSERT INTO TASK_HEARTBEAT (TASK_NAME, TASK_BASE, NODE, INSTANCE, SERVICE_INSTANCE, HEARTBEAT_TIME, STATUS)
+                        VALUES (?, ?, ?, ?, ?, CURRENT_TIMESTAMP(6), 'STARTING')
+                        ON DUPLICATE KEY UPDATE
+                            INSTANCE = ?,
+                            SERVICE_INSTANCE = ?,
+                            HEARTBEAT_TIME = CURRENT_TIMESTAMP(6),
+                            STATUS = 'STARTING'
+                        """
+            ))
+            {
+                stmt.setString(1, taskName);
+                stmt.setString(2, taskBase);
+                stmt.setInt(3, node);
+                stmt.setString(4, instanceUUID);
+                stmt.setString(5, serviceInstanceUUID);
+                stmt.setString(6, instanceUUID);
+                stmt.setString(7, serviceInstanceUUID);
+                stmt.executeUpdate();
+            }
+        }
+        catch (SQLException ex) {
+            logger.error("ServiceHeartbeat failed to initialize", ex);
+            throw new RuntimeException(ex);
+        }
+    }
+
+    private void heartbeatUpdate() throws SQLException {
+        try (var connection = dataSource.getConnection()) {
+            try (var stmt = connection.prepareStatement(
+                    """
+                        UPDATE TASK_HEARTBEAT
+                        SET HEARTBEAT_TIME = CURRENT_TIMESTAMP(6),
+                            STATUS = 'RUNNING',
+                            PROGRESS = ?,
+                            STAGE_NAME = ?
+                        WHERE INSTANCE = ?
+                        """)
+            )
+            {
+                stmt.setInt(1, progress);
+                stmt.setString(2, step);
+                stmt.setString(3, instanceUUID);
+                stmt.executeUpdate();
+            }
+        }
+    }
+
+    private void heartbeatStop() throws SQLException {
+        try (var connection = dataSource.getConnection()) {
+            try (var stmt = connection.prepareStatement(
+                    """
+                        UPDATE TASK_HEARTBEAT
+                        SET HEARTBEAT_TIME = CURRENT_TIMESTAMP(6),
+                            STATUS = 'STOPPED',
+                            PROGRESS = ?,
+                            STAGE_NAME = ?
+                        WHERE INSTANCE = ?
+                        """)
+            )
+            {
+                stmt.setInt(1, progress);
+                stmt.setString(2, step);
+                stmt.setString(3, instanceUUID);
+                stmt.executeUpdate();
+            }
+        }
+    }
+
+    @Override
+    public void close() {
+        shutDown();
+    }
+
+}
@@ -5,4 +5,5 @@ import com.google.inject.ImplementedBy;
@ImplementedBy(ServiceHeartbeatImpl.class)
public interface ServiceHeartbeat {
    <T extends Enum<T>> ServiceTaskHeartbeat<T> createServiceTaskHeartbeat(Class<T> steps, String processName);
+   ServiceAdHocTaskHeartbeat createServiceAdHocTaskHeartbeat(String taskName);
}
@@ -54,6 +54,11 @@ public class ServiceHeartbeatImpl implements ServiceHeartbeat {
        return new ServiceTaskHeartbeatImpl<>(steps, configuration, processName, eventLog, dataSource);
    }

+   @Override
+   public ServiceAdHocTaskHeartbeat createServiceAdHocTaskHeartbeat(String taskName) {
+       return new ServiceAdHocTaskHeartbeatImpl(configuration, taskName, dataSource);
+   }
+

    public void start() {
        if (!running) {
@@ -2,11 +2,8 @@ package nu.marginalia.service.discovery;

import com.google.inject.Inject;
import com.google.inject.Singleton;
-import lombok.SneakyThrows;
-import nu.marginalia.service.discovery.monitor.*;
+import nu.marginalia.service.discovery.monitor.ServiceMonitorIf;
import nu.marginalia.service.discovery.property.ServiceEndpoint;
-import static nu.marginalia.service.discovery.property.ServiceEndpoint.*;
-
import nu.marginalia.service.discovery.property.ServiceKey;
import org.apache.curator.framework.CuratorFramework;
import org.apache.curator.utils.ZKPaths;
@@ -16,9 +13,14 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.nio.charset.StandardCharsets;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import java.util.UUID;
import java.util.concurrent.TimeUnit;

+import static nu.marginalia.service.discovery.property.ServiceEndpoint.InstanceAddress;
+
/** A versatile service registry that uses ZooKeeper to store service endpoints.
 * It is used to register services and to look up the endpoints of other services.
 * <p></p>
@@ -37,18 +39,22 @@ public class ZkServiceRegistry implements ServiceRegistryIf {
    private final List<String> livenessPaths = new ArrayList<>();

    @Inject
-   @SneakyThrows
    public ZkServiceRegistry(CuratorFramework curatorFramework) {
-       this.curatorFramework = curatorFramework;
+       try {
+           this.curatorFramework = curatorFramework;

-       curatorFramework.start();
-       if (!curatorFramework.blockUntilConnected(30, TimeUnit.SECONDS)) {
-           throw new IllegalStateException("Failed to connect to zookeeper after 30s");
+           curatorFramework.start();
+           if (!curatorFramework.blockUntilConnected(30, TimeUnit.SECONDS)) {
+               throw new IllegalStateException("Failed to connect to zookeeper after 30s");
+           }
+
+           Runtime.getRuntime().addShutdownHook(
+                   new Thread(this::shutDown, "ZkServiceRegistry shutdown hook")
+           );
+       }
+       catch (Exception ex) {
+           throw new RuntimeException("Failed to start ZkServiceRegistry", ex);
        }
-
-       Runtime.getRuntime().addShutdownHook(
-               new Thread(this::shutDown, "ZkServiceRegistry shutdown hook")
-       );
    }

    @Override
@@ -59,8 +65,8 @@ public class ZkServiceRegistry implements ServiceRegistryIf {
    {
        var endpoint = new ServiceEndpoint(externalAddress, requestPort(externalAddress, key));

-       String path = STR."\{key.toPath()}/\{instanceUUID.toString()}";
-       byte[] payload = STR."\{endpoint.host()}:\{endpoint.port()}".getBytes(StandardCharsets.UTF_8);
+       String path = key.toPath() + "/" + instanceUUID.toString();
+       byte[] payload = (endpoint.host() + ":" + endpoint.port()).getBytes(StandardCharsets.UTF_8);

        logger.info("Registering {} -> {}", path, endpoint);

@@ -72,14 +78,18 @@ public class ZkServiceRegistry implements ServiceRegistryIf {
        return endpoint;
    }

-   @SneakyThrows
    @Override
    public void declareFirstBoot() {
        if (!isFirstBoot()) {
-           curatorFramework.create()
-                   .creatingParentsIfNeeded()
-                   .withMode(CreateMode.PERSISTENT)
-                   .forPath("/first-boot");
+           try {
+               curatorFramework.create()
+                       .creatingParentsIfNeeded()
+                       .withMode(CreateMode.PERSISTENT)
+                       .forPath("/first-boot");
+           }
+           catch (Exception ex) {
+               logger.error("Failed to declare first-boot", ex);
+           }
        }
    }

@@ -109,7 +119,7 @@ public class ZkServiceRegistry implements ServiceRegistryIf {
    @Override
    public void announceInstance(UUID instanceUUID) {
        try {
-           String serviceRoot = STR."/running-instances/\{instanceUUID.toString()}";
+           String serviceRoot = "/running-instances/" + instanceUUID.toString();

            livenessPaths.add(serviceRoot);

@@ -128,7 +138,7 @@ public class ZkServiceRegistry implements ServiceRegistryIf {
     */
    public boolean isInstanceRunning(UUID instanceUUID) {
        try {
-           String serviceRoot = STR."/running-instances/\{instanceUUID.toString()}";
+           String serviceRoot = "/running-instances/" + instanceUUID.toString();
            return null != curatorFramework.checkExists().forPath(serviceRoot);
        }
        catch (Exception ex) {
@@ -165,11 +175,11 @@ public class ZkServiceRegistry implements ServiceRegistryIf {
            curatorFramework.create()
                    .creatingParentsIfNeeded()
                    .withMode(CreateMode.EPHEMERAL)
-                   .forPath(STR."/port-registry/\{externalHost}/\{port}", payload);
+                   .forPath("/port-registry/" + externalHost + "/" + port, payload);
            return port;
        }
        catch (Exception ex) {
-           logger.error(STR."Still negotiating port for \{identifier}");
+           logger.error("Still negotiating port for " + identifier);
        }
    }
@@ -48,5 +48,10 @@ public record ServiceEndpoint(String host, int port) {
        public int port() {
            return endpoint.port();
        }

+       @Override
+       public String toString() {
+           return endpoint().host() + ":" + endpoint.port() + " [" + instance + "]";
+       }
    }
}
@@ -38,7 +38,7 @@ public sealed interface ServiceKey<P extends ServicePartition> {

    record Rest(String name) implements ServiceKey<ServicePartition.None> {
        public String toPath() {
-           return STR."/services/rest/\{name}";
+           return "/services/rest/" + name;
        }

        @Override
@@ -48,13 +48,26 @@ public sealed interface ServiceKey<P extends ServicePartition> {
        {
            throw new UnsupportedOperationException();
        }

+       @Override
+       public String toString() {
+           final String shortName;
+
+           int periodIndex = name.lastIndexOf('.');
+
+           if (periodIndex >= 0) shortName = name.substring(periodIndex+1);
+           else shortName = name;
+
+           return "rest:" + shortName;
+       }
+
    }
    record Grpc<P extends ServicePartition>(String name, P partition) implements ServiceKey<P> {
        public String baseName() {
-           return STR."/services/grpc/\{name}";
+           return "/services/grpc/" + name;
        }
        public String toPath() {
-           return STR."/services/grpc/\{name}/\{partition.identifier()}";
+           return "/services/grpc/" + name + "/" + partition.identifier();
        }

        @Override
@@ -64,6 +77,18 @@ public sealed interface ServiceKey<P extends ServicePartition> {
        {
            return new Grpc<>(name, partition);
        }

+       @Override
+       public String toString() {
+           final String shortName;
+
+           int periodIndex = name.lastIndexOf('.');
+
+           if (periodIndex >= 0) shortName = name.substring(periodIndex+1);
+           else shortName = name;
+
+           return "grpc:" + shortName + "[" + partition.identifier() + "]";
+       }
    }

}
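A quick sketch of what the new toString() implementations yield; the service names here are hypothetical, and partition(2).identifier() is assumed to render as "2":

    // The shortening logic keeps only the segment after the last '.' in the name.
    var rest = new ServiceKey.Rest("search-service");
    var grpc = new ServiceKey.Grpc<>("nu.marginalia.api.SearchApi", ServicePartition.partition(2));

    System.out.println(rest);  // rest:search-service
    System.out.println(grpc);  // grpc:SearchApi[2]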
@@ -5,14 +5,12 @@ import com.google.inject.Provides;
import com.google.inject.Singleton;
import com.zaxxer.hikari.HikariConfig;
import com.zaxxer.hikari.HikariDataSource;
-import lombok.SneakyThrows;
import nu.marginalia.WmsaHome;
import org.flywaydb.core.Flyway;
-import org.mariadb.jdbc.Driver;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.sql.DataSource;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.file.Files;
@@ -71,14 +69,12 @@ public class DatabaseModule extends AbstractModule {

    }

-   @SneakyThrows
    @Singleton
    @Provides
    public HikariDataSource provideConnection() {
        return getMariaDB();
    }

-   @SneakyThrows
    private HikariDataSource getMariaDB() {
        var connStr = System.getProperty("db.overrideJdbc", dbProperties.getProperty(DB_CONN_KEY));

@@ -93,7 +89,7 @@ public class DatabaseModule extends AbstractModule {
        config.addDataSourceProperty("prepStmtCacheSize", "250");
        config.addDataSourceProperty("prepStmtCacheSqlLimit", "2048");

-       config.setMaximumPoolSize(5);
+       config.setMaximumPoolSize(Integer.getInteger("db.poolSize", 5));
        config.setMinimumIdle(2);

        config.setMaxLifetime(Duration.ofMinutes(9).toMillis());
@@ -6,6 +6,9 @@ import nu.marginalia.service.ServiceId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

+import java.net.InetAddress;
+import java.net.NetworkInterface;
+import java.util.Enumeration;
import java.util.Objects;
import java.util.UUID;

@@ -69,6 +72,17 @@ public class ServiceConfigurationModule extends AbstractModule {
            return configuredValue;
        }

+       if (Boolean.getBoolean("system.multiFace")) {
+           try {
+               String localNetworkIp = getLocalNetworkIP();
+               if (null != localNetworkIp) {
+                   return localNetworkIp;
+               }
+           }
+           catch (Exception ex) {
+               logger.warn("Failed to get local network IP", ex);
+           }
+       }
        // If we're in docker, we'll use the hostname
        if (Boolean.getBoolean("service.useDockerHostname")) {
            return System.getenv("HOSTNAME");
@@ -84,10 +98,41 @@ public class ServiceConfigurationModule extends AbstractModule {
    private String getBindAddress() {
        String configuredValue = System.getProperty("service.bind-address");
        if (configuredValue != null) {
            logger.info("Using configured bind address {}", configuredValue);
            return configuredValue;
        }

-       return "127.0.0.1";
+       if (Boolean.getBoolean("system.multiFace")) {
+           try {
+               return Objects.requireNonNullElse(getLocalNetworkIP(), "0.0.0.0");
+           } catch (Exception ex) {
+               logger.warn("Failed to get local network IP, falling back to bind to 0.0.0.0", ex);
+               return "0.0.0.0";
+           }
+       }
+       else {
+           return "0.0.0.0";
+       }
    }

+   public static String getLocalNetworkIP() throws Exception {
+       Enumeration<NetworkInterface> nets = NetworkInterface.getNetworkInterfaces();
+
+       while (nets.hasMoreElements()) {
+           NetworkInterface netif = nets.nextElement();
+           if (!netif.isUp() || netif.isLoopback()) {
+               continue;
+           }
+
+           Enumeration<InetAddress> inetAddresses = netif.getInetAddresses();
+           while (inetAddresses.hasMoreElements()) {
+               InetAddress addr = inetAddresses.nextElement();
+               if (addr.isSiteLocalAddress() && !addr.isLoopbackAddress()) {
+                   return addr.getHostAddress();
+               }
+           }
+       }
+       return null;
+   }
+
}
@@ -1,7 +1,6 @@
package nu.marginalia.service.server;

import com.google.inject.Singleton;
-import lombok.SneakyThrows;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@@ -59,13 +58,17 @@ public class Initialization {
        }
    }

-   @SneakyThrows
    public boolean waitReady() {
-       synchronized (this) {
-           while (!initialized) {
-               wait();
+       try {
+           synchronized (this) {
+               while (!initialized) {
+                   wait();
+               }
+               return true;
            }
-           return true;
        }
+       catch (InterruptedException ex) {
+           throw new RuntimeException("Interrupted while waiting for initialization", ex);
+       }
    }
}
@@ -0,0 +1,178 @@
+package nu.marginalia.service.server;
+
+import io.jooby.*;
+import io.prometheus.client.Counter;
+import nu.marginalia.mq.inbox.MqInboxIf;
+import nu.marginalia.service.client.ServiceNotAvailableException;
+import nu.marginalia.service.discovery.property.ServiceEndpoint;
+import nu.marginalia.service.discovery.property.ServiceKey;
+import nu.marginalia.service.discovery.property.ServicePartition;
+import nu.marginalia.service.module.ServiceConfiguration;
+import nu.marginalia.service.server.jte.JteModule;
+import nu.marginalia.service.server.mq.ServiceMqSubscription;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.slf4j.Marker;
+import org.slf4j.MarkerFactory;
+
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.List;
+
+public class JoobyService {
+    private final Logger logger = LoggerFactory.getLogger(getClass());
+
+    // Marker for filtering out sensitive content from the persistent logs
+    private final Marker httpMarker = MarkerFactory.getMarker("HTTP");
+
+    private final Initialization initialization;
+
+    private final static Counter request_counter = Counter.build("wmsa_request_counter", "Request Counter")
+            .labelNames("service", "node")
+            .register();
+    private final static Counter request_counter_good = Counter.build("wmsa_request_counter_good", "Good Requests")
+            .labelNames("service", "node")
+            .register();
+    private final static Counter request_counter_bad = Counter.build("wmsa_request_counter_bad", "Bad Requests")
+            .labelNames("service", "node")
+            .register();
+    private final static Counter request_counter_err = Counter.build("wmsa_request_counter_err", "Error Requests")
+            .labelNames("service", "node")
+            .register();
+    private final String serviceName;
+    private static volatile boolean initialized = false;
+
+    protected final MqInboxIf messageQueueInbox;
+    private final int node;
+    private GrpcServer grpcServer;
+
+    private ServiceConfiguration config;
+    private final List<MvcExtension> joobyServices;
+    private final ServiceEndpoint restEndpoint;
+
+    public JoobyService(BaseServiceParams params,
+                        ServicePartition partition,
+                        List<DiscoverableService> grpcServices,
+                        List<MvcExtension> joobyServices
+                        ) throws Exception {
+
+        this.joobyServices = joobyServices;
+        this.initialization = params.initialization;
+        config = params.configuration;
+        node = config.node();
+
+        String inboxName = config.serviceName();
+        logger.info("Inbox name: {}", inboxName);
+
+        var serviceRegistry = params.serviceRegistry;
+
+        restEndpoint = serviceRegistry.registerService(ServiceKey.forRest(config.serviceId(), config.node()),
+                config.instanceUuid(), config.externalAddress());
+
+        var mqInboxFactory = params.messageQueueInboxFactory;
+        messageQueueInbox = mqInboxFactory.createSynchronousInbox(inboxName, config.node(), config.instanceUuid());
+        messageQueueInbox.subscribe(new ServiceMqSubscription(this));
+
+        serviceName = System.getProperty("service-name");
+
+        initialization.addCallback(params.heartbeat::start);
+        initialization.addCallback(messageQueueInbox::start);
+        initialization.addCallback(() -> params.eventLog.logEvent("SVC-INIT", serviceName + ":" + config.node()));
+        initialization.addCallback(() -> serviceRegistry.announceInstance(config.instanceUuid()));
+
+        Thread.setDefaultUncaughtExceptionHandler((t, e) -> {
+            if (e instanceof ServiceNotAvailableException) {
+                // reduce log spam for this common case
+                logger.error("Service not available: {}", e.getMessage());
+            }
+            else {
+                logger.error("Uncaught exception", e);
+            }
+            request_counter_err.labels(serviceName, Integer.toString(node)).inc();
+        });
+
+        if (!initialization.isReady() && !initialized) {
+            initialized = true;
+            grpcServer = new GrpcServer(config, serviceRegistry, partition, grpcServices);
+            grpcServer.start();
+        }
+    }
+
+    public void startJooby(Jooby jooby) {
+
+        logger.info("{} Listening to {}:{} ({})", getClass().getSimpleName(),
+                restEndpoint.host(),
+                restEndpoint.port(),
+                config.externalAddress());
+
+        // FIXME: This won't work outside of docker, may need to submit a PR to jooby to allow classpaths here
+        jooby.install(new JteModule(Path.of("/app/resources/jte"), Path.of("/app/classes/jte-precompiled")));
+        jooby.assets("/*", Paths.get("/app/resources/static"));
+
+        var options = new ServerOptions();
+        options.setHost(config.bindAddress());
+        options.setPort(restEndpoint.port());
+
+        // Enable gzip compression of response data, but set compression to the lowest level
+        // since it doesn't really save much more space to dial it up.  It's typically a
+        // single digit percentage difference since HTML already compresses very well with level = 1.
+        options.setCompressionLevel(1);
+
+        jooby.setServerOptions(options);
+
+        jooby.get("/internal/ping", ctx -> "pong");
+        jooby.get("/internal/started", this::isInitialized);
+        jooby.get("/internal/ready", this::isReady);
+
+        for (var service : joobyServices) {
+            jooby.mvc(service);
+        }
+
+        jooby.before(this::auditRequestIn);
+        jooby.after(this::auditRequestOut);
+    }
+
+    private Object isInitialized(Context ctx) {
+        if (initialization.isReady()) {
+            return "ok";
+        }
+        else {
+            ctx.setResponseCode(StatusCode.FAILED_DEPENDENCY_CODE);
+            return "bad";
+        }
+    }
+
+    public boolean isReady() {
+        return true;
+    }
+
+    private String isReady(Context ctx) {
+        if (isReady()) {
+            return "ok";
+        }
+        else {
+            ctx.setResponseCode(StatusCode.FAILED_DEPENDENCY_CODE);
+            return "bad";
+        }
+    }
+
+    private void auditRequestIn(Context ctx) {
+        request_counter.labels(serviceName, Integer.toString(node)).inc();
+    }
+
+    private void auditRequestOut(Context ctx, Object result, Throwable failure) {
+        if (ctx.getResponseCode().value() < 400) {
+            request_counter_good.labels(serviceName, Integer.toString(node)).inc();
+        }
+        else {
+            request_counter_bad.labels(serviceName, Integer.toString(node)).inc();
+        }
+
+        if (failure != null) {
+            logger.error("Request failed " + ctx.getMethod() + " " + ctx.getRequestURL(), failure);
+            request_counter_err.labels(serviceName, Integer.toString(node)).inc();
+        }
+    }
+
+}
@@ -2,22 +2,23 @@ package nu.marginalia.service.server;

import com.google.inject.Inject;
import io.prometheus.client.exporter.MetricsServlet;
-import lombok.SneakyThrows;
import nu.marginalia.service.module.ServiceConfiguration;
import org.eclipse.jetty.server.Server;
import org.eclipse.jetty.servlet.ServletContextHandler;
import org.eclipse.jetty.servlet.ServletHolder;

+import java.net.InetSocketAddress;
+
public class MetricsServer {

-   @SneakyThrows
    @Inject
-   public MetricsServer(ServiceConfiguration configuration) {
+   public MetricsServer(ServiceConfiguration configuration) throws Exception {
        // If less than zero, we forego setting up a metrics server
        if (configuration.metricsPort() < 0)
            return;

-       Server server = new Server(configuration.metricsPort());
+       Server server = new Server(new InetSocketAddress(configuration.bindAddress(), configuration.metricsPort()));

        ServletContextHandler context = new ServletContextHandler();
        context.setContextPath("/");
        server.setHandler(context);
@@ -1,10 +1,10 @@
package nu.marginalia.service.server;

-import com.google.inject.name.Named;
import com.google.inject.Inject;
-import lombok.SneakyThrows;
+import com.google.inject.name.Named;
import nu.marginalia.mq.persistence.MqPersistence;
import nu.marginalia.nodecfg.NodeConfigurationService;
+import nu.marginalia.nodecfg.model.NodeProfile;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageBaseType;
import org.slf4j.Logger;
@@ -57,7 +57,9 @@ public class NodeStatusWatcher {

    private void setupNode() {
        try {
-           configurationService.create(nodeId, "Node " + nodeId, true, false);
+           NodeProfile profile = NodeProfile.MIXED;
+
+           configurationService.create(nodeId, "Node " + nodeId, true, false, profile);

            fileStorageService.createStorageBase("Index Data", Path.of("/idx"), nodeId, FileStorageBaseType.CURRENT);
            fileStorageService.createStorageBase("Index Backups", Path.of("/backup"), nodeId, FileStorageBaseType.BACKUP);
@@ -81,10 +83,14 @@ public class NodeStatusWatcher {
        }
    }

-   @SneakyThrows
    private boolean isConfigured() {
-       var configuration = configurationService.get(nodeId);
-       return configuration != null;
+       try {
+           var configuration = configurationService.get(nodeId);
+           return configuration != null;
+       }
+       catch (SQLException ex) {
+           throw new RuntimeException(ex);
+       }
    }

    /** Look for changes in the configuration and kill the service if the corresponding
@@ -1,7 +1,6 @@
package nu.marginalia.service.server;

import io.prometheus.client.Counter;
-import lombok.SneakyThrows;
import nu.marginalia.mq.inbox.MqInboxIf;
import nu.marginalia.service.client.ServiceNotAvailableException;
import nu.marginalia.service.discovery.property.ServiceKey;
@@ -17,7 +16,7 @@ import spark.Spark;

import java.util.List;

-public class Service {
+public class SparkService {
    private final Logger logger = LoggerFactory.getLogger(getClass());

    // Marker for filtering out sensitive content from the persistent logs
@@ -44,11 +43,10 @@ public class SparkService {
    private final int node;
    private GrpcServer grpcServer;

-   @SneakyThrows
-   public Service(BaseServiceParams params,
-                  Runnable configureStaticFiles,
-                  ServicePartition partition,
-                  List<DiscoverableService> grpcServices) {
+   public SparkService(BaseServiceParams params,
+                       Runnable configureStaticFiles,
+                       ServicePartition partition,
+                       List<DiscoverableService> grpcServices) throws Exception {

        this.initialization = params.initialization;
        var config = params.configuration;
@@ -128,18 +126,18 @@ public class SparkService {
        }
    }

-   public Service(BaseServiceParams params,
-                  ServicePartition partition,
-                  List<DiscoverableService> grpcServices) {
+   public SparkService(BaseServiceParams params,
+                       ServicePartition partition,
+                       List<DiscoverableService> grpcServices) throws Exception {
        this(params,
-            Service::defaultSparkConfig,
+            SparkService::defaultSparkConfig,
             partition,
             grpcServices);
    }

-   public Service(BaseServiceParams params) {
+   public SparkService(BaseServiceParams params) throws Exception {
        this(params,
-            Service::defaultSparkConfig,
+            SparkService::defaultSparkConfig,
             ServicePartition.any(),
             List.of());
    }
@@ -1,20 +1,18 @@
package nu.marginalia.service.server;

-import lombok.SneakyThrows;
import spark.Request;
import spark.Response;
import spark.Spark;
import spark.resource.ClassPathResource;
import spark.staticfiles.MimeType;

-import java.io.FileNotFoundException;
+import java.io.IOException;
import java.time.LocalDateTime;
import java.time.ZoneOffset;

public class StaticResources {
    private final long startTime = LocalDateTime.now().toEpochSecond(ZoneOffset.UTC);

-   @SneakyThrows
    public void serveStatic(String domain, String path, Request req, Response rsp) {
        try {
            if (path.startsWith("..") || domain.startsWith("..")) {
@@ -28,7 +26,7 @@ public class StaticResources {

            resource.getInputStream().transferTo(rsp.raw().getOutputStream());
        }
-       catch (IllegalArgumentException | FileNotFoundException ex) {
+       catch (IllegalArgumentException | IOException ex) {
            Spark.halt(404);
        }
    }
@@ -57,7 +55,6 @@ public class StaticResources {
        return "application/octet-stream";
    }

-   @SneakyThrows
    private void handleEtagStatic(ClassPathResource resource, Request req, Response rsp) {
        rsp.header("Cache-Control", "public,max-age=3600");
        rsp.type(MimeType.fromResource(resource));
@@ -0,0 +1,61 @@
+package nu.marginalia.service.server.jte;
+
+import edu.umd.cs.findbugs.annotations.NonNull;
+import edu.umd.cs.findbugs.annotations.Nullable;
+import gg.jte.ContentType;
+import gg.jte.TemplateEngine;
+import gg.jte.resolve.DirectoryCodeResolver;
+import io.jooby.*;
+
+import java.io.File;
+import java.nio.file.Path;
+import java.util.List;
+import java.util.Objects;
+import java.util.Optional;
+import java.util.stream.Stream;
+
+// Temporary workaround for a bug
+// APL-2.0 https://github.com/jooby-project/jooby
+public class JteModule implements Extension {
+    private Path sourceDirectory;
+    private Path classDirectory;
+    private TemplateEngine templateEngine;
+
+    public JteModule(@NonNull Path sourceDirectory, @NonNull Path classDirectory) {
+        this.sourceDirectory = Objects.requireNonNull(sourceDirectory, "Source directory is required.");
+        this.classDirectory = Objects.requireNonNull(classDirectory, "Class directory is required.");
+    }
+
+    public JteModule(@NonNull Path sourceDirectory) {
+        this.sourceDirectory = Objects.requireNonNull(sourceDirectory, "Source directory is required.");
+    }
+
+    public JteModule(@NonNull TemplateEngine templateEngine) {
+        this.templateEngine = Objects.requireNonNull(templateEngine, "Template engine is required.");
+    }
+
+    public void install(@NonNull Jooby application) {
+        if (this.templateEngine == null) {
+            this.templateEngine = create(application.getEnvironment(), this.sourceDirectory, this.classDirectory);
+        }
+
+        ServiceRegistry services = application.getServices();
+        services.put(TemplateEngine.class, this.templateEngine);
+        application.encoder(MediaType.html, new JteTemplateEngine(this.templateEngine));
+    }
+
+    public static TemplateEngine create(@NonNull Environment environment, @NonNull Path sourceDirectory, @Nullable Path classDirectory) {
+        boolean dev = environment.isActive("dev", new String[]{"test"});
+        if (dev) {
+            Objects.requireNonNull(sourceDirectory, "Source directory is required.");
+            Path requiredClassDirectory = Optional.ofNullable(classDirectory).orElseGet(() -> sourceDirectory.resolve("jte-classes"));
+            TemplateEngine engine = TemplateEngine.create(new DirectoryCodeResolver(sourceDirectory), requiredClassDirectory, ContentType.Html, environment.getClassLoader());
+            Optional<List<String>> classPath = Optional.ofNullable(System.getProperty("jooby.run.classpath")).map((it) -> it.split(File.pathSeparator)).map(Stream::of).map(Stream::toList);
+            Objects.requireNonNull(engine);
+            classPath.ifPresent(engine::setClassPath);
+            return engine;
+        } else {
+            return classDirectory == null ? TemplateEngine.createPrecompiled(ContentType.Html) : TemplateEngine.createPrecompiled(classDirectory, ContentType.Html);
+        }
+    }
+}
@@ -0,0 +1,48 @@
+package nu.marginalia.service.server.jte;
+
+import edu.umd.cs.findbugs.annotations.NonNull;
+import gg.jte.TemplateEngine;
+import io.jooby.Context;
+import io.jooby.MapModelAndView;
+import io.jooby.ModelAndView;
+import io.jooby.buffer.DataBuffer;
+import io.jooby.internal.jte.DataBufferOutput;
+
+import java.nio.charset.StandardCharsets;
+import java.util.HashMap;
+import java.util.List;
+
+// Temporary workaround for a bug
+// APL-2.0 https://github.com/jooby-project/jooby
+class JteTemplateEngine implements io.jooby.TemplateEngine {
+    private final TemplateEngine jte;
+    private final List<String> extensions;
+
+    public JteTemplateEngine(TemplateEngine jte) {
+        this.jte = jte;
+        this.extensions = List.of(".jte", ".kte");
+    }
+
+
+    @NonNull @Override
+    public List<String> extensions() {
+        return extensions;
+    }
+
+    @Override
+    public DataBuffer render(Context ctx, ModelAndView modelAndView) {
+        var buffer = ctx.getBufferFactory().allocateBuffer();
+        var output = new DataBufferOutput(buffer, StandardCharsets.UTF_8);
+        var attributes = ctx.getAttributes();
+        if (modelAndView instanceof MapModelAndView mapModelAndView) {
+            var mapModel = new HashMap<String, Object>();
+            mapModel.putAll(attributes);
+            mapModel.putAll(mapModelAndView.getModel());
+            jte.render(modelAndView.getView(), mapModel, output);
+        } else {
+            jte.render(modelAndView.getView(), modelAndView.getModel(), output);
+        }
+
+        return buffer;
+    }
+}
@@ -3,7 +3,6 @@ package nu.marginalia.service.server.mq;
import nu.marginalia.mq.MqMessage;
import nu.marginalia.mq.inbox.MqInboxResponse;
import nu.marginalia.mq.inbox.MqSubscription;
-import nu.marginalia.service.server.Service;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@@ -15,10 +14,10 @@ import java.util.Map;
public class ServiceMqSubscription implements MqSubscription {
    private static final Logger logger = LoggerFactory.getLogger(ServiceMqSubscription.class);
    private final Map<String, Method> requests = new HashMap<>();
-   private final Service service;
+   private final Object service;


-   public ServiceMqSubscription(Service service) {
+   public ServiceMqSubscription(Object service) {
        this.service = service;

        /* Wire up all methods annotated with @MqRequest and @MqNotification
@@ -24,7 +24,7 @@ public class NamedExecutorFactory {

    @Override
    public Thread newThread(@NotNull Runnable r) {
-       var thread = new Thread(r, STR."\{name}[\{threadNumber.getAndIncrement()}]");
+       var thread = new Thread(r, name + "[" + threadNumber.getAndIncrement() + "]");
        thread.setDaemon(true);
        return thread;
    }
@@ -1,9 +1,9 @@
package nu.marginalia.service.discovery;

+import nu.marginalia.service.ServiceId;
import nu.marginalia.service.discovery.monitor.ServiceMonitorIf;
import nu.marginalia.service.discovery.property.ServiceKey;
import nu.marginalia.service.discovery.property.ServicePartition;
-import nu.marginalia.service.ServiceId;
import nu.marginalia.test.TestApiGrpc;
import org.apache.curator.framework.CuratorFrameworkFactory;
import org.apache.curator.retry.ExponentialBackoffRetry;
@@ -33,7 +33,7 @@ class ZkServiceRegistryTest {
    @BeforeEach
    public void setUp() {
        zookeeper.start();
-       connectString = STR."\{zookeeper.getHost()}:\{zookeeper.getMappedPort(ZOOKEEPER_PORT)}";
+       connectString = zookeeper.getHost() + ":" + zookeeper.getMappedPort(ZOOKEEPER_PORT);
    }

    @AfterEach
@@ -9,23 +9,25 @@ import nu.marginalia.executor.storage.FileStorageFile;
import nu.marginalia.executor.upload.UploadDirContents;
import nu.marginalia.executor.upload.UploadDirItem;
import nu.marginalia.functions.execution.api.*;
+import nu.marginalia.service.ServiceId;
import nu.marginalia.service.client.GrpcChannelPoolFactory;
import nu.marginalia.service.client.GrpcMultiNodeChannelPool;
import nu.marginalia.service.discovery.ServiceRegistryIf;
import nu.marginalia.service.discovery.property.ServiceKey;
import nu.marginalia.service.discovery.property.ServicePartition;
-import nu.marginalia.service.ServiceId;
import nu.marginalia.storage.model.FileStorage;
import nu.marginalia.storage.model.FileStorageId;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

-import java.net.*;
+import java.net.MalformedURLException;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.List;

-import static nu.marginalia.functions.execution.api.ExecutorApiGrpc.*;
+import static nu.marginalia.functions.execution.api.ExecutorApiGrpc.ExecutorApiBlockingStub;

@Singleton
public class ExecutorClient {
@@ -163,8 +165,8 @@ public class ExecutorClient {
     * The endpoint is compatible with range requests.
     * */
    public URL remoteFileURL(FileStorage fileStorage, String path) {
-       String uriPath = STR."/transfer/file/\{fileStorage.id()}";
-       String uriQuery = STR."path=\{URLEncoder.encode(path, StandardCharsets.UTF_8)}";
+       String uriPath = "/transfer/file/" + fileStorage.id();
+       String uriQuery = "path=" + URLEncoder.encode(path, StandardCharsets.UTF_8);

        var endpoints = registry.getEndpoints(ServiceKey.forRest(ServiceId.Executor, fileStorage.node()));
        if (endpoints.isEmpty()) {
@@ -180,4 +182,10 @@ public class ExecutorClient {
        }
    }

+   public void restartExecutorService(int node) {
+       channelPool.call(ExecutorApiBlockingStub::restartExecutorService)
+               .forNode(node)
+               .run(Empty.getDefaultInstance());
+   }
+
}
@@ -3,6 +3,7 @@ package nu.marginalia.executor.client;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.functions.execution.api.*;
+import nu.marginalia.mq.persistence.MqPersistence;
import nu.marginalia.service.client.GrpcChannelPoolFactory;
import nu.marginalia.service.client.GrpcMultiNodeChannelPool;
import nu.marginalia.service.discovery.property.ServiceKey;
@@ -11,6 +12,8 @@ import nu.marginalia.storage.model.FileStorageId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

+import java.time.Duration;
+
import static nu.marginalia.functions.execution.api.ExecutorExportApiGrpc.ExecutorExportApiBlockingStub;

@Singleton
@@ -18,23 +21,33 @@ public class ExecutorExportClient {
    private final GrpcMultiNodeChannelPool<ExecutorExportApiBlockingStub> channelPool;
    private static final Logger logger = LoggerFactory.getLogger(ExecutorExportClient.class);

+   private final MqPersistence persistence;
    @Inject
-   public ExecutorExportClient(GrpcChannelPoolFactory grpcChannelPoolFactory)
+   public ExecutorExportClient(GrpcChannelPoolFactory grpcChannelPoolFactory, MqPersistence persistence)
    {
        this.channelPool = grpcChannelPoolFactory
                .createMulti(
                        ServiceKey.forGrpcApi(ExecutorExportApiGrpc.class, ServicePartition.multi()),
                        ExecutorExportApiGrpc::newBlockingStub);
+       this.persistence = persistence;
    }

+   long createTrackingTokenMsg(String task, int node, Duration ttl) throws Exception {
+       return persistence.sendNewMessage("task-tracking[" + node + "]", "export-client", null, task, "", ttl);
+   }
+
-   public void exportAtags(int node, FileStorageId fid) {
+   public long exportAtags(int node, FileStorageId fid) throws Exception {
+       long msgId = createTrackingTokenMsg("atags", node, Duration.ofHours(6));
+
        channelPool.call(ExecutorExportApiBlockingStub::exportAtags)
                .forNode(node)
-               .run(RpcFileStorageId.newBuilder()
+               .run(RpcExportRequest.newBuilder()
                        .setFileStorageId(fid.id())
+                       .setMsgId(msgId)
                        .build());
+       return msgId;
    }

    public void exportSampleData(int node, FileStorageId fid, int size, String name) {
        channelPool.call(ExecutorExportApiBlockingStub::exportSampleData)
                .forNode(node)
@@ -45,20 +58,26 @@ public class ExecutorExportClient {
                        .build());
    }

-   public void exportRssFeeds(int node, FileStorageId fid) {
+   public long exportRssFeeds(int node, FileStorageId fid) throws Exception {
+       long msgId = createTrackingTokenMsg("rss", node, Duration.ofHours(6));
        channelPool.call(ExecutorExportApiBlockingStub::exportRssFeeds)
                .forNode(node)
-               .run(RpcFileStorageId.newBuilder()
+               .run(RpcExportRequest.newBuilder()
                        .setFileStorageId(fid.id())
+                       .setMsgId(msgId)
                        .build());
+       return msgId;
    }

-   public void exportTermFrequencies(int node, FileStorageId fid) {
+   public long exportTermFrequencies(int node, FileStorageId fid) throws Exception {
+       long msgId = createTrackingTokenMsg("tfreq", node, Duration.ofHours(6));
        channelPool.call(ExecutorExportApiBlockingStub::exportTermFrequencies)
                .forNode(node)
-               .run(RpcFileStorageId.newBuilder()
+               .run(RpcExportRequest.newBuilder()
                        .setFileStorageId(fid.id())
+                       .setMsgId(msgId)
                        .build());
+       return msgId;
    }

    public void exportData(int node) {
@@ -77,4 +96,21 @@ public class ExecutorExportClient {
    }


+   public void exportAllAtags() {
+       channelPool.call(ExecutorExportApiBlockingStub::exportAllAtags)
+               .forNode(1)
+               .run(Empty.getDefaultInstance());
+   }
+
+   public void exportAllFeeds() {
+       channelPool.call(ExecutorExportApiBlockingStub::exportAllFeeds)
+               .forNode(1)
+               .run(Empty.getDefaultInstance());
+   }
+
+   public void exportAllTfreqs() {
+       channelPool.call(ExecutorExportApiBlockingStub::exportAllTfreqs)
+               .forNode(1)
+               .run(Empty.getDefaultInstance());
+   }
}
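The export calls now return the id of a tracking message planted in the message queue before the gRPC call is made, so callers can follow the task to completion. A sketch of the intended call pattern, with hypothetical variable names:

    // msgId identifies the "task-tracking[node]" message created by the client;
    // its state can be polled via MqPersistence elsewhere to observe progress.
    long msgId = exportClient.exportRssFeeds(node, fileStorageId);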
@@ -17,6 +17,8 @@ service ExecutorApi {
    rpc downloadSampleData(RpcDownloadSampleData) returns (Empty) {}
    rpc calculateAdjacencies(Empty) returns (Empty) {}
    rpc restoreBackup(RpcFileStorageId) returns (Empty) {}
+
+   rpc restartExecutorService(Empty) returns (Empty) {}
}

service ExecutorCrawlApi {
@@ -37,15 +39,20 @@ service ExecutorSideloadApi {
}

service ExecutorExportApi {
-   rpc exportAtags(RpcFileStorageId) returns (Empty) {}
+   rpc exportAtags(RpcExportRequest) returns (Empty) {}
    rpc exportSegmentationModel(RpcExportSegmentationModel) returns (Empty) {}
    rpc exportSampleData(RpcExportSampleData) returns (Empty) {}
-   rpc exportRssFeeds(RpcFileStorageId) returns (Empty) {}
-   rpc exportTermFrequencies(RpcFileStorageId) returns (Empty) {}
+   rpc exportRssFeeds(RpcExportRequest) returns (Empty) {}
+   rpc exportTermFrequencies(RpcExportRequest) returns (Empty) {}
    rpc exportData(Empty) returns (Empty) {}
+
+   rpc exportAllAtags(Empty) returns (Empty) {}
+   rpc exportAllFeeds(Empty) returns (Empty) {}
+   rpc exportAllTfreqs(Empty) returns (Empty) {}
}

message Empty {}

message RpcFsmName {
    string actorName = 1;
}
@@ -55,6 +62,10 @@ message RpcProcessId {
message RpcFileStorageId {
    int64 fileStorageId = 1;
}
+message RpcExportRequest {
+    int64 fileStorageId = 1;
+    int64 msgId = 2;
+}
message RpcFileStorageIdWithDomainName {
    int64 fileStorageId = 1;
    string targetDomainName = 2;
@@ -15,15 +15,15 @@ dependencies {
    // These look weird but they're needed to be able to spawn the processes
    // from the executor service

-   implementation project(':code:processes:website-adjacencies-calculator')
+   implementation project(':code:processes:export-task-process')
    implementation project(':code:processes:crawling-process')
    implementation project(':code:processes:live-crawling-process')
    implementation project(':code:processes:loading-process')
    implementation project(':code:processes:converting-process')
    implementation project(':code:processes:index-constructor-process')

    implementation project(':code:common:config')
    implementation project(':code:common:model')
    implementation project(':code:common:process')
    implementation project(':code:common:db')
    implementation project(':code:common:linkdb')

@@ -35,13 +35,13 @@ dependencies {
    implementation project(':code:libraries:term-frequency-dict')

    implementation project(':code:functions:link-graph:api')
    implementation project(':code:functions:live-capture:api')
    implementation project(':code:functions:search-query')
    implementation project(':code:execution:api')

    implementation project(':code:processes:crawling-process:model')
    implementation project(':code:processes:crawling-process:ft-link-parser')
    implementation project(':code:execution:data-extractors')
    implementation project(':code:index:index-journal')
    implementation project(':code:index:api')
    implementation project(':code:processes:process-mq-api')
@@ -1,7 +0,0 @@
-Contains converter-*like* extraction jobs that operate on crawled data to produce export files.
-
-## Important classes
-
-* [AtagExporter](java/nu/marginalia/extractor/AtagExporter.java) - extracts anchor texts from the crawled data.
-* [FeedExporter](java/nu/marginalia/extractor/FeedExporter.java) - tries to find RSS/Atom feeds within the crawled data.
-* [TermFrequencyExporter](java/nu/marginalia/extractor/TermFrequencyExporter.java) - exports the 'TF' part of TF-IDF.
@@ -1,28 +1,40 @@
package nu.marginalia.actor;

+import nu.marginalia.nodecfg.model.NodeProfile;
+
+import java.util.Set;
+
public enum ExecutorActor {
-   CRAWL,
-   RECRAWL,
-   RECRAWL_SINGLE_DOMAIN,
-   CONVERT_AND_LOAD,
-   PROC_CONVERTER_SPAWNER,
-   PROC_LOADER_SPAWNER,
-   PROC_CRAWLER_SPAWNER,
-   MONITOR_PROCESS_LIVENESS,
-   MONITOR_FILE_STORAGE,
-   ADJACENCY_CALCULATION,
-   CRAWL_JOB_EXTRACTOR,
-   EXPORT_DATA,
-   EXPORT_SEGMENTATION_MODEL,
-   EXPORT_ATAGS,
-   EXPORT_TERM_FREQUENCIES,
-   EXPORT_FEEDS,
-   PROC_INDEX_CONSTRUCTOR_SPAWNER,
-   CONVERT,
-   RESTORE_BACKUP,
-   EXPORT_SAMPLE_DATA,
-   DOWNLOAD_SAMPLE,
-   SCRAPE_FEEDS;
+   PREC_EXPORT_ALL(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
+
+   CRAWL(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
+   RECRAWL(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
+   RECRAWL_SINGLE_DOMAIN(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
+   PROC_CRAWLER_SPAWNER(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
+   PROC_EXPORT_TASKS_SPAWNER(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
+   ADJACENCY_CALCULATION(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
+   EXPORT_DATA(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
+   EXPORT_SEGMENTATION_MODEL(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
+   EXPORT_ATAGS(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
+   EXPORT_TERM_FREQUENCIES(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
+   EXPORT_FEEDS(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
+   EXPORT_SAMPLE_DATA(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
+   DOWNLOAD_SAMPLE(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED),
+
+   PROC_CONVERTER_SPAWNER(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED, NodeProfile.SIDELOAD),
+   PROC_LOADER_SPAWNER(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED, NodeProfile.SIDELOAD),
+   RESTORE_BACKUP(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED, NodeProfile.SIDELOAD),
+   CONVERT(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED, NodeProfile.SIDELOAD),
+
+   CONVERT_AND_LOAD(NodeProfile.BATCH_CRAWL, NodeProfile.MIXED, NodeProfile.REALTIME, NodeProfile.SIDELOAD),
+   MONITOR_PROCESS_LIVENESS(NodeProfile.BATCH_CRAWL, NodeProfile.REALTIME, NodeProfile.MIXED, NodeProfile.SIDELOAD),
+   MONITOR_FILE_STORAGE(NodeProfile.BATCH_CRAWL, NodeProfile.REALTIME, NodeProfile.MIXED, NodeProfile.SIDELOAD),
+   PROC_INDEX_CONSTRUCTOR_SPAWNER(NodeProfile.BATCH_CRAWL, NodeProfile.REALTIME, NodeProfile.MIXED, NodeProfile.SIDELOAD),
+
+   LIVE_CRAWL(NodeProfile.REALTIME),
+   PROC_LIVE_CRAWL_SPAWNER(NodeProfile.REALTIME),
+   SCRAPE_FEEDS(NodeProfile.REALTIME),
+   UPDATE_RSS(NodeProfile.REALTIME);

    public String id() {
        return "fsm:" + name().toLowerCase();
@@ -32,4 +44,9 @@ public enum ExecutorActor {
        return "fsm:" + name().toLowerCase() + ":" + node;
    }

+   ExecutorActor(NodeProfile... profileSet) {
+       this.profileSet = Set.of(profileSet);
+   }
+
+   public Set<NodeProfile> profileSet;
}
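With the enum now carrying a NodeProfile set, actor availability becomes data rather than hard-coded node checks. A minimal sketch of the gating this enables (the profile value here is assumed for illustration; the real check lives in ExecutorActorControlService.register() below):

    NodeProfile profile = NodeProfile.REALTIME; // assumed; normally read via NodeConfigurationService
    for (ExecutorActor actor : ExecutorActor.values()) {
        if (actor.profileSet.contains(profile)) {
            System.out.println(actor.id() + " is enabled on this node");
        }
    }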
@@ -2,8 +2,8 @@ package nu.marginalia.actor;

import com.google.inject.Inject;
import com.google.inject.Singleton;
-import lombok.SneakyThrows;
import nu.marginalia.actor.monitor.FileStorageMonitorActor;
+import nu.marginalia.actor.precession.ExportAllPrecessionActor;
import nu.marginalia.actor.proc.*;
import nu.marginalia.actor.prototype.ActorPrototype;
import nu.marginalia.actor.prototype.RecordActorPrototype;
@@ -11,9 +11,15 @@ import nu.marginalia.actor.state.ActorStateInstance;
import nu.marginalia.actor.state.ActorStep;
import nu.marginalia.actor.task.*;
import nu.marginalia.mq.MessageQueueFactory;
+import nu.marginalia.nodecfg.NodeConfigurationService;
+import nu.marginalia.nodecfg.model.NodeConfiguration;
import nu.marginalia.service.control.ServiceEventLog;
+import nu.marginalia.service.module.ServiceConfiguration;
import nu.marginalia.service.server.BaseServiceParams;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;

+import java.sql.SQLException;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
@@ -27,16 +33,24 @@ public class ExecutorActorControlService {
    public Map<ExecutorActor, ActorPrototype> actorDefinitions = new HashMap<>();
    private final int node;
+
+   private final NodeConfiguration nodeConfiguration;
+
+   private final Logger logger = LoggerFactory.getLogger(getClass());

    @Inject
    public ExecutorActorControlService(MessageQueueFactory messageQueueFactory,
+                                      ServiceConfiguration serviceConfiguration,
+                                      NodeConfigurationService configurationService,
                                       BaseServiceParams baseServiceParams,
                                       ConvertActor convertActor,
                                       ConvertAndLoadActor convertAndLoadActor,
                                       CrawlActor crawlActor,
+                                      LiveCrawlActor liveCrawlActor,
                                       RecrawlSingleDomainActor recrawlSingleDomainActor,
                                       RestoreBackupActor restoreBackupActor,
                                       ConverterMonitorActor converterMonitorFSM,
                                       CrawlerMonitorActor crawlerMonitorActor,
+                                      LiveCrawlerMonitorActor liveCrawlerMonitorActor,
                                       LoaderMonitorActor loaderMonitor,
                                       ProcessLivenessMonitorActor processMonitorFSM,
                                       FileStorageMonitorActor fileStorageMonitorActor,
@@ -48,15 +62,21 @@ public class ExecutorActorControlService {
                                       ExportSampleDataActor exportSampleDataActor,
                                       ExportTermFreqActor exportTermFrequenciesActor,
                                       ExportSegmentationModelActor exportSegmentationModelActor,
+                                      ExportTaskMonitorActor exportTasksMonitorActor,
                                       DownloadSampleActor downloadSampleActor,
                                       ScrapeFeedsActor scrapeFeedsActor,
-                                      ExecutorActorStateMachines stateMachines) {
+                                      ExecutorActorStateMachines stateMachines,
+                                      ExportAllPrecessionActor exportAllPrecessionActor,
+                                      UpdateRssActor updateRssActor) throws SQLException {
        this.messageQueueFactory = messageQueueFactory;
        this.eventLog = baseServiceParams.eventLog;
        this.stateMachines = stateMachines;
        this.node = baseServiceParams.configuration.node();
+
+       this.nodeConfiguration = configurationService.get(node);

        register(ExecutorActor.CRAWL, crawlActor);
+       register(ExecutorActor.LIVE_CRAWL, liveCrawlActor);
        register(ExecutorActor.RECRAWL_SINGLE_DOMAIN, recrawlSingleDomainActor);

        register(ExecutorActor.CONVERT, convertActor);
@@ -67,6 +87,8 @@ public class ExecutorActorControlService {
        register(ExecutorActor.PROC_CONVERTER_SPAWNER, converterMonitorFSM);
        register(ExecutorActor.PROC_LOADER_SPAWNER, loaderMonitor);
        register(ExecutorActor.PROC_CRAWLER_SPAWNER, crawlerMonitorActor);
+       register(ExecutorActor.PROC_LIVE_CRAWL_SPAWNER, liveCrawlerMonitorActor);
+       register(ExecutorActor.PROC_EXPORT_TASKS_SPAWNER, exportTasksMonitorActor);

        register(ExecutorActor.MONITOR_PROCESS_LIVENESS, processMonitorFSM);
        register(ExecutorActor.MONITOR_FILE_STORAGE, fileStorageMonitorActor);
@@ -83,9 +105,19 @@ public class ExecutorActorControlService {
        register(ExecutorActor.DOWNLOAD_SAMPLE, downloadSampleActor);

        register(ExecutorActor.SCRAPE_FEEDS, scrapeFeedsActor);
+       register(ExecutorActor.UPDATE_RSS, updateRssActor);
+
+       if (serviceConfiguration.node() == 1) {
+           register(ExecutorActor.PREC_EXPORT_ALL, exportAllPrecessionActor);
+       }
    }

    private void register(ExecutorActor process, RecordActorPrototype graph) {
+       if (!process.profileSet.contains(nodeConfiguration.profile())) {
+           return;
+       }
+
        var sm = new ActorStateMachine(messageQueueFactory, process.id(), node, UUID.randomUUID(), graph);
        sm.listen((function, param) -> logStateChange(process, function));

@@ -117,11 +149,15 @@ public class ExecutorActorControlService {
        stateMachines.startFromJSON(process, state, json);
    }

-   @SneakyThrows
    public void stop(ExecutorActor process) {
        eventLog.logEvent("FSM-STOP", process.id());

-       stateMachines.stop(process);
+       try {
+           stateMachines.stop(process);
+       }
+       catch (Exception e) {
+           logger.error("Failed to stop FSM", e);
+       }
    }

    public Map<ExecutorActor, ActorStateInstance> getActorStates() {
@@ -0,0 +1,116 @@
package nu.marginalia.actor.precession;

import com.google.gson.Gson;
import com.google.inject.Inject;
import nu.marginalia.actor.prototype.RecordActorPrototype;
import nu.marginalia.actor.state.ActorStep;
import nu.marginalia.executor.client.ExecutorExportClient;
import nu.marginalia.mq.MqMessageState;
import nu.marginalia.mq.persistence.MqPersistence;
import nu.marginalia.nodecfg.NodeConfigurationService;
import nu.marginalia.nodecfg.model.NodeConfiguration;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageType;

import java.time.Duration;
import java.util.Comparator;
import java.util.Optional;

public class ExportAllPrecessionActor extends RecordActorPrototype {

    private final NodeConfigurationService nodeConfigurationService;
    private final ExecutorExportClient exportClient;
    private final FileStorageService fileStorageService;
    private final MqPersistence persistence;

    @Inject
    public ExportAllPrecessionActor(Gson gson,
                                    NodeConfigurationService nodeConfigurationService,
                                    ExecutorExportClient exportClient,
                                    FileStorageService fileStorageService,
                                    MqPersistence persistence)
    {
        super(gson);
        this.nodeConfigurationService = nodeConfigurationService;
        this.exportClient = exportClient;
        this.fileStorageService = fileStorageService;
        this.persistence = persistence;
    }

    public enum ExportTask {
        FEEDS,
        ATAGS,
        TFREQ
    }

    public record Initial(ExportTask task) implements ActorStep {}
    public record Export(int nodeId, ExportTask task, long msgId) implements ActorStep {
        public Export(int nodeId, ExportTask task) {
            this(nodeId, task, -1);
        }
    }

    @Override
    public ActorStep transition(ActorStep self) throws Exception {
        return switch (self) {
            case Initial(ExportTask task) -> {
                var firstNode = nextNodeId(-1);
                if (firstNode.isEmpty())
                    yield new Error("No nodes included in precession");
                else
                    yield new Export(firstNode.get(), task);
            }

            case Export(int nodeId, ExportTask task, long msgId) when msgId < 0 -> {
                var activeStorages = fileStorageService.getActiveFileStorages(nodeId, FileStorageType.CRAWL_DATA);
                if (activeStorages.isEmpty()) {
                    yield new Error("Node " + nodeId + " has no active file storage");
                }
                var activeCrawlStorageId = activeStorages.getFirst();

                long trackingMsgId = switch(task) {
                    case ATAGS -> exportClient.exportAtags(nodeId, activeCrawlStorageId);
                    case TFREQ -> exportClient.exportTermFrequencies(nodeId, activeCrawlStorageId);
                    case FEEDS -> exportClient.exportRssFeeds(nodeId, activeCrawlStorageId);
                };

                yield new Export(nodeId, task, trackingMsgId);
            }

            case Export(int nodeId, ExportTask task, long msgId) -> {
                for (;;) {
                    var msg = persistence.getMessage(msgId);
                    if (!msg.state().isTerminal()) {
                        Thread.sleep(Duration.ofSeconds(30));
                        continue;
                    }
                    if (msg.state() == MqMessageState.OK) {
                        var nextNode = nextNodeId(nodeId);
                        if (nextNode.isEmpty()) {
                            yield new End();
                        } else {
                            yield new Export(nextNode.get(), task);
                        }
                    } else {
                        yield new Error("Export failed for node " + nodeId);
                    }
                }
            }
            default -> new Error("Unknown state");
        };
    }

    private Optional<Integer> nextNodeId(int currentNodeId) {
        return nodeConfigurationService.getAll()
                .stream().sorted(Comparator.comparing(NodeConfiguration::node))
                .filter(node -> node.node() > currentNodeId)
                .filter(NodeConfiguration::includeInPrecession)
                .map(NodeConfiguration::node)
                .findFirst();
    }

    @Override
    public String describe() {
        return "Runs an export job on each index node included in the precession";
    }
}
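On node 1 this actor is registered under ExecutorActor.PREC_EXPORT_ALL and started from its Initial step. The invocation below mirrors what ExecutorExportGrpcService does later in this changeset (the actorControlService wiring at the call site is assumed):

    actorControlService.startFrom(ExecutorActor.PREC_EXPORT_ALL,
            new ExportAllPrecessionActor.Initial(ExportAllPrecessionActor.ExportTask.ATAGS));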
@@ -0,0 +1,29 @@
package nu.marginalia.actor.proc;

import com.google.gson.Gson;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.actor.monitor.AbstractProcessSpawnerActor;
import nu.marginalia.mq.persistence.MqPersistence;
import nu.marginalia.mqapi.ProcessInboxNames;
import nu.marginalia.process.ProcessService;
import nu.marginalia.service.module.ServiceConfiguration;

@Singleton
public class ExportTaskMonitorActor extends AbstractProcessSpawnerActor {

    @Inject
    public ExportTaskMonitorActor(Gson gson,
                                  ServiceConfiguration configuration,
                                  MqPersistence persistence,
                                  ProcessService processService) {
        super(gson,
                configuration,
                persistence,
                processService,
                ProcessInboxNames.EXPORT_TASK_INBOX,
                ProcessService.ProcessId.EXPORT_TASKS);
    }

}
@@ -0,0 +1,29 @@
package nu.marginalia.actor.proc;

import com.google.gson.Gson;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.actor.monitor.AbstractProcessSpawnerActor;
import nu.marginalia.mq.persistence.MqPersistence;
import nu.marginalia.mqapi.ProcessInboxNames;
import nu.marginalia.process.ProcessService;
import nu.marginalia.service.module.ServiceConfiguration;

@Singleton
public class LiveCrawlerMonitorActor extends AbstractProcessSpawnerActor {

    @Inject
    public LiveCrawlerMonitorActor(Gson gson,
                                   ServiceConfiguration configuration,
                                   MqPersistence persistence,
                                   ProcessService processService) {
        super(gson,
                configuration,
                persistence,
                processService,
                ProcessInboxNames.LIVE_CRAWLER_INBOX,
                ProcessService.ProcessId.LIVE_CRAWLER);
    }

}
@@ -10,6 +10,8 @@ import nu.marginalia.actor.state.ActorResumeBehavior;
import nu.marginalia.actor.state.ActorStep;
import nu.marginalia.actor.state.Resume;
import nu.marginalia.model.EdgeDomain;
+import nu.marginalia.nodecfg.NodeConfigurationService;
+import nu.marginalia.nodecfg.model.NodeProfile;
import nu.marginalia.service.control.ServiceEventLog;
import nu.marginalia.service.module.ServiceConfiguration;
import org.jsoup.Jsoup;
@@ -39,6 +41,7 @@ public class ScrapeFeedsActor extends RecordActorPrototype {
    private final Duration pollInterval = Duration.ofHours(6);

    private final ServiceEventLog eventLog;
+   private final NodeConfigurationService nodeConfigurationService;
    private final HikariDataSource dataSource;
    private final int nodeId;

@@ -54,8 +57,8 @@ public class ScrapeFeedsActor extends RecordActorPrototype {
    public ActorStep transition(ActorStep self) throws Exception {
        return switch(self) {
            case Initial() -> {
-               if (nodeId > 1) {
-                   yield new End();
+               if (nodeConfigurationService.get(nodeId).profile() != NodeProfile.REALTIME) {
+                   yield new Error("Invalid node profile for RSS update");
                }
                else {
                    yield new Wait(LocalDateTime.now().toString());
@@ -177,10 +180,12 @@ public class ScrapeFeedsActor extends RecordActorPrototype {
    public ScrapeFeedsActor(Gson gson,
                            ServiceEventLog eventLog,
                            ServiceConfiguration configuration,
+                           NodeConfigurationService nodeConfigurationService,
                            HikariDataSource dataSource)
    {
        super(gson);
        this.eventLog = eventLog;
+       this.nodeConfigurationService = nodeConfigurationService;
        this.dataSource = dataSource;
        this.nodeId = configuration.node();
    }
code/execution/java/nu/marginalia/actor/proc/UpdateRssActor.java (new file, 141 lines)
@@ -0,0 +1,141 @@
package nu.marginalia.actor.proc;

import com.google.gson.Gson;
import com.google.inject.Inject;
import nu.marginalia.actor.prototype.RecordActorPrototype;
import nu.marginalia.actor.state.ActorResumeBehavior;
import nu.marginalia.actor.state.ActorStep;
import nu.marginalia.actor.state.Resume;
import nu.marginalia.api.feeds.FeedsClient;
import nu.marginalia.api.feeds.RpcFeedUpdateMode;
import nu.marginalia.mq.MqMessage;
import nu.marginalia.mq.MqMessageState;
import nu.marginalia.mq.persistence.MqPersistence;
import nu.marginalia.nodecfg.NodeConfigurationService;
import nu.marginalia.nodecfg.model.NodeProfile;
import nu.marginalia.service.module.ServiceConfiguration;

import java.time.Duration;
import java.time.LocalDateTime;

public class UpdateRssActor extends RecordActorPrototype {

    private final FeedsClient feedsClient;
    private final int nodeId;

    private final Duration initialDelay = Duration.ofMinutes(5);
    private final Duration updateInterval = Duration.ofHours(24);
    private final int cleanInterval = 60;

    private final NodeConfigurationService nodeConfigurationService;
    private final MqPersistence persistence;

    @Inject
    public UpdateRssActor(Gson gson,
                          FeedsClient feedsClient,
                          ServiceConfiguration serviceConfiguration,
                          NodeConfigurationService nodeConfigurationService,
                          MqPersistence persistence) {
        super(gson);
        this.feedsClient = feedsClient;
        this.nodeId = serviceConfiguration.node();
        this.nodeConfigurationService = nodeConfigurationService;
        this.persistence = persistence;
    }

    public record Initial() implements ActorStep {}
    @Resume(behavior = ActorResumeBehavior.RETRY)
    public record Wait(String ts, int refreshCount) implements ActorStep {}
    @Resume(behavior = ActorResumeBehavior.RETRY)
    public record UpdateRefresh(int refreshCount, long msgId) implements ActorStep {
        public UpdateRefresh(int refreshCount) {
            this(refreshCount, -1);
        }
    }
    @Resume(behavior = ActorResumeBehavior.RETRY)
    public record UpdateClean(long msgId) implements ActorStep {
        public UpdateClean() {
            this(-1);
        }
    }

    @Override
    public ActorStep transition(ActorStep self) throws Exception {
        return switch (self) {
            case Initial() -> {
                if (nodeConfigurationService.get(nodeId).profile() != NodeProfile.REALTIME) {
                    yield new Error("Invalid node profile for RSS update");
                }
                else {
                    // Wait for 5 minutes before starting the first update, to give the system time to start up properly
                    yield new Wait(LocalDateTime.now().plus(initialDelay).toString(), 0);
                }
            }
            case Wait(String untilTs, int count) -> {
                var until = LocalDateTime.parse(untilTs);
                var now = LocalDateTime.now();

                long remaining = Duration.between(now, until).toMillis();

                if (remaining > 0) {
                    Thread.sleep(remaining);
                    yield new Wait(untilTs, count);
                }
                else {
                    // Once every `cleanInterval` updates, do a clean update;
                    // otherwise do a refresh update
                    if (count > cleanInterval) {
                        yield new UpdateClean();
                    }
                    else {
                        yield new UpdateRefresh(count);
                    }
                }
            }
            case UpdateRefresh(int count, long msgId) when msgId < 0 -> {
                long messageId = feedsClient.updateFeeds(RpcFeedUpdateMode.REFRESH);
                yield new UpdateRefresh(count, messageId);
            }
            case UpdateRefresh(int count, long msgId) -> {
                MqMessage msg = persistence.waitForMessageTerminalState(msgId, Duration.ofSeconds(10), Duration.ofHours(12));
                if (msg == null) {
                    // Retry the update
                    yield new Error("Failed to update feeds: message not found");
                } else if (msg.state() != MqMessageState.OK) {
                    // Retry the update
                    yield new Error("Failed to update feeds: " + msg.state());
                }
                else {
                    // Increment the refresh count
                    yield new Wait(LocalDateTime.now().plus(updateInterval).toString(), count + 1);
                }
            }
            case UpdateClean(long msgId) when msgId < 0 -> {
                long messageId = feedsClient.updateFeeds(RpcFeedUpdateMode.CLEAN);
                yield new UpdateClean(messageId);
            }
            case UpdateClean(long msgId) -> {
                MqMessage msg = persistence.waitForMessageTerminalState(msgId, Duration.ofSeconds(10), Duration.ofHours(12));
                if (msg == null) {
                    // Retry the update
                    yield new Error("Failed to update feeds: message not found");
                } else if (msg.state() != MqMessageState.OK) {
                    // Retry the update
                    yield new Error("Failed to update feeds: " + msg.state());
                }
                else {
                    // Reset the refresh count after a successful update
                    yield new Wait(LocalDateTime.now().plus(updateInterval).toString(), 0);
                }
            }
            default -> new Error("Unknown actor step: " + self);
        };
    }

    @Override
    public String describe() {
        return "Periodically updates RSS and Atom feeds";
    }
}
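A note on the Wait step above: the wake-up time is persisted as an ISO-8601 string inside the step record, so the actor can resume after a restart without losing its schedule. A compact, standalone sketch of that round trip (names assumed for illustration):

    // Serialize the wake-up time, then later recover the remaining sleep from it.
    String untilTs = LocalDateTime.now().plus(Duration.ofHours(24)).toString();
    long remaining = Duration.between(LocalDateTime.now(), LocalDateTime.parse(untilTs)).toMillis();
    if (remaining > 0) {
        Thread.sleep(remaining); // on restart, Wait(untilTs, count) re-enters here via the RETRY resume behavior
    }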
@@ -8,6 +8,9 @@ import nu.marginalia.actor.state.ActorResumeBehavior;
import nu.marginalia.actor.state.ActorStep;
import nu.marginalia.actor.state.Resume;
import nu.marginalia.encyclopedia.EncyclopediaConverter;
+import nu.marginalia.mq.MqMessageState;
+import nu.marginalia.mq.outbox.MqOutbox;
+import nu.marginalia.mqapi.converting.ConvertRequest;
import nu.marginalia.process.ProcessOutboxes;
import nu.marginalia.process.ProcessService;
import nu.marginalia.sideload.RedditSideloadHelper;
@@ -17,9 +20,6 @@ import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageId;
import nu.marginalia.storage.model.FileStorageState;
import nu.marginalia.storage.model.FileStorageType;
-import nu.marginalia.mq.MqMessageState;
-import nu.marginalia.mq.outbox.MqOutbox;
-import nu.marginalia.mqapi.converting.ConvertRequest;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@@ -140,7 +140,7 @@ public class ConvertActor extends RecordActorPrototype {
                // To avoid re-converting the same file, we'll assign the file a name based on its hash
                // and the original filename. This way, if we're fed the same file again, we'll be able to just
                // re-use the predigested database file.
-               yield new PredigestEncyclopedia(source, STR."\{source}.\{hash}.db", baseUrl);
+               yield new PredigestEncyclopedia(source, source + "." + hash + ".db", baseUrl);
            } else if (!source.endsWith(".db")) {
                yield new Error("Source path must be a ZIM or pre-digested sqlite database file (.db)");
            }
@@ -3,9 +3,6 @@ package nu.marginalia.actor.task;
import com.google.gson.Gson;
import com.google.inject.Inject;
import com.google.inject.Singleton;
-import lombok.AllArgsConstructor;
-import lombok.NoArgsConstructor;
-import lombok.With;
import nu.marginalia.IndexLocations;
import nu.marginalia.actor.prototype.RecordActorPrototype;
import nu.marginalia.actor.state.ActorResumeBehavior;
@@ -40,7 +37,6 @@ import java.util.List;
public class ConvertAndLoadActor extends RecordActorPrototype {

    // STATES

    public static final String RERANK = "RERANK";
    private final ActorProcessWatcher processWatcher;
    private final MqOutbox mqConverterOutbox;
@@ -54,15 +50,6 @@ public class ConvertAndLoadActor extends RecordActorPrototype {
    private final int nodeId;
    private final Logger logger = LoggerFactory.getLogger(getClass());

-   @AllArgsConstructor @With @NoArgsConstructor
-   public static class Message {
-       public FileStorageId crawlStorageId = null;
-       public List<FileStorageId> processedStorageId = null;
-       public long converterMsgId = 0L;
-       public long loaderMsgId = 0L;
-   }
-
    public record Initial(FileStorageId fid) implements ActorStep {}

    @Resume(behavior = ActorResumeBehavior.RETRY)
@@ -3,50 +3,78 @@ package nu.marginalia.actor.task;
import com.google.gson.Gson;
import com.google.inject.Inject;
import com.google.inject.Singleton;
-import nu.marginalia.extractor.AtagExporter;
-import nu.marginalia.extractor.ExporterIf;
-import nu.marginalia.storage.model.*;
import nu.marginalia.actor.prototype.RecordActorPrototype;
import nu.marginalia.actor.state.ActorStep;
+import nu.marginalia.mq.MqMessageState;
+import nu.marginalia.mq.outbox.MqOutbox;
+import nu.marginalia.mq.persistence.MqPersistence;
+import nu.marginalia.mqapi.tasks.ExportTaskRequest;
+import nu.marginalia.process.ProcessOutboxes;
+import nu.marginalia.process.ProcessService;
import nu.marginalia.storage.FileStorageService;
+import nu.marginalia.storage.model.FileStorageId;
+import nu.marginalia.storage.model.FileStorageState;
+import nu.marginalia.storage.model.FileStorageType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.time.LocalDateTime;

@Singleton
public class ExportAtagsActor extends RecordActorPrototype {
    private final FileStorageService storageService;
-   private final ExporterIf atagExporter;
+   private final ActorProcessWatcher processWatcher;
+   private final MqOutbox exportTasksOutbox;
    private final Logger logger = LoggerFactory.getLogger(getClass());
+   private final MqPersistence persistence;

+   public record Export(long responseMsgId, FileStorageId crawlId) implements ActorStep {}
+   public record Run(long responseMsgId, FileStorageId crawlId, FileStorageId destId, long msgId) implements ActorStep {
+       public Run(long responseMsgId, FileStorageId crawlId, FileStorageId destId) {
+           this(responseMsgId, crawlId, destId, -1);
+       }
+   }
+   public record Fail(long responseMsgId, String message) implements ActorStep {}

-   public record Export(FileStorageId crawlId) implements ActorStep {}
-   public record Run(FileStorageId crawlId, FileStorageId destId) implements ActorStep {}
    @Override
    public ActorStep transition(ActorStep self) throws Exception {
        return switch(self) {
-           case Export(FileStorageId crawlId) -> {
-               var storage = storageService.allocateStorage(FileStorageType.EXPORT, "atag-export", "Anchor Tags " + LocalDateTime.now());
+           case Export(long responseMsgId, FileStorageId crawlId) -> {
+               persistence.updateMessageState(responseMsgId, MqMessageState.ACK);

-               if (storage == null) yield new Error("Bad storage id");
-               yield new Run(crawlId, storage.id());
+               var storage = storageService.allocateStorage(FileStorageType.EXPORT, "atags-export", "Atags " + LocalDateTime.now());
+
+               if (storage == null) yield new Fail(responseMsgId, "Bad storage id");
+
+               yield new Run(responseMsgId, crawlId, storage.id());
            }
-           case Run(FileStorageId crawlId, FileStorageId destId) -> {
+           case Run(long responseMsgId, FileStorageId crawlId, FileStorageId destId, long msgId) when msgId < 0 -> {
                storageService.setFileStorageState(destId, FileStorageState.NEW);

-               try {
-                   atagExporter.export(crawlId, destId);
-                   storageService.setFileStorageState(destId, FileStorageState.UNSET);
-               }
-               catch (Exception ex) {
-                   storageService.setFileStorageState(destId, FileStorageState.DELETE);
-                   yield new Error("Failed to export data");
-               }
+               long newMsgId = exportTasksOutbox.sendAsync(ExportTaskRequest.atags(crawlId, destId));
+               yield new Run(responseMsgId, crawlId, destId, newMsgId);
            }
+           case Run(long responseMsgId, FileStorageId crawlId, FileStorageId destId, long msgId) -> {
+               var rsp = processWatcher.waitResponse(exportTasksOutbox, ProcessService.ProcessId.EXPORT_TASKS, msgId);

-               yield new End();
+               if (rsp.state() != MqMessageState.OK) {
+                   storageService.flagFileForDeletion(destId);
+                   yield new Fail(responseMsgId, "Exporter failed");
+               }
+               else {
+                   storageService.setFileStorageState(destId, FileStorageState.UNSET);
+                   persistence.updateMessageState(responseMsgId, MqMessageState.OK);
+                   yield new End();
+               }
+           }
+           case Fail(long responseMsgId, String message) -> {
+               persistence.updateMessageState(responseMsgId, MqMessageState.ERR);
+               yield new Error(message);
+           }
            default -> new Error();
        };
    }

    @Override
    public String describe() {
        return "Export anchor tags from crawl data";
@@ -55,11 +83,15 @@ public class ExportAtagsActor extends RecordActorPrototype {
    @Inject
    public ExportAtagsActor(Gson gson,
                            FileStorageService storageService,
-                           AtagExporter atagExporter)
+                           ProcessOutboxes processOutboxes,
+                           MqPersistence persistence,
+                           ActorProcessWatcher processWatcher)
    {
        super(gson);
+       this.exportTasksOutbox = processOutboxes.getExportTasksOutbox();
        this.storageService = storageService;
-       this.atagExporter = atagExporter;
+       this.persistence = persistence;
+       this.processWatcher = processWatcher;
    }

}
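The same dispatch-then-await shape recurs in ExportFeedsActor and ExportTermFreqActor below: the first pass through Run sends the task asynchronously and re-enters the same step carrying the real message id; the second pass blocks on the reply. A self-contained toy version of that shape (all names hypothetical, with the outbox and watcher calls stubbed out):

    public class TwoPhaseRunExample {
        sealed interface Step {}
        record Run(long msgId) implements Step { Run() { this(-1); } }
        record Done() implements Step {}
        record Failed(String reason) implements Step {}

        long dispatch() { return 42L; }               // stands in for outbox.sendAsync(...)
        boolean awaitOk(long msgId) { return true; }  // stands in for processWatcher.waitResponse(...)

        Step transition(Step self) {
            return switch (self) {
                case Run(long msgId) when msgId < 0 -> new Run(dispatch());   // first pass: send the task
                case Run(long msgId) -> awaitOk(msgId) ? new Done()           // second pass: await the reply
                                                       : new Failed("task failed");
                default -> new Failed("unknown step");
            };
        }
    }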
@@ -5,8 +5,12 @@ import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.actor.prototype.RecordActorPrototype;
import nu.marginalia.actor.state.ActorStep;
-import nu.marginalia.extractor.ExporterIf;
-import nu.marginalia.extractor.FeedExporter;
+import nu.marginalia.mq.MqMessageState;
+import nu.marginalia.mq.outbox.MqOutbox;
+import nu.marginalia.mq.persistence.MqPersistence;
+import nu.marginalia.mqapi.tasks.ExportTaskRequest;
+import nu.marginalia.process.ProcessOutboxes;
+import nu.marginalia.process.ProcessService;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageId;
import nu.marginalia.storage.model.FileStorageState;
@@ -19,33 +23,52 @@ import java.time.LocalDateTime;
@Singleton
public class ExportFeedsActor extends RecordActorPrototype {
    private final FileStorageService storageService;
+   private final ActorProcessWatcher processWatcher;
+   private final MqOutbox exportTasksOutbox;
+   private final MqPersistence persistence;
    private final Logger logger = LoggerFactory.getLogger(getClass());

-   private final ExporterIf feedExporter;
-   public record Export(FileStorageId crawlId) implements ActorStep {}
-   public record Run(FileStorageId crawlId, FileStorageId destId) implements ActorStep {}
+   public record Export(long responseMsgId, FileStorageId crawlId) implements ActorStep {}
+   public record Run(long responseMsgId, FileStorageId crawlId, FileStorageId destId, long msgId) implements ActorStep {
+       public Run(long responseMsgId, FileStorageId crawlId, FileStorageId destId) {
+           this(responseMsgId, crawlId, destId, -1);
+       }
+   }
+   public record Fail(long responseMsgId, String message) implements ActorStep {}

    @Override
    public ActorStep transition(ActorStep self) throws Exception {
        return switch(self) {
-           case Export(FileStorageId crawlId) -> {
+           case Export(long responseMsgId, FileStorageId crawlId) -> {
+               persistence.updateMessageState(responseMsgId, MqMessageState.ACK);
+
                var storage = storageService.allocateStorage(FileStorageType.EXPORT, "feed-export", "Feeds " + LocalDateTime.now());

-               if (storage == null) yield new Error("Bad storage id");
-               yield new Run(crawlId, storage.id());
+               if (storage == null) yield new Fail(responseMsgId, "Bad storage id");
+               yield new Run(responseMsgId, crawlId, storage.id());
            }
-           case Run(FileStorageId crawlId, FileStorageId destId) -> {
+           case Run(long responseMsgId, FileStorageId crawlId, FileStorageId destId, long msgId) when msgId < 0 -> {
                storageService.setFileStorageState(destId, FileStorageState.NEW);

-               try {
-                   feedExporter.export(crawlId, destId);
-                   storageService.setFileStorageState(destId, FileStorageState.UNSET);
-               }
-               catch (Exception ex) {
-                   storageService.setFileStorageState(destId, FileStorageState.DELETE);
-                   yield new Error("Failed to export data");
-               }
+               long newMsgId = exportTasksOutbox.sendAsync(ExportTaskRequest.feeds(crawlId, destId));
+               yield new Run(responseMsgId, crawlId, destId, newMsgId);
            }
+           case Run(long responseMsgId, _, FileStorageId destId, long msgId) -> {
+               var rsp = processWatcher.waitResponse(exportTasksOutbox, ProcessService.ProcessId.EXPORT_TASKS, msgId);

-               yield new End();
+               if (rsp.state() != MqMessageState.OK) {
+                   storageService.flagFileForDeletion(destId);
+                   yield new Fail(responseMsgId, "Exporter failed");
+               }
+               else {
+                   storageService.setFileStorageState(destId, FileStorageState.UNSET);
+                   persistence.updateMessageState(responseMsgId, MqMessageState.OK);
+                   yield new End();
+               }
+           }
+           case Fail(long responseMsgId, String message) -> {
+               persistence.updateMessageState(responseMsgId, MqMessageState.ERR);
+               yield new Error(message);
+           }
            default -> new Error();
        };
@@ -60,11 +83,14 @@ public class ExportFeedsActor extends RecordActorPrototype {
    @Inject
    public ExportFeedsActor(Gson gson,
                            FileStorageService storageService,
-                           FeedExporter feedExporter)
+                           ActorProcessWatcher processWatcher,
+                           ProcessOutboxes outboxes, MqPersistence persistence)
    {
        super(gson);
        this.storageService = storageService;
-       this.feedExporter = feedExporter;
+       this.processWatcher = processWatcher;
+       this.exportTasksOutbox = outboxes.getExportTasksOutbox();
+       this.persistence = persistence;
    }

}
@@ -5,7 +5,11 @@ import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.actor.prototype.RecordActorPrototype;
import nu.marginalia.actor.state.ActorStep;
-import nu.marginalia.extractor.SampleDataExporter;
+import nu.marginalia.mq.MqMessageState;
+import nu.marginalia.mq.outbox.MqOutbox;
+import nu.marginalia.mqapi.tasks.ExportTaskRequest;
+import nu.marginalia.process.ProcessOutboxes;
+import nu.marginalia.process.ProcessService;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageId;
import nu.marginalia.storage.model.FileStorageState;
@@ -18,45 +22,52 @@ import java.time.LocalDateTime;
@Singleton
public class ExportSampleDataActor extends RecordActorPrototype {
    private final FileStorageService storageService;
+   private final ActorProcessWatcher processWatcher;
+   private final MqOutbox exportTasksOutbox;
    private final Logger logger = LoggerFactory.getLogger(getClass());

-   private final SampleDataExporter dataExporter;
    public record Export(FileStorageId crawlId, int size, String name) implements ActorStep {}
-   public record Run(FileStorageId crawlId, FileStorageId destId, int size, String name) implements ActorStep {}
+   public record Run(FileStorageId crawlId, FileStorageId destId, int size, String name, long msgId) implements ActorStep {
+       public Run(FileStorageId crawlId, FileStorageId destId, int size, String name) {
+           this(crawlId, destId, size, name, -1);
+       }
+   }

    @Override
    public ActorStep transition(ActorStep self) throws Exception {
        return switch(self) {
            case Export(FileStorageId crawlId, int size, String name) -> {
                var storage = storageService.allocateStorage(FileStorageType.EXPORT,
                        "crawl-sample-export",
-                       STR."Crawl Data Sample \{name}/\{size} \{LocalDateTime.now()}"
+                       "Crawl Data Sample " + name + "/" + size + " " + LocalDateTime.now()
                );

                if (storage == null) yield new Error("Bad storage id");
                yield new Run(crawlId, storage.id(), size, name);
            }
-           case Run(FileStorageId crawlId, FileStorageId destId, int size, String name) -> {
+           case Run(FileStorageId crawlId, FileStorageId destId, int size, String name, long msgId) when msgId < 0 -> {
                storageService.setFileStorageState(destId, FileStorageState.NEW);

-               try {
-                   dataExporter.export(crawlId, destId, size, name);
-                   storageService.setFileStorageState(destId, FileStorageState.UNSET);
-               }
-               catch (Exception ex) {
-                   storageService.setFileStorageState(destId, FileStorageState.DELETE);
-
-                   logger.error("Failed to export data", ex);
-
-                   yield new Error("Failed to export data");
-               }
-
-               yield new End();
+               long newMsgId = exportTasksOutbox.sendAsync(ExportTaskRequest.sampleData(crawlId, destId, size, name));
+               yield new Run(crawlId, destId, size, name, newMsgId);
            }
+           case Run(_, FileStorageId destId, _, _, long msgId) -> {
+               var rsp = processWatcher.waitResponse(exportTasksOutbox, ProcessService.ProcessId.EXPORT_TASKS, msgId);
+
+               if (rsp.state() != MqMessageState.OK) {
+                   storageService.flagFileForDeletion(destId);
+                   yield new Error("Exporter failed");
+               }
+               else {
+                   storageService.setFileStorageState(destId, FileStorageState.UNSET);
+                   yield new End();
+               }
+           }

            default -> new Error();
        };
    }

    @Override
    public String describe() {
        return "Export RSS/Atom feeds from crawl data";
@@ -65,11 +76,13 @@ public class ExportSampleDataActor extends RecordActorPrototype {
    @Inject
    public ExportSampleDataActor(Gson gson,
                                 FileStorageService storageService,
-                                SampleDataExporter dataExporter)
+                                ProcessOutboxes processOutboxes,
+                                ActorProcessWatcher processWatcher)
    {
        super(gson);
        this.storageService = storageService;
-       this.dataExporter = dataExporter;
+       this.processWatcher = processWatcher;
+       this.exportTasksOutbox = processOutboxes.getExportTasksOutbox();
    }

}
@@ -5,45 +5,70 @@ import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.actor.prototype.RecordActorPrototype;
import nu.marginalia.actor.state.ActorStep;
-import nu.marginalia.extractor.ExporterIf;
-import nu.marginalia.extractor.TermFrequencyExporter;
+import nu.marginalia.mq.MqMessageState;
+import nu.marginalia.mq.outbox.MqOutbox;
+import nu.marginalia.mq.persistence.MqPersistence;
+import nu.marginalia.mqapi.tasks.ExportTaskRequest;
+import nu.marginalia.process.ProcessOutboxes;
+import nu.marginalia.process.ProcessService;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageId;
import nu.marginalia.storage.model.FileStorageState;
import nu.marginalia.storage.model.FileStorageType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.time.LocalDateTime;

@Singleton
public class ExportTermFreqActor extends RecordActorPrototype {
    private final FileStorageService storageService;
-   private final ExporterIf exporter;
-   public record Export(FileStorageId crawlId) implements ActorStep {}
-   public record Run(FileStorageId crawlId, FileStorageId destId) implements ActorStep {}
+   private final ActorProcessWatcher processWatcher;
+   private final MqOutbox exportTasksOutbox;
+   private final Logger logger = LoggerFactory.getLogger(getClass());
+   private final MqPersistence persistence;

+   public record Export(long responseMsgId, FileStorageId crawlId) implements ActorStep {}
+   public record Run(long responseMsgId, FileStorageId crawlId, FileStorageId destId, long msgId) implements ActorStep {
+       public Run(long responseMsgId, FileStorageId crawlId, FileStorageId destId) {
+           this(responseMsgId, crawlId, destId, -1);
+       }
+   }
+   public record Fail(long responseMsgId, String message) implements ActorStep {}
    @Override
    public ActorStep transition(ActorStep self) throws Exception {
        return switch(self) {
-           case Export(FileStorageId crawlId) -> {
-               var storage = storageService.allocateStorage(FileStorageType.EXPORT, "term-freq-export", "Term Frequencies " + LocalDateTime.now());
+           case Export(long responseMsgId, FileStorageId crawlId) -> {
+               persistence.updateMessageState(responseMsgId, MqMessageState.ACK);
+               var storage = storageService.allocateStorage(FileStorageType.EXPORT, "term-freq", "Term Frequencies " + LocalDateTime.now());

-               if (storage == null) yield new Error("Bad storage id");
-               yield new Run(crawlId, storage.id());
+               if (storage == null) yield new Fail(responseMsgId, "Bad storage id");
+               yield new Run(responseMsgId, crawlId, storage.id());
            }
-           case Run(FileStorageId crawlId, FileStorageId destId) -> {
+           case Run(long responseMsgId, FileStorageId crawlId, FileStorageId destId, long msgId) when msgId < 0 -> {
                storageService.setFileStorageState(destId, FileStorageState.NEW);

-               try {
-                   exporter.export(crawlId, destId);
-                   storageService.setFileStorageState(destId, FileStorageState.UNSET);
-               }
-               catch (Exception ex) {
-                   storageService.setFileStorageState(destId, FileStorageState.DELETE);
-                   yield new Error("Failed to export data");
-               }
-
-               yield new End();
+               long newMsgId = exportTasksOutbox.sendAsync(ExportTaskRequest.termFreq(crawlId, destId));
+               yield new Run(responseMsgId, crawlId, destId, newMsgId);
            }
+           case Run(long responseMsgId, _, FileStorageId destId, long msgId) -> {
+               var rsp = processWatcher.waitResponse(exportTasksOutbox, ProcessService.ProcessId.EXPORT_TASKS, msgId);
+
+               if (rsp.state() != MqMessageState.OK) {
+                   storageService.flagFileForDeletion(destId);
+                   yield new Fail(responseMsgId, "Exporter failed");
+               }
+               else {
+                   storageService.setFileStorageState(destId, FileStorageState.UNSET);
+                   persistence.updateMessageState(responseMsgId, MqMessageState.OK);
+                   yield new End();
+               }
+           }
+           case Fail(long responseMsgId, String message) -> {
+               persistence.updateMessageState(responseMsgId, MqMessageState.ERR);
+               yield new Error(message);
+           }

            default -> new Error();
        };
    }
@@ -57,11 +82,15 @@ public class ExportTermFreqActor extends RecordActorPrototype {
    @Inject
    public ExportTermFreqActor(Gson gson,
                               FileStorageService storageService,
-                              TermFrequencyExporter exporter)
+                              ProcessOutboxes processOutboxes,
+                              MqPersistence persistence,
+                              ActorProcessWatcher processWatcher)
    {
        super(gson);
        this.storageService = storageService;
-       this.exporter = exporter;
+       this.persistence = persistence;
+       this.processWatcher = processWatcher;
+       this.exportTasksOutbox = processOutboxes.getExportTasksOutbox();
    }

}
code/execution/java/nu/marginalia/actor/task/LiveCrawlActor.java (new file, 114 lines)
@@ -0,0 +1,114 @@
package nu.marginalia.actor.task;

import com.google.gson.Gson;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.IndexLocations;
import nu.marginalia.actor.ExecutorActor;
import nu.marginalia.actor.ExecutorActorStateMachines;
import nu.marginalia.actor.prototype.RecordActorPrototype;
import nu.marginalia.actor.state.ActorStep;
import nu.marginalia.api.feeds.FeedsClient;
import nu.marginalia.mq.MqMessageState;
import nu.marginalia.mq.outbox.MqOutbox;
import nu.marginalia.mqapi.crawling.LiveCrawlRequest;
import nu.marginalia.process.ProcessOutboxes;
import nu.marginalia.process.ProcessService;
import nu.marginalia.storage.FileStorageService;
import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.nio.file.Files;
import java.nio.file.Path;
import java.time.Duration;
import java.util.Objects;

@Singleton
public class LiveCrawlActor extends RecordActorPrototype {

    // STATES
    private final ActorProcessWatcher processWatcher;
    private final MqOutbox mqLiveCrawlerOutbox;
    private final ExecutorActorStateMachines executorActorStateMachines;
    private final FeedsClient feedsClient;
    private final Logger logger = LoggerFactory.getLogger(getClass());

    private final FileStorageService fileStorageService;

    public record Initial() implements ActorStep {}
    public record Monitor(String feedsHash) implements ActorStep {}
    public record LiveCrawl(String feedsHash, long msgId) implements ActorStep {
        public LiveCrawl(String feedsHash) { this(feedsHash, -1); }
    }

    @Override
    public ActorStep transition(ActorStep self) throws Exception {
        logger.info("{}", self);
        return switch (self) {
            case Initial() -> {
                yield new Monitor("-");
            }
            case Monitor(String feedsHash) -> {
                // Sleep initially in case this is during start-up
                for (;;) {
                    try {
                        Thread.sleep(Duration.ofMinutes(15));
                        String currentHash = feedsClient.getFeedDataHash();
                        if (!Objects.equals(currentHash, feedsHash)) {
                            yield new LiveCrawl(currentHash);
                        }
                    }
                    catch (RuntimeException ex) {
                        logger.error("Failed to fetch feed data hash");
                    }
                }
            }
            case LiveCrawl(String feedsHash, long msgId) when msgId < 0 -> {
                // Clear the index journal before starting the crawl
                Path indexJournalLocation = IndexLocations.getIndexConstructionArea(fileStorageService).resolve("index-journal");
                if (Files.isDirectory(indexJournalLocation)) {
                    FileUtils.deleteDirectory(indexJournalLocation.toFile());
                }

                long id = mqLiveCrawlerOutbox.sendAsync(new LiveCrawlRequest());
                yield new LiveCrawl(feedsHash, id);
            }
            case LiveCrawl(String feedsHash, long msgId) -> {
                var rsp = processWatcher.waitResponse(mqLiveCrawlerOutbox, ProcessService.ProcessId.LIVE_CRAWLER, msgId);

                if (rsp.state() != MqMessageState.OK) {
                    yield new Error("Crawler failed");
                }

                // Build the index
                executorActorStateMachines.initFrom(ExecutorActor.CONVERT_AND_LOAD, new ConvertAndLoadActor.Rerank());

                yield new Monitor(feedsHash);
            }
            default -> new Error("Unknown state");
        };
    }

    @Override
    public String describe() {
        return "Actor that polls the feeds database for changes, and triggers the live crawler when needed";
    }

    @Inject
    public LiveCrawlActor(ActorProcessWatcher processWatcher,
                          ProcessOutboxes processOutboxes,
                          FeedsClient feedsClient,
                          Gson gson,
                          ExecutorActorStateMachines executorActorStateMachines, FileStorageService fileStorageService)
    {
        super(gson);
        this.processWatcher = processWatcher;
        this.mqLiveCrawlerOutbox = processOutboxes.getLiveCrawlerOutbox();
        this.executorActorStateMachines = executorActorStateMachines;
        this.feedsClient = feedsClient;
        this.fileStorageService = fileStorageService;
    }

}
@@ -5,53 +5,59 @@ import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.actor.prototype.RecordActorPrototype;
import nu.marginalia.actor.state.ActorStep;
+import nu.marginalia.mq.MqMessageState;
+import nu.marginalia.mq.outbox.MqOutbox;
+import nu.marginalia.mqapi.tasks.ExportTaskRequest;
+import nu.marginalia.process.ProcessOutboxes;
import nu.marginalia.process.ProcessService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.atomic.AtomicBoolean;

@Singleton
public class TriggerAdjacencyCalculationActor extends RecordActorPrototype {

+   private final ActorProcessWatcher processWatcher;
+   private final MqOutbox exportTasksOutbox;
    private final Logger logger = LoggerFactory.getLogger(getClass());
-   private final ProcessService processService;
-   private final ExecutorService executor = Executors.newSingleThreadExecutor();

-   public record Run() implements ActorStep {}
+   public record Run(long msgId) implements ActorStep {
+       public Run() {
+           this(-1);
+       }
+   }

    @Override
    public ActorStep transition(ActorStep self) throws Exception {
-       return switch (self) {
-           case Run() -> {
-               AtomicBoolean hasError = new AtomicBoolean(false);
-               var future = executor.submit(() -> {
-                   try {
-                       processService.trigger(ProcessService.ProcessId.ADJACENCIES_CALCULATOR, "load");
-                   }
-                   catch (Exception ex) {
-                       logger.warn("Error triggering adjacency calculation", ex);
-                       hasError.set(true);
-                   }
-               });
-               future.get();
-
-               if (hasError.get()) {
-                   yield new Error("Error triggering adjacency calculation");
-               }
-               yield new End();
+       return switch(self) {
+           case Run(long msgId) when msgId < 0 -> {
+               long newMsgId = exportTasksOutbox.sendAsync(ExportTaskRequest.adjacencies());
+               yield new Run(newMsgId);
            }
+           case Run(long msgId) -> {
+               var rsp = processWatcher.waitResponse(exportTasksOutbox, ProcessService.ProcessId.EXPORT_TASKS, msgId);
+
+               if (rsp.state() != MqMessageState.OK) {
+                   yield new Error("Exporter failed");
+               }
+               else {
+                   yield new End();
+               }
+           }
+
+           default -> new Error();
        };
    }

    @Inject
    public TriggerAdjacencyCalculationActor(Gson gson,
-                                           ProcessService processService) {
+                                           ProcessOutboxes processOutboxes,
+                                           ActorProcessWatcher processWatcher) {
        super(gson);
-       this.processService = processService;
-
+       this.exportTasksOutbox = processOutboxes.getExportTasksOutbox();
+       this.processWatcher = processWatcher;

    }

    @Override
@@ -5,8 +5,10 @@ import com.google.inject.Singleton;
 import io.grpc.stub.StreamObserver;
 import nu.marginalia.actor.ExecutorActor;
 import nu.marginalia.actor.ExecutorActorControlService;
+import nu.marginalia.actor.precession.ExportAllPrecessionActor;
 import nu.marginalia.actor.task.*;
 import nu.marginalia.functions.execution.api.*;
+import nu.marginalia.service.module.ServiceConfiguration;
 import nu.marginalia.service.server.DiscoverableService;
 import nu.marginalia.storage.model.FileStorageId;

@@ -16,17 +18,21 @@ public class ExecutorExportGrpcService
         implements DiscoverableService
 {
     private final ExecutorActorControlService actorControlService;
+    private final ServiceConfiguration serviceConfiguration;

     @Inject
-    public ExecutorExportGrpcService(ExecutorActorControlService actorControlService) {
+    public ExecutorExportGrpcService(ExecutorActorControlService actorControlService, ServiceConfiguration serviceConfiguration) {
         this.actorControlService = actorControlService;
+        this.serviceConfiguration = serviceConfiguration;
     }

     @Override
-    public void exportAtags(RpcFileStorageId request, StreamObserver<Empty> responseObserver) {
+    public void exportAtags(RpcExportRequest request, StreamObserver<Empty> responseObserver) {
         try {
             actorControlService.startFrom(ExecutorActor.EXPORT_ATAGS,
-                    new ExportAtagsActor.Export(FileStorageId.of(request.getFileStorageId()))
+                    new ExportAtagsActor.Export(
+                            request.getMsgId(),
+                            FileStorageId.of(request.getFileStorageId()))
             );
             responseObserver.onNext(Empty.getDefaultInstance());
             responseObserver.onCompleted();
@@ -55,10 +61,12 @@ public class ExecutorExportGrpcService
     }

     @Override
-    public void exportRssFeeds(RpcFileStorageId request, StreamObserver<Empty> responseObserver) {
+    public void exportRssFeeds(RpcExportRequest request, StreamObserver<Empty> responseObserver) {
         try {
             actorControlService.startFrom(ExecutorActor.EXPORT_FEEDS,
-                    new ExportFeedsActor.Export(FileStorageId.of(request.getFileStorageId()))
+                    new ExportFeedsActor.Export(
+                            request.getMsgId(),
+                            FileStorageId.of(request.getFileStorageId()))
             );
             responseObserver.onNext(Empty.getDefaultInstance());
             responseObserver.onCompleted();
@@ -69,10 +77,10 @@ public class ExecutorExportGrpcService
     }

     @Override
-    public void exportTermFrequencies(RpcFileStorageId request, StreamObserver<Empty> responseObserver) {
+    public void exportTermFrequencies(RpcExportRequest request, StreamObserver<Empty> responseObserver) {
         try {
             actorControlService.startFrom(ExecutorActor.EXPORT_TERM_FREQUENCIES,
-                    new ExportTermFreqActor.Export(FileStorageId.of(request.getFileStorageId()))
+                    new ExportTermFreqActor.Export(request.getMsgId(), FileStorageId.of(request.getFileStorageId()))
             );
             responseObserver.onNext(Empty.getDefaultInstance());
             responseObserver.onCompleted();
@@ -109,4 +117,48 @@ public class ExecutorExportGrpcService
         }
     }

+    @Override
+    public void exportAllAtags(Empty request, StreamObserver<Empty> responseObserver) {
+        if (serviceConfiguration.node() != 1) {
+            responseObserver.onError(new IllegalArgumentException("Export all atags is only available on node 1"));
+            return; // don't continue and complete the call after signalling an error
+        }
+        try {
+            actorControlService.startFrom(ExecutorActor.PREC_EXPORT_ALL,
+                    new ExportAllPrecessionActor.Initial(ExportAllPrecessionActor.ExportTask.ATAGS)
+            );
+            responseObserver.onNext(Empty.getDefaultInstance());
+            responseObserver.onCompleted();
+        }
+        catch (Exception e) {
+            responseObserver.onError(e);
+        }
+    }
+
+    @Override
+    public void exportAllFeeds(Empty request, StreamObserver<Empty> responseObserver) {
+        try {
+            actorControlService.startFrom(ExecutorActor.PREC_EXPORT_ALL,
+                    new ExportAllPrecessionActor.Initial(ExportAllPrecessionActor.ExportTask.FEEDS)
+            );
+            responseObserver.onNext(Empty.getDefaultInstance());
+            responseObserver.onCompleted();
+        }
+        catch (Exception e) {
+            responseObserver.onError(e);
+        }
+    }
+
+    @Override
+    public void exportAllTfreqs(Empty request, StreamObserver<Empty> responseObserver) {
+        try {
+            actorControlService.startFrom(ExecutorActor.PREC_EXPORT_ALL,
+                    new ExportAllPrecessionActor.Initial(ExportAllPrecessionActor.ExportTask.TFREQ)
+            );
+            responseObserver.onNext(Empty.getDefaultInstance());
+            responseObserver.onCompleted();
+        }
+        catch (Exception e) {
+            responseObserver.onError(e);
+        }
+    }
 }
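All per-storage export endpoints now accept an RpcExportRequest rather than a bare RpcFileStorageId, so the caller's message-queue id travels with the request and the actors can report completion against it. A sketch of constructing such a request follows; the setter names are inferred from the getMsgId()/getFileStorageId() accessors above (protobuf generates a matching builder setter for each getter), and the client stub variable is hypothetical:

```java
// Builder calls mirror the accessors used by the service above.
RpcExportRequest request = RpcExportRequest.newBuilder()
        .setMsgId(mqMessageId)        // MQ message to track the export with
        .setFileStorageId(storageId)  // long id of the storage area to export
        .build();

exportClient.exportAtags(request);    // hypothetical client stub call
```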
@@ -2,7 +2,6 @@ package nu.marginalia.execution;

 import com.google.inject.Inject;
 import io.grpc.stub.StreamObserver;
-import lombok.SneakyThrows;
 import nu.marginalia.WmsaHome;
 import nu.marginalia.actor.ActorApi;
 import nu.marginalia.actor.ExecutorActor;
@@ -16,9 +15,12 @@ import nu.marginalia.service.module.ServiceConfiguration;
 import nu.marginalia.service.server.DiscoverableService;
 import nu.marginalia.storage.FileStorageService;
 import nu.marginalia.storage.model.FileStorageId;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;

 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.time.Duration;
 import java.time.LocalDateTime;
 import java.time.ZoneId;
 import java.time.format.DateTimeFormatter;
@@ -33,6 +35,8 @@ public class ExecutorGrpcService
     private final ServiceConfiguration serviceConfiguration;
     private final ExecutorActorControlService actorControlService;

+    private static final Logger logger = LoggerFactory.getLogger(ExecutorGrpcService.class);
+
     @Inject
     public ExecutorGrpcService(ActorApi actorApi,
                                FileStorageService fileStorageService,
@@ -228,14 +232,35 @@ public class ExecutorGrpcService
         }
     }

-    @SneakyThrows
     private RpcFileStorageEntry createFileModel(Path path) {
-        return RpcFileStorageEntry.newBuilder()
-                .setName(path.toFile().getName())
-                .setSize(Files.size(path))
-                .setLastModifiedTime(Files.getLastModifiedTime(path).toInstant().toString())
-                .build();
+        try {
+            return RpcFileStorageEntry.newBuilder()
+                    .setName(path.toFile().getName())
+                    .setSize(Files.size(path))
+                    .setLastModifiedTime(Files.getLastModifiedTime(path).toInstant().toString())
+                    .build();
+        }
+        catch (Exception e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    @Override
+    public void restartExecutorService(Empty request, StreamObserver<Empty> responseObserver) {
+        responseObserver.onNext(Empty.getDefaultInstance());
+        responseObserver.onCompleted();
+
+        logger.info("Restarting executor service on node {}", serviceConfiguration.node());
+
+        try {
+            // Wait for the response to be sent before restarting
+            Thread.sleep(Duration.ofSeconds(5));
+        }
+        catch (InterruptedException e) {
+            logger.warn("Interrupted while waiting for restart", e);
+        }
+
+        System.exit(0);
+    }

 }
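restartExecutorService deliberately completes the gRPC call first, sleeps five seconds so the response can reach the client, and only then exits; the process is presumably brought back up by whatever supervises the container. The createFileModel rewrite replaces Lombok's @SneakyThrows with an explicit try/catch that rethrows unchecked, matching the removed lombok.SneakyThrows import above. A minimal standalone sketch of that pattern:

```java
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

class FileSizes {
    // Explicit equivalent of Lombok's @SneakyThrows: wrap the checked
    // IOException in an unchecked RuntimeException so the method signature
    // needs no throws clause.
    static long sizeOf(Path path) {
        try {
            return Files.size(path);
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
```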
@@ -13,6 +13,8 @@ public class ProcessOutboxes {
     private final MqOutbox loaderOutbox;
     private final MqOutbox crawlerOutbox;
     private final MqOutbox indexConstructorOutbox;
+    private final MqOutbox liveCrawlerOutbox;
+    private final MqOutbox exportTasksOutbox;

     @Inject
     public ProcessOutboxes(BaseServiceParams params, MqPersistence persistence) {
@@ -44,6 +46,22 @@ public class ProcessOutboxes {
             params.configuration.node(),
             params.configuration.instanceUuid()
         );
+
+        liveCrawlerOutbox = new MqOutbox(persistence,
+                ProcessInboxNames.LIVE_CRAWLER_INBOX,
+                params.configuration.node(),
+                params.configuration.serviceName(),
+                params.configuration.node(),
+                params.configuration.instanceUuid()
+        );
+
+        exportTasksOutbox = new MqOutbox(persistence,
+                ProcessInboxNames.EXPORT_TASK_INBOX,
+                params.configuration.node(),
+                params.configuration.serviceName(),
+                params.configuration.node(),
+                params.configuration.instanceUuid()
+        );
     }

@@ -60,4 +78,8 @@ public class ProcessOutboxes {
     }

     public MqOutbox getIndexConstructorOutbox() { return indexConstructorOutbox; }
+
+    public MqOutbox getLiveCrawlerOutbox() { return liveCrawlerOutbox; }
+
+    public MqOutbox getExportTasksOutbox() { return exportTasksOutbox; }
 }
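ProcessOutboxes hands each external process its message-queue outbox; the new export-tasks outbox is the one TriggerAdjacencyCalculationActor sends on at the top of this section. A sketch of the full round trip, composed only of calls that appear elsewhere in this diff:

```java
// Send an export-task request and wait for the spawned process to answer.
MqOutbox outbox = processOutboxes.getExportTasksOutbox();
long msgId = outbox.sendAsync(ExportTaskRequest.adjacencies());

var rsp = processWatcher.waitResponse(outbox, ProcessService.ProcessId.EXPORT_TASKS, msgId);
if (rsp.state() != MqMessageState.OK) {
    // the export-tasks process failed; an actor would transition to Error here
}
```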
@@ -3,14 +3,14 @@ package nu.marginalia.process;

 import com.google.inject.Inject;
 import com.google.inject.Singleton;
 import nu.marginalia.WmsaHome;
-import nu.marginalia.adjacencies.WebsiteAdjacenciesCalculator;
 import nu.marginalia.converting.ConverterMain;
 import nu.marginalia.crawl.CrawlerMain;
 import nu.marginalia.index.IndexConstructorMain;
 import nu.marginalia.livecrawler.LiveCrawlerMain;
 import nu.marginalia.loading.LoaderMain;
 import nu.marginalia.service.ProcessMainClass;
 import nu.marginalia.service.control.ServiceEventLog;
 import nu.marginalia.service.server.BaseServiceParams;
+import nu.marginalia.task.ExportTasksMain;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.slf4j.Marker;
@@ -37,23 +37,24 @@ public class ProcessService {
     private final int node;


-    public static ProcessService.ProcessId translateExternalIdBase(String id) {
+    public static ProcessId translateExternalIdBase(String id) {
         return switch (id) {
-            case "converter" -> ProcessService.ProcessId.CONVERTER;
-            case "crawler" -> ProcessService.ProcessId.CRAWLER;
-            case "loader" -> ProcessService.ProcessId.LOADER;
-            case "website-adjacencies-calculator" -> ProcessService.ProcessId.ADJACENCIES_CALCULATOR;
-            case "index-constructor" -> ProcessService.ProcessId.INDEX_CONSTRUCTOR;
+            case "converter" -> ProcessId.CONVERTER;
+            case "crawler" -> ProcessId.CRAWLER;
+            case "loader" -> ProcessId.LOADER;
+            case "export-tasks" -> ProcessId.EXPORT_TASKS;
+            case "index-constructor" -> ProcessId.INDEX_CONSTRUCTOR;
             default -> null;
         };
     }

     public enum ProcessId {
         CRAWLER(CrawlerMain.class),
         LIVE_CRAWLER(LiveCrawlerMain.class),
         CONVERTER(ConverterMain.class),
         LOADER(LoaderMain.class),
         INDEX_CONSTRUCTOR(IndexConstructorMain.class),
-        ADJACENCIES_CALCULATOR(WebsiteAdjacenciesCalculator.class)
+        EXPORT_TASKS(ExportTasksMain.class),
         ;

         public final String mainClass;
@@ -64,10 +65,11 @@ public class ProcessService {
         List<String> envOpts() {
             String variable = switch (this) {
                 case CRAWLER -> "CRAWLER_PROCESS_OPTS";
                 case LIVE_CRAWLER -> "LIVE_CRAWLER_PROCESS_OPTS";
                 case CONVERTER -> "CONVERTER_PROCESS_OPTS";
                 case LOADER -> "LOADER_PROCESS_OPTS";
                 case INDEX_CONSTRUCTOR -> "INDEX_CONSTRUCTION_PROCESS_OPTS";
-                case ADJACENCIES_CALCULATOR -> "ADJACENCIES_CALCULATOR_PROCESS_OPTS";
+                case EXPORT_TASKS -> "EXPORT_TASKS_PROCESS_OPTS";
             };
             String value = System.getenv(variable);
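translateExternalIdBase maps the external process name onto the ProcessId enum (the old website-adjacencies-calculator id gives way to export-tasks), and envOpts picks up extra JVM options from a per-process environment variable. For example, grounded in the two switches above:

```java
// Resolves per the switch above; unknown ids yield null.
ProcessService.ProcessId id = ProcessService.translateExternalIdBase("export-tasks");
assert id == ProcessService.ProcessId.EXPORT_TASKS;

// Extra JVM flags for that process are then read from EXPORT_TASKS_PROCESS_OPTS,
// e.g. in a shell (the invocation command is hypothetical):
//   EXPORT_TASKS_PROCESS_OPTS="-Xmx8G" ./run-executor.sh
```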
@@ -9,7 +9,8 @@ import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.StandardCopyOption;
-import java.util.*;
+import java.util.Set;
+import java.util.TreeSet;
 import java.util.concurrent.Executors;
 import java.util.concurrent.ForkJoinPool;
 import java.util.stream.Collectors;
@@ -97,7 +98,7 @@ public class RedditSideloadHelper {

     private static Path getRedditDbPath(RedditFilePair pair) throws IOException {
         String hash = SideloadHelper.getCrc32FileHash(pair.commentsPath());
-        return pair.rootDir().resolve(STR."\{pair.fileNameBase}.\{hash}.db");
+        return pair.rootDir().resolve(pair.fileNameBase + "." + hash + ".db");
     }

 }
@@ -83,7 +83,7 @@ public class StackExchangeSideloadHelper {
         String fileName = sourcePath.toFile().getName();
         String hash = SideloadHelper.getCrc32FileHash(sourcePath);

-        return sourcePath.getParent().resolve(STR."\{fileName}.\{hash}.db");
+        return sourcePath.getParent().resolve(fileName + "." + hash + ".db");
     }

     private static Optional<String> getStackexchangeDomainFromFilename(String fileName) {
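Both sideload helpers retire the STR. string-template syntax in favor of plain concatenation. String templates were a preview feature in JDK 21 and 22 and were withdrawn rather than finalized (removed in JDK 23), so templated code stops compiling on newer JDKs; the two forms are otherwise equivalent:

```java
import java.nio.file.Path;

class TemplateMigration {
    static Path dbPath(Path dir, String fileName, String hash) {
        // Preview-only template form (JDK 21/22 with --enable-preview):
        //   return dir.resolve(STR."\{fileName}.\{hash}.db");
        // Portable concatenation, as used in the diff:
        return dir.resolve(fileName + "." + hash + ".db");
    }
}
```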
@@ -1,24 +0,0 @@
-plugins {
-    id 'java'
-    id 'jvm-test-suite'
-}
-
-java {
-    toolchain {
-        languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
-    }
-}
-
-apply from: "$rootProject.projectDir/srcsets.gradle"
-
-dependencies {
-    implementation libs.bundles.slf4j
-
-    implementation libs.notnull
-    implementation libs.gson
-
-    testImplementation libs.bundles.slf4j.test
-    testImplementation libs.bundles.junit
-    testImplementation libs.mockito
-
-}
@@ -1,58 +0,0 @@
-package nu.marginalia.feedlot;
-
-import com.google.gson.Gson;
-import nu.marginalia.feedlot.model.FeedItems;
-
-import java.net.URI;
-import java.net.http.HttpClient;
-import java.net.http.HttpRequest;
-import java.net.http.HttpResponse;
-import java.time.Duration;
-import java.util.concurrent.Executors;
-import java.util.concurrent.CompletableFuture;
-
-public class FeedlotClient {
-    private final String feedlotHost;
-    private final int feedlotPort;
-    private final Gson gson;
-    private final HttpClient httpClient;
-    private final Duration requestTimeout;
-
-    public FeedlotClient(String feedlotHost,
-                         int feedlotPort,
-                         Gson gson,
-                         Duration connectTimeout,
-                         Duration requestTimeout
-                         )
-    {
-        this.feedlotHost = feedlotHost;
-        this.feedlotPort = feedlotPort;
-        this.gson = gson;
-
-        httpClient = HttpClient.newBuilder()
-                .executor(Executors.newCachedThreadPool())
-                .connectTimeout(connectTimeout)
-                .build();
-        this.requestTimeout = requestTimeout;
-    }
-
-    public CompletableFuture<FeedItems> getFeedItems(String domainName) {
-        return httpClient.sendAsync(
-                HttpRequest.newBuilder()
-                        .uri(URI.create("http://%s:%d/feed/%s".formatted(feedlotHost, feedlotPort, domainName)))
-                        .GET()
-                        .timeout(requestTimeout)
-                        .build(),
-                HttpResponse.BodyHandlers.ofString()
-        ).thenApply(HttpResponse::body)
-         .thenApply(this::parseFeedItems);
-    }
-
-    private FeedItems parseFeedItems(String s) {
-        return gson.fromJson(s, FeedItems.class);
-    }
-
-    public void stop() {
-        httpClient.close();
-    }
-}
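One detail of the deleted client worth noting: the HttpClient.close() call in stop() only exists since JDK 21, where HttpClient became AutoCloseable. The same lifecycle can therefore also be expressed with try-with-resources:

```java
import java.net.http.HttpClient;

// Since JDK 21, HttpClient implements AutoCloseable:
try (HttpClient client = HttpClient.newHttpClient()) {
    // ... issue requests ...
} // close() performs an orderly shutdown of the client
```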
@@ -1,17 +0,0 @@
-package nu.marginalia.feedlot.model;
-
-public record FeedItem(String title, String date, String description, String url) {
-
-    public String pubDay() { // Extract the date from an ISO style date string
-        if (date.length() > 10) {
-            return date.substring(0, 10);
-        }
-        return date;
-    }
-
-    public String descriptionSafe() {
-        return description
-                .replace("<", "&lt;")
-                .replace(">", "&gt;");
-    }
-}
@@ -1,6 +0,0 @@
-package nu.marginalia.feedlot.model;
-
-import java.util.List;
-
-public record FeedItems(String domain, String feedUrl, String updated, List<FeedItem> items) {
-}
@@ -1,20 +0,0 @@
-Client for [FeedlotTheFeedBot](https://github.com/MarginaliaSearch/FeedLotTheFeedBot),
-the RSS/Atom feed fetcher and cache for Marginalia Search.
-
-This service is external to the Marginalia Search codebase,
-as it is not a core part of the search engine and has other
-utilities.
-
-## Example
-
-```java
-
-import java.time.Duration;
-
-var client = new FeedlotClient("localhost", 8080,
-        gson,
-        Duration.ofMillis(100), // connect timeout
-        Duration.ofMillis(100)); // request timeout
-
-CompletableFuture<FeedItems> items = client.getFeedItems("www.marginalia.nu");
-```
@@ -6,4 +6,8 @@ public record BrowseResultSet(Collection<BrowseResult> results, String focusDomain) {
     public BrowseResultSet(Collection<BrowseResult> results) {
         this(results, "");
     }
+
+    public boolean hasFocusDomain() {
+        return focusDomain != null && !focusDomain.isBlank();
+    }
 }
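The new hasFocusDomain accessor spares callers a null-and-blank check when deciding whether the browse results were built around a focus domain. A sketch of a call site, with hypothetical surrounding names:

```java
// Hypothetical call site: attach the focus domain to the view model only when set.
if (resultSet.hasFocusDomain()) {
    model.put("focusDomain", resultSet.focusDomain()); // record accessor
}
```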
@@ -3,7 +3,6 @@ package nu.marginalia.screenshot;

 import com.google.common.base.Strings;
 import com.google.inject.Inject;
 import com.zaxxer.hikari.HikariDataSource;
-import lombok.SneakyThrows;
 import nu.marginalia.db.DbDomainQueries;
 import org.apache.commons.io.IOUtils;
 import org.slf4j.Logger;
@@ -11,6 +10,7 @@ import org.slf4j.LoggerFactory;
 import spark.Request;
 import spark.Response;

+import java.io.IOException;
 import java.sql.SQLException;

 import static java.lang.Integer.parseInt;
@@ -48,7 +48,6 @@ public class ScreenshotService {
         return false;
     }

-    @SneakyThrows
     public Object serveScreenshotRequest(Request request, Response response) {
         if (Strings.isNullOrEmpty(request.params("id"))) {
             response.redirect("https://search.marginalia.nu/");
@@ -75,6 +74,9 @@ public class ScreenshotService {
             return "";
         }
     }
+    catch (IOException ex) {
+        logger.warn("IO error", ex);
+    }
     catch (SQLException ex) {
         logger.warn("SQL error", ex);
     }
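With @SneakyThrows gone, serveScreenshotRequest handles IO and SQL failures explicitly, logging each with a distinct message. Had both failure modes been treated alike, a multi-catch would suffice; a sketch with a hypothetical source interface standing in for the database fetch:

```java
import java.io.IOException;
import java.sql.SQLException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

class ScreenshotFallback {
    private static final Logger logger = LoggerFactory.getLogger(ScreenshotFallback.class);

    // Hypothetical abstraction over the fetch done in ScreenshotService.
    interface ScreenshotSource {
        byte[] fetch(String domain) throws IOException, SQLException;
    }

    // Multi-catch collapses the handlers when both failure modes are treated
    // alike; the diff keeps separate blocks to distinguish the log messages.
    static byte[] fetchOrEmpty(ScreenshotSource source, String domain) {
        try {
            return source.fetch(domain);
        }
        catch (IOException | SQLException ex) {
            logger.warn("Failed to serve screenshot for {}", domain, ex);
            return new byte[0];
        }
    }
}
```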
Some files were not shown because too many files have changed in this diff.