diff --git a/code/common/config/java/nu/marginalia/WmsaHome.java b/code/common/config/java/nu/marginalia/WmsaHome.java
index 5ce8a910a..e815fd0c2 100644
--- a/code/common/config/java/nu/marginalia/WmsaHome.java
+++ b/code/common/config/java/nu/marginalia/WmsaHome.java
@@ -114,4 +114,7 @@ public class WmsaHome {
}
+ public static Path getLangugeConfig() { // NOTE(review): "Languge" looks like a typo for "Language"; a rename must also update callers
+ return getHomePath().resolve("conf/languages.xml"); // conf/languages.xml resolved against getHomePath(); existence is not checked here
+ }
}
diff --git a/code/libraries/language-processing/java/nu/marginalia/language/config/LanguageConfiguration.java b/code/libraries/language-processing/java/nu/marginalia/language/config/LanguageConfiguration.java
index 11890e642..976d2d8b8 100644
--- a/code/libraries/language-processing/java/nu/marginalia/language/config/LanguageConfiguration.java
+++ b/code/libraries/language-processing/java/nu/marginalia/language/config/LanguageConfiguration.java
@@ -111,6 +111,19 @@ public class LanguageConfiguration {
logger.info("Loaded language configuration: {}", languages);
}
+ InputStream findLanguageConfiguration() throws IOException { // opens the language config; result may be null, see the classpath lookup below
+ Path override = WmsaHome.getLangugeConfig();
+ if (Files.exists(override)) { // a file at the WmsaHome-provided path wins over any classpath resource
+ return Files.newInputStream(override, StandardOpenOption.READ);
+ }
+ // The system property language.experimental set to "true" selects the experimental resource; otherwise the default one.
+ String resourceName = Boolean.getBoolean("language.experimental")
+ ? "languages-experimental.xml"
+ : "languages-default.xml";
+ // getSystemResourceAsStream returns null when the named resource is not found on the classpath.
+ return ClassLoader.getSystemResourceAsStream(resourceName);
+ }
+
private void parseLanguages(Document doc) {
NodeList languageNodes = doc.getElementsByTagName("language");
diff --git a/code/libraries/language-processing/resources/languages-experimental.xml b/code/libraries/language-processing/resources/languages-experimental.xml
new file mode 100644
index 000000000..d2268e3ac
--- /dev/null
+++ b/code/libraries/language-processing/resources/languages-experimental.xml
@@ -0,0 +1,135 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+]>
+
+
+
+
+
+ !(IN TO CC DT)
+
+
+
+
+
+ NNP*
+ NNP* NNP*
+ NNP* (NNP* IN DT CC) NNP*
+ NNP* (NNP* IN DT CC) (NNP* IN DT CC) NNP*
+
+
+ VBG
+ RB VBG
+ (NNP* JJ)
+ (NN* JJ) NN*
+ (NN* JJ) (NN* JJ) NN*
+ (NN* JJ) (NN* JJ) (NN* JJ) NN*
+ (NNP* JJ) (NNP* IN TO CC) NNP*
+ (NNP* JJ) (NNP* IN TO CC) DT NNP*
+ (NNP* JJ) (NNP* IN TO CC) (NNP* IN TO CC) NNP*
+
+
+ (VBD VBZ)
+ MD VB
+ VBZ DT
+ (DT RB VBD VBP VBN JJ*) (VBD VBG VBP VBN VBZ)
+
+
+ !(CC IN DT TO)
+ !CC !(IN DT TO)
+ !CC * !(IN DT TO)
+ !CC * * !(IN DT TO)
+
+
+
+ (N* VBG VBN JJ* R* VBG)
+
+ (N* VBG VBN JJ* R* VBG) (N* VBG VBN)
+ (N* VBG VBN) CD
+
+ (N* VBG VBN JJ* R* VBG) (N* VBG VBN JJ* R* VBG) (N* VBG VBN)
+ NNP* (IN TO CC NNP*) (N* VBG VBN)
+ (N* VBG VBN) (N* VBG VBN) CD
+
+ (N* VBG VBN JJ* R* VBG) (N* VBG VBN JJ* R* VBG) (N* VBG VBN JJ* R* VBG) (N* VBG VBN)
+ NNP* (DT IN TO CC) (IN TO CC) NNP*
+
+
+
+
+
+
+
+
+
+
+ PROPN
+ PROPN PROPN
+ PROPN PROPN PROPN
+ PROPN PROPN PROPN PROPN
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/code/libraries/language-processing/resources/languages.xml b/code/libraries/language-processing/resources/languages.xml
index d2268e3ac..a3255e477 100644
--- a/code/libraries/language-processing/resources/languages.xml
+++ b/code/libraries/language-processing/resources/languages.xml
@@ -101,35 +101,9 @@
(N* VBG VBN JJ* R* VBG) (N* VBG VBN JJ* R* VBG) (N* VBG VBN JJ* R* VBG) (N* VBG VBN)
NNP* (DT IN TO CC) (IN TO CC) NNP*
+
-
-
-
-
-
-
-
-
- PROPN
- PROPN PROPN
- PROPN PROPN PROPN
- PROPN PROPN PROPN PROPN
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file