2025-08-25 09:05:31 +02:00
|
|
|
package nu.marginalia.util;
|
2022-05-19 18:05:10 +02:00
|
|
|
|
2023-03-04 13:19:01 +01:00
|
|
|
import nu.marginalia.LanguageModels;
|
|
|
|
import nu.marginalia.WmsaHome;
|
2022-05-19 18:05:10 +02:00
|
|
|
|
|
|
|
import java.nio.file.Files;
|
|
|
|
import java.nio.file.Path;
|
|
|
|
import java.util.Optional;
|
|
|
|
|
|
|
|
public class TestLanguageModels {
|
2022-05-25 18:02:19 +02:00
|
|
|
private static final Path LANGUAGE_MODELS_DEFAULT = WmsaHome.getHomePath().resolve("model");
|
2022-05-19 18:05:10 +02:00
|
|
|
|
2022-05-25 18:02:19 +02:00
|
|
|
public static Path getLanguageModelsPath() {
|
2022-05-19 18:05:10 +02:00
|
|
|
final Path languageModelsHome = Optional.ofNullable(System.getenv("LANGUAGE_MODELS_HOME"))
|
|
|
|
.map(Path::of)
|
|
|
|
.orElse(LANGUAGE_MODELS_DEFAULT);
|
|
|
|
|
|
|
|
if (!Files.isDirectory(languageModelsHome)) {
|
|
|
|
throw new IllegalStateException("Could not find $LANGUAGE_MODELS_HOME, see doc/language-models.md");
|
|
|
|
}
|
2022-05-25 18:02:19 +02:00
|
|
|
return languageModelsHome;
|
|
|
|
}
|
|
|
|
|
|
|
|
public static LanguageModels getLanguageModels() {
|
|
|
|
|
|
|
|
var languageModelsHome = getLanguageModelsPath();
|
2022-05-19 18:05:10 +02:00
|
|
|
|
|
|
|
return new LanguageModels(
|
2023-01-08 11:11:44 +01:00
|
|
|
languageModelsHome.resolve("tfreq-new-algo3.bin"),
|
2022-05-25 18:02:19 +02:00
|
|
|
languageModelsHome.resolve("opennlp-sentence.bin"),
|
2022-05-19 18:05:10 +02:00
|
|
|
languageModelsHome.resolve("English.RDR"),
|
|
|
|
languageModelsHome.resolve("English.DICT"),
|
2024-03-19 10:33:29 +01:00
|
|
|
languageModelsHome.resolve("lid.176.ftz"),
|
|
|
|
languageModelsHome.resolve("segments.bin")
|
2022-05-19 18:05:10 +02:00
|
|
|
);
|
|
|
|
}
|
|
|
|
}
|