1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-05 21:22:39 +02:00

(setup) Remove OpenNLP tokenization model

This update removes all references to the OpenNLP tokenization model from the setup script, the configuration classes (`LanguageModels`, `WmsaHome`), and the test files, as this model file is no longer used.
This commit is contained in:
Viktor Lofgren
2024-11-28 16:03:05 +01:00
parent 5fdd2c71f8
commit fdc3efa250
8 changed files with 2 additions and 18 deletions

View File

@@ -8,7 +8,6 @@ public class LanguageModels {
public final Path openNLPSentenceDetectionData;
public final Path posRules;
public final Path posDict;
public final Path openNLPTokenData;
public final Path fasttextLanguageModel;
public final Path segments;
@@ -16,14 +15,12 @@ public class LanguageModels {
Path openNLPSentenceDetectionData,
Path posRules,
Path posDict,
Path openNLPTokenData,
Path fasttextLanguageModel,
Path segments) {
this.termFrequencies = termFrequencies;
this.openNLPSentenceDetectionData = openNLPSentenceDetectionData;
this.posRules = posRules;
this.posDict = posDict;
this.openNLPTokenData = openNLPTokenData;
this.fasttextLanguageModel = fasttextLanguageModel;
this.segments = segments;
}
@@ -37,7 +34,6 @@ public class LanguageModels {
private Path openNLPSentenceDetectionData;
private Path posRules;
private Path posDict;
private Path openNLPTokenData;
private Path fasttextLanguageModel;
private Path segments;
@@ -64,11 +60,6 @@ public class LanguageModels {
return this;
}
public LanguageModelsBuilder openNLPTokenData(Path openNLPTokenData) {
this.openNLPTokenData = openNLPTokenData;
return this;
}
public LanguageModelsBuilder fasttextLanguageModel(Path fasttextLanguageModel) {
this.fasttextLanguageModel = fasttextLanguageModel;
return this;
@@ -80,11 +71,11 @@ public class LanguageModels {
}
public LanguageModels build() {
return new LanguageModels(this.termFrequencies, this.openNLPSentenceDetectionData, this.posRules, this.posDict, this.openNLPTokenData, this.fasttextLanguageModel, this.segments);
return new LanguageModels(this.termFrequencies, this.openNLPSentenceDetectionData, this.posRules, this.posDict, this.fasttextLanguageModel, this.segments);
}
public String toString() {
return "LanguageModels.LanguageModelsBuilder(termFrequencies=" + this.termFrequencies + ", openNLPSentenceDetectionData=" + this.openNLPSentenceDetectionData + ", posRules=" + this.posRules + ", posDict=" + this.posDict + ", openNLPTokenData=" + this.openNLPTokenData + ", fasttextLanguageModel=" + this.fasttextLanguageModel + ", segments=" + this.segments + ")";
return "LanguageModels.LanguageModelsBuilder(termFrequencies=" + this.termFrequencies + ", openNLPSentenceDetectionData=" + this.openNLPSentenceDetectionData + ", posRules=" + this.posRules + ", posDict=" + this.posDict + ", fasttextLanguageModel=" + this.fasttextLanguageModel + ", segments=" + this.segments + ")";
}
}
}

View File

@@ -104,7 +104,6 @@ public class WmsaHome {
home.resolve("model/opennlp-sentence.bin"),
home.resolve("model/English.RDR"),
home.resolve("model/English.DICT"),
home.resolve("model/opennlp-tok.bin"),
home.resolve("model/lid.176.ftz"),
home.resolve("model/segments.bin")
);

View File

@@ -30,7 +30,6 @@ public class TestLanguageModels {
languageModelsHome.resolve("opennlp-sentence.bin"),
languageModelsHome.resolve("English.RDR"),
languageModelsHome.resolve("English.DICT"),
languageModelsHome.resolve("opennlp-tokens.bin"),
languageModelsHome.resolve("lid.176.ftz"),
languageModelsHome.resolve("segments.bin")
);

View File

@@ -30,7 +30,6 @@ public class TestLanguageModels {
languageModelsHome.resolve("opennlp-sentence.bin"),
languageModelsHome.resolve("English.RDR"),
languageModelsHome.resolve("English.DICT"),
languageModelsHome.resolve("opennlp-tokens.bin"),
languageModelsHome.resolve("lid.176.ftz"),
languageModelsHome.resolve("segments.bin")
);

View File

@@ -30,7 +30,6 @@ public class TestLanguageModels {
languageModelsHome.resolve("opennlp-sentence.bin"),
languageModelsHome.resolve("English.RDR"),
languageModelsHome.resolve("English.DICT"),
languageModelsHome.resolve("opennlp-tokens.bin"),
languageModelsHome.resolve("lid.176.ftz"),
languageModelsHome.resolve("segments.bin")
);

View File

@@ -30,7 +30,6 @@ public class TestLanguageModels {
languageModelsHome.resolve("opennlp-sentence.bin"),
languageModelsHome.resolve("English.RDR"),
languageModelsHome.resolve("English.DICT"),
languageModelsHome.resolve("opennlp-tokens.bin"),
languageModelsHome.resolve("lid.176.ftz"),
languageModelsHome.resolve("segments.bin")
);

View File

@@ -30,7 +30,6 @@ public class TestLanguageModels {
languageModelsHome.resolve("opennlp-sentence.bin"),
languageModelsHome.resolve("English.RDR"),
languageModelsHome.resolve("English.DICT"),
languageModelsHome.resolve("opennlp-tokens.bin"),
languageModelsHome.resolve("lid.176.ftz"),
languageModelsHome.resolve("segments.bin")
);

View File

@@ -68,7 +68,6 @@ download_model model/English.DICT https://raw.githubusercontent.com/datquocnguye
download_model model/English.RDR https://raw.githubusercontent.com/datquocnguyen/RDRPOSTagger/e0fa60db14eae90b66dc67691f0f519eb19e3e66/Models/POS/English.RDR bec40a1160e12c33a1dd0563677104e4
download_model model/opennlp-sentence.bin https://archive.apache.org/dist/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin 5965ada99a2ca77beb8632bb47741b7a
download_model model/opennlp-tokens.bin https://archive.apache.org/dist/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin f097e14bce9edb3f558f6aaf2c3f7622
download_model model/segments.bin https://huggingface.co/MarginaliaNu/MarginaliaModelData/resolve/c9339e4224f1dfad7f628809c32687e748198ae3/segments.bin?download=true a2650796c77968b1bd9db0d7c01e3150
download_model model/tfreq-new-algo3.bin https://huggingface.co/MarginaliaNu/MarginaliaModelData/resolve/c9339e4224f1dfad7f628809c32687e748198ae3/tfreq-new-algo3.bin?download=true a38f0809f983723001dfc784d88ebb6d