mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-05 21:22:39 +02:00
(language) Update documentation for the language processing function
This commit is contained in:
@@ -2,6 +2,7 @@ plugins {
|
||||
id 'java'
|
||||
id 'jvm-test-suite'
|
||||
id 'gg.jte.gradle' version '3.1.15'
|
||||
id 'application'
|
||||
}
|
||||
|
||||
java {
|
||||
@@ -9,6 +10,10 @@ java {
|
||||
languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
|
||||
}
|
||||
}
|
||||
application {
|
||||
mainClass = 'nu.marginalia.language.LanguageProcessingTool'
|
||||
applicationName = 'language-processing-tool'
|
||||
}
|
||||
|
||||
apply from: "$rootProject.projectDir/srcsets.gradle"
|
||||
|
||||
|
@@ -28,6 +28,7 @@ public class LanguageProcessingTool extends Jooby {
|
||||
private static final Logger logger = LoggerFactory.getLogger(LanguageProcessingTool.class);
|
||||
private final ThreadLocalSentenceExtractorProvider sentenceExtractorProvider;
|
||||
private final TermFrequencyDict termFrequencyDict;
|
||||
|
||||
static void main(String[] args) {
|
||||
Jooby.runApp(args, LanguageProcessingTool::new);
|
||||
}
|
||||
@@ -41,7 +42,14 @@ public class LanguageProcessingTool extends Jooby {
|
||||
new LanguageConfiguration(languageModels, new LanguageConfigLocation.Experimental()),
|
||||
languageModels
|
||||
);
|
||||
|
||||
// Depending on how the tool is started, we may be in the project root, or the module root;
|
||||
// so here's some guesswork to try to suss out which one it is...
|
||||
Path basePath = Path.of("code/functions/language-processing/").toAbsolutePath();
|
||||
if (!Files.exists(basePath)) {
|
||||
basePath = Path.of(".").toAbsolutePath();
|
||||
}
|
||||
|
||||
System.out.println("Base path: " + basePath);
|
||||
|
||||
if (Files.exists(basePath.resolve("resources/ltt/jte")))
|
||||
|
@@ -1,14 +1,23 @@
|
||||
# Language Processing
|
||||
|
||||
This library contains various tools used in language processing.
|
||||
This function gathers various tools used in language processing,
|
||||
keyword extraction, and so on.
|
||||
|
||||
## Language Processing Tool
|
||||
|
||||
It also houses a tool for inspecting the output of keyword extraction,
|
||||
which can be accessed by running the command below from the root of the project.
|
||||
The tool becomes accessible on port 8080.
|
||||
|
||||
```bash
|
||||
$ ./gradlew :code:functions:language-processing:run
|
||||
```
|
||||
|
||||
## Central Classes
|
||||
|
||||
* [SentenceExtractor](java/nu/marginalia/language/sentence/SentenceExtractor.java) -
|
||||
Creates a [DocumentLanguageData](java/nu/marginalia/language/model/DocumentLanguageData.java) from a text, containing
|
||||
its words, how they stem, POS tags, and so on.
|
||||
|
||||
## See Also
|
||||
|
||||
[converting-process/ft-keyword-extraction](../../processes/converting-process/ft-keyword-extraction) uses this code to identify which keywords
|
||||
are important.
|
||||
* [LanguageConfiguration](java/nu/marginalia/language/config/LanguageConfiguration.java) - parses language configuration XML files into LanguageDefinition objects
|
||||
* [LanguageDefinition](java/nu/marginalia/language/model/LanguageDefinition.java) - holds all per-language customizations that are fed into the language processing pipeline
|
||||
* [DocumentKeywordExtractor](java/nu/marginalia/keyword/DocumentKeywordExtractor.java) - extracts keywords from documents
|
||||
|
Reference in New Issue
Block a user