mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-05 21:22:39 +02:00
(language) Basic test for LanguageConfiguration
This commit is contained in:
@@ -86,7 +86,7 @@ public class LanguageConfiguration {
|
||||
|
||||
public Optional<LanguageDefinition> identifyLanguage(org.jsoup.nodes.Document jsoupDoc) {
|
||||
StringBuilder sampleBuilder = new StringBuilder();
|
||||
jsoupDoc.body().traverse((node, depth) -> {
|
||||
jsoupDoc.body().traverse((node, _) -> {
|
||||
if (sampleBuilder.length() > 4096) return;
|
||||
if (!(node instanceof TextNode tn)) return;
|
||||
|
||||
|
@@ -0,0 +1,43 @@
|
||||
package nu.marginalia.language.config;
|
||||
|
||||
import nu.marginalia.language.filter.TestLanguageModels;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
import java.io.IOException;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
public class LanguageConfigurationTest {
|
||||
static LanguageConfiguration languageConfiguration;
|
||||
|
||||
@BeforeAll
|
||||
public static void setUpAll() throws IOException, SAXException, ParserConfigurationException {
|
||||
languageConfiguration = new LanguageConfiguration(TestLanguageModels.getLanguageModels());
|
||||
}
|
||||
|
||||
@Test
|
||||
void testBasic() {
|
||||
Assertions.assertNotNull(languageConfiguration.getLanguage("en"));
|
||||
Assertions.assertNotNull(languageConfiguration.getLanguage("sv"));
|
||||
Assertions.assertNotNull(languageConfiguration.getLanguage("xx"));
|
||||
Assertions.assertNull(languageConfiguration.getLanguage("!!"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testStemming() {
|
||||
var svStemmer = languageConfiguration.getLanguage("sv").stemmer();
|
||||
var enStemmer = languageConfiguration.getLanguage("en").stemmer();
|
||||
|
||||
Assertions.assertNotNull(svStemmer);
|
||||
Assertions.assertNotNull(enStemmer);
|
||||
|
||||
assertEquals("bil", svStemmer.stem("bilar"));
|
||||
assertEquals("dogged", svStemmer.stem("dogged"));
|
||||
assertEquals("bilar", enStemmer.stem("bilar"));
|
||||
assertEquals("dog", enStemmer.stem("dogged"));
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user