1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-05 21:22:39 +02:00

(language) Basic test for LanguageConfiguration

This commit is contained in:
Viktor Lofgren
2025-08-19 15:55:54 +02:00
parent de67006c4f
commit f682425594
2 changed files with 44 additions and 1 deletions

View File

@@ -86,7 +86,7 @@ public class LanguageConfiguration {
public Optional<LanguageDefinition> identifyLanguage(org.jsoup.nodes.Document jsoupDoc) {
StringBuilder sampleBuilder = new StringBuilder();
jsoupDoc.body().traverse((node, depth) -> {
jsoupDoc.body().traverse((node, _) -> {
if (sampleBuilder.length() > 4096) return;
if (!(node instanceof TextNode tn)) return;

View File

@@ -0,0 +1,43 @@
package nu.marginalia.language.config;
import nu.marginalia.language.filter.TestLanguageModels;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.xml.sax.SAXException;
import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import static org.junit.jupiter.api.Assertions.assertEquals;
public class LanguageConfigurationTest {
static LanguageConfiguration languageConfiguration;
@BeforeAll
public static void setUpAll() throws IOException, SAXException, ParserConfigurationException {
languageConfiguration = new LanguageConfiguration(TestLanguageModels.getLanguageModels());
}
@Test
void testBasic() {
Assertions.assertNotNull(languageConfiguration.getLanguage("en"));
Assertions.assertNotNull(languageConfiguration.getLanguage("sv"));
Assertions.assertNotNull(languageConfiguration.getLanguage("xx"));
Assertions.assertNull(languageConfiguration.getLanguage("!!"));
}
@Test
public void testStemming() {
var svStemmer = languageConfiguration.getLanguage("sv").stemmer();
var enStemmer = languageConfiguration.getLanguage("en").stemmer();
Assertions.assertNotNull(svStemmer);
Assertions.assertNotNull(enStemmer);
assertEquals("bil", svStemmer.stem("bilar"));
assertEquals("dogged", svStemmer.stem("dogged"));
assertEquals("bilar", enStemmer.stem("bilar"));
assertEquals("dog", enStemmer.stem("dogged"));
}
}