1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-05 21:22:39 +02:00
Files
MarginaliaSearch/code/libraries/language-processing/test/nu/marginalia/language/config/LanguageConfigurationTest.java

73 lines
2.8 KiB
Java
Raw Normal View History

package nu.marginalia.language.config;
import it.unimi.dsi.fastutil.longs.LongList;
import nu.marginalia.language.filter.TestLanguageModels;
2025-08-21 12:45:14 +02:00
import nu.marginalia.language.pos.PosPattern;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.xml.sax.SAXException;
import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import static org.junit.jupiter.api.Assertions.assertEquals;
public class LanguageConfigurationTest {
2025-08-21 12:45:14 +02:00
private static LanguageConfiguration languageConfiguration;
@BeforeAll
public static void setUpAll() throws IOException, SAXException, ParserConfigurationException {
languageConfiguration = new LanguageConfiguration(TestLanguageModels.getLanguageModels());
}
@Test
void testBasic() {
Assertions.assertNotNull(languageConfiguration.getLanguage("en"));
Assertions.assertNotNull(languageConfiguration.getLanguage("sv"));
Assertions.assertNotNull(languageConfiguration.getLanguage("xx"));
Assertions.assertNull(languageConfiguration.getLanguage("!!"));
}
@Test
public void testStemming() {
var svStemmer = languageConfiguration.getLanguage("sv").stemmer();
var enStemmer = languageConfiguration.getLanguage("en").stemmer();
Assertions.assertNotNull(svStemmer);
Assertions.assertNotNull(enStemmer);
assertEquals("bil", svStemmer.stem("bilar"));
assertEquals("dogged", svStemmer.stem("dogged"));
assertEquals("bilar", enStemmer.stem("bilar"));
assertEquals("dog", enStemmer.stem("dogged"));
}
@Test
public void testPosData() {
var svPos = languageConfiguration.getLanguage("sv").posTaggingData();
var enPos = languageConfiguration.getLanguage("en").posTaggingData();
Assertions.assertNotNull(svPos);
Assertions.assertNotNull(enPos);
System.out.println(enPos);
System.out.println(svPos);
Assertions.assertNotEquals(svPos.tags, enPos.tags);
}
2025-08-21 12:45:14 +02:00
@Test
public void testPosPattern() {
var enPos = languageConfiguration.getLanguage("en").posTaggingData();
System.out.println(new PosPattern(enPos.tags, "NNP").pattern);
2025-08-21 12:45:14 +02:00
System.out.println(new PosPattern(enPos.tags, "NNP").pattern);
System.out.println(new PosPattern(enPos.tags, "NNP NNPS").pattern);
System.out.println(new PosPattern(enPos.tags,"NNPS (NNPS DT) DT").pattern);
System.out.println(new PosPattern(enPos.tags, "(NNP NNPS) (NNP NNPS IN DT CC) (NNP NNPS IN DT CC) (NNP NNPS)").pattern);
assertEquals(new PosPattern(enPos.tags, "NNP*").pattern, new PosPattern(enPos.tags, "(NNP NNPS)").pattern);
assertEquals(LongList.of(0L), new PosPattern(enPos.tags, "Hello").pattern);
2025-08-21 12:45:14 +02:00
}
}