mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-05 21:22:39 +02:00
It has long since outgrown the single-responsibility library it once was, so it is out of place in its original location and fits better among the function-type modules.
87 lines
2.7 KiB
Java
87 lines
2.7 KiB
Java
package nu.marginalia.language.encoding;
|
|
|
|
/**
 * Named ranges of UTF-16 code unit values, each with a detector that reports
 * whether a text contains a noticeable amount of characters from that range.
 *
 * <p>Every constant carries an inclusive {@code [min, max]} range and a
 * {@code sensitive} flag. Ranges flagged as sensitive use a much lower
 * match threshold in {@link #test(String)} than the others.
 */
public enum UnicodeRanges {
    GREEK(false, 0x0370, 0x03FF),
    CYRILLIC(false, 0x0400, 0x04FF),
    CYRILLIC2(false, 0x0500, 0x052F),
    ARMENIAN(false, 0x0530, 0x058F),
    HEBREW(false, 0x0590, 0x05FF),
    ARABIC(false, 0x0600, 0x06FF),
    SYRIAC(false, 0x0700, 0x074F),
    THAANA(false, 0x0780, 0x07BF),
    DEVANAGARI(false, 0x0900, 0x097F),
    BENGALI(false, 0x0980, 0x09FF),
    GURMUKHI(false, 0x0A00, 0x0A7F),
    GUJARATI(false, 0x0A80, 0x0AFF),
    ORIYA(false, 0x0B00, 0x0B7F),
    TAMIL(false, 0x0B80, 0x0BFF),
    TELUGU(false, 0x0C00, 0x0C7F),
    KANNADA(false, 0x0C80, 0x0CFF),
    MALAYALAM(false, 0x0D00, 0x0D7F),
    SINHALA(false, 0x0D80, 0x0DFF),
    THAI(false, 0x0E00, 0x0E7F),
    LAO(false, 0x0E80, 0x0EFF),
    TIBETAN(false, 0x0F00, 0x0FFF),
    MYANMAR(false, 0x1000, 0x109F),
    GEORGIAN(false, 0x10A0, 0x10FF),
    HANGUL(false, 0x1100, 0x11FF),
    ETHIOPIC(false, 0x1200, 0x137F),
    CHEROKEE(false, 0x13A0, 0x13FF),
    ABORIGINAL(false, 0x1400, 0x167F),
    OGHAM(false, 0x1680, 0x169F),
    RUNIC(false, 0x16A0, 0x16FF),
    TAGALOG(false, 0x1700, 0x171F),
    HANUNOO(false, 0x1720, 0x173F),
    BUHID(false, 0x1740, 0x175F),
    TAGBANWA(false, 0x1760, 0x177F),
    KHMER(false, 0x1780, 0x17FF),
    MONGOLIAN(false, 0x1800, 0x18AF),
    LIMBU(false, 0x1900, 0x194F),
    TAILE(false, 0x1950, 0x197F),
    KHMER2(false, 0x19E0, 0x19FF),
    CJKRADICALS(true, 0x2E80, 0x2EFF),
    KANGXIRADICALS(true, 0x2F00, 0x2FDF),
    IDEOGRAPHICDESCRIPTION(true, 0x2FF0, 0x2FFF),
    CJKSYMBOLS(true, 0x3000, 0x303F),
    HIRAGANA(true, 0x3040, 0x309F),
    KATAKANA(true, 0x30A0, 0x30FF),
    BOPOMOFO(true, 0x3100, 0x312F),
    HANGULJAMO(true, 0x3130, 0x318F),
    KANBUN(true, 0x3190, 0x319F),
    BOPOMOFOEXTENDED(true, 0x31A0, 0x31BF),
    KATAKANAPHONETIC(true, 0x31F0, 0x31FF),
    ENCLOSEDCJK(true, 0x3200, 0x32FF),
    CJKCOMPATIBILITY(true, 0x3300, 0x33FF),
    CJKUNIFIEDA(true, 0x3400, 0x4DBF),
    YIJINGHEXAGRAMSYMBOLS(true, 0x4DC0, 0x4DFF),
    CJKUNIFIEDIDEOGRAPHS(true, 0x4E00, 0x9FFF),
    YISYLLABLES(true, 0xA000, 0xA48F),
    YIRADICALS(true, 0xA490, 0xA4CF),
    HANGULSYLLABLES(true, 0xAC00, 0xD7AF);

    /** Only this many leading characters of the input are examined. */
    private static final int MAX_INSPECTED_CHARS = 2000;

    /** Match threshold applied to ranges flagged as sensitive. */
    private static final int SENSITIVE_THRESHOLD = 15;

    /** Match threshold applied to all other ranges. */
    private static final int DEFAULT_THRESHOLD = 100;

    /** Lowest {@code char} value in the range (inclusive). */
    final int min;

    /** Highest {@code char} value in the range (inclusive). */
    final int max;

    /** Whether the lower match threshold applies to this range. */
    final boolean sensitive;

    /**
     * @param sensitive whether {@link #test(String)} should use the lower threshold
     * @param min       lowest {@code char} value in the range (inclusive)
     * @param max       highest {@code char} value in the range (inclusive)
     */
    UnicodeRanges(boolean sensitive, int min, int max) {
        this.sensitive = sensitive;
        this.min = min;
        this.max = max;
    }

    /**
     * Checks whether the beginning of {@code text} contains enough characters
     * from this range.
     *
     * <p>Only the first {@value #MAX_INSPECTED_CHARS} {@code char}s are examined.
     * The running match count is compared against the threshold <em>before</em>
     * it is incremented, so the method returns {@code true} on the
     * (threshold + 2)-th matching character: the 17th for sensitive ranges and
     * the 102nd for the others.
     *
     * @param text the text to examine
     * @return {@code true} if enough characters within the examined prefix fall
     *         inside {@code [min, max]}, {@code false} otherwise
     * @throws NullPointerException if {@code text} is {@code null}
     */
    public boolean test(String text) {
        final int threshold = sensitive ? SENSITIVE_THRESHOLD : DEFAULT_THRESHOLD;
        // The bound is loop-invariant, so compute it once up front.
        final int limit = Math.min(MAX_INSPECTED_CHARS, text.length());

        int matches = 0;
        for (int i = 0; i < limit; i++) {
            final char c = text.charAt(i);
            if (c >= min && c <= max) {
                // Post-increment is deliberate: it preserves the historical cut-off.
                if (matches++ > threshold) {
                    return true;
                }
            }
        }

        return false;
    }
}
|