1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-05 21:22:39 +02:00

(language) Fix Unicode handling in keyword pattern matching

This commit is contained in:
Viktor Lofgren
2025-09-22 10:27:46 +02:00
parent 5b2bec6144
commit e9ed0c5669

View File

@@ -164,12 +164,13 @@ public class DocumentPositionMapper {
int i = 0;
for (int run = 0; run < 15 && i < s.length(); run++) {
int cp = s.charAt(i);
int cp = s.codePointAt(i);
i += Character.charCount(cp);
if (Character.isAlphabetic(cp)) continue;
if (Character.isDigit(cp)) continue;
if (Character.isAlphabetic(cp) || Character.isDigit(cp)) {
i += Character.charCount(cp);
continue;
}
break;
}
@@ -180,19 +181,21 @@ public class DocumentPositionMapper {
for (int j = 0; j < 8; j++) {
if (i == s.length()) return true;
if (wordPartSeparator.indexOf(s.charAt(i)) < 0) {
if (wordPartSeparator.indexOf(s.codePointAt(i)) < 0) {
return false;
}
i++;
for (int run = 0; run < 10 && i < s.length(); run++) {
int cp = s.charAt(i);
int cp = s.codePointAt(i);
i += Character.charCount(cp);
if (Character.isAlphabetic(cp) || Character.isDigit(cp)) {
i += Character.charCount(cp);
continue;
}
if (Character.isAlphabetic(cp)) continue;
if (Character.isDigit(cp)) continue;
break;
}
}