1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-05 21:22:39 +02:00

(index) Update verbatim match handling to account for matches that span multiple tags

This commit is contained in:
Viktor Lofgren
2025-09-24 15:43:00 +02:00
parent 32394f42b9
commit 708caa8791

View File

@@ -684,22 +684,24 @@ public class IndexResultRankingService {
PhraseConstraintGroupList.PhraseConstraintGroup fullGroup = constraints.getFullGroup();
IntList fullGroupIntersections = fullGroup.findIntersections(positions);
int totalFullCnts = 0;
if (!fullGroupIntersections.isEmpty()) {
int totalFullCnts = 0;
// Capture full query matches
for (var tag : HtmlTag.includedTags) {
int cnts = spans.getSpan(tag).countRangeMatches(fullGroupIntersections, fullGroup.size);
if (cnts > 0) {
matches.set(tag.ordinal());
score += (float) (weights_full[tag.ordinal()] * fullGroup.size + (1 + Math.log(2 + cnts)));
totalFullCnts += cnts;
// Capture full query matches
for (var tag : HtmlTag.includedTags) {
int cnts = spans.getSpan(tag).countRangeMatches(fullGroupIntersections, fullGroup.size);
if (cnts > 0) {
matches.set(tag.ordinal());
score += (float) (weights_full[tag.ordinal()] * fullGroup.size + (1 + Math.log(2 + cnts)));
totalFullCnts += cnts;
}
}
}
// Handle matches that span multiple tags; treat them as BODY matches
if (totalFullCnts != fullGroupIntersections.size()) {
int mixedCnts = fullGroupIntersections.size() - totalFullCnts;
score += (float) (weights_full[HtmlTag.BODY.ordinal()] * fullGroup.size * (1 + Math.log(2 + mixedCnts)));
// Handle matches that span multiple tags; treat them as BODY matches
if (totalFullCnts != fullGroupIntersections.size()) {
int mixedCnts = fullGroupIntersections.size() - totalFullCnts;
score += (float) (weights_full[HtmlTag.BODY.ordinal()] * fullGroup.size * (1 + Math.log(2 + mixedCnts)));
}
}
/**