mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-05 21:22:39 +02:00
Compare commits
8 Commits
deploy-018
...
deploy-019
Author | SHA1 | Date | |
---|---|---|---|
|
4edc0d3267 | ||
|
890f521d0d | ||
|
b1814a30f7 | ||
|
f59a9eb025 | ||
|
599534806b | ||
|
7e8253dac7 | ||
|
97a6780ea3 | ||
|
eb634beec8 |
@@ -112,14 +112,6 @@ public class EdgeDomain implements Serializable {
|
||||
return topDomain;
|
||||
}
|
||||
|
||||
public String getDomainKey() {
|
||||
int cutPoint = topDomain.indexOf('.');
|
||||
if (cutPoint < 0) {
|
||||
return topDomain;
|
||||
}
|
||||
return topDomain.substring(0, cutPoint).toLowerCase();
|
||||
}
|
||||
|
||||
/** If possible, try to provide an alias domain,
|
||||
* i.e. a domain name that is very likely to link to this one
|
||||
* */
|
||||
|
@@ -8,14 +8,6 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
class EdgeDomainTest {
|
||||
|
||||
@Test
|
||||
public void testSkepdic() throws URISyntaxException {
|
||||
var domain = new EdgeUrl("http://www.skepdic.com/astrology.html");
|
||||
assertEquals("skepdic", domain.getDomain().getDomainKey());
|
||||
var domain2 = new EdgeUrl("http://skepdic.com/astrology.html");
|
||||
assertEquals("skepdic", domain2.getDomain().getDomainKey());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHkDomain() throws URISyntaxException {
|
||||
var domain = new EdgeUrl("http://l7072i3.l7c.net");
|
||||
|
@@ -84,7 +84,7 @@ public class ForwardIndexConverter {
|
||||
|
||||
LongArray docFileData = LongArrayFactory.mmapForWritingConfined(outputFileDocsData, ForwardIndexParameters.ENTRY_SIZE * docsFileId.size());
|
||||
|
||||
ByteBuffer workArea = ByteBuffer.allocate(65536);
|
||||
ByteBuffer workArea = ByteBuffer.allocate(1024*1024*100);
|
||||
for (var instance : journal.pages()) {
|
||||
try (var slopTable = new SlopTable(instance.baseDir(), instance.page()))
|
||||
{
|
||||
|
@@ -2002,12 +2002,11 @@ public class HeadingAwarePDFTextStripper extends LegacyPDFStreamEngine
|
||||
float minFontWeight = Integer.MAX_VALUE;
|
||||
for (var word : line)
|
||||
{
|
||||
int i = 0;
|
||||
for (var textPosition : word.getTextPositions())
|
||||
{
|
||||
if (word.text.charAt(i++) == ' ') {
|
||||
continue;
|
||||
}
|
||||
// Skip empty text positions as they may have a different font
|
||||
if (word.text.isBlank()) continue;
|
||||
|
||||
var font = textPosition.getFont();
|
||||
if (font == null) continue;
|
||||
var descriptor = font.getFontDescriptor();
|
||||
|
@@ -61,7 +61,7 @@ public class UrlDeduplicator {
|
||||
|
||||
private boolean limitResultsPerDomain(DecoratedSearchResultItem details) {
|
||||
final var domain = details.getUrl().getDomain();
|
||||
final String key = domain.getDomainKey();
|
||||
final String key = domain.toString();
|
||||
|
||||
return keyCount.adjustOrPutValue(key, 1, 1) <= resultsPerKey;
|
||||
}
|
||||
|
@@ -25,6 +25,7 @@ public class UrlDeduplicator {
|
||||
}
|
||||
|
||||
public boolean shouldRemove(DecoratedSearchResultItem details) {
|
||||
|
||||
if (!deduplicateOnSuperficialHash(details))
|
||||
return true;
|
||||
if (!deduplicateOnLSH(details))
|
||||
@@ -61,7 +62,7 @@ public class UrlDeduplicator {
|
||||
|
||||
private boolean limitResultsPerDomain(DecoratedSearchResultItem details) {
|
||||
final var domain = details.getUrl().getDomain();
|
||||
final String key = domain.getDomainKey();
|
||||
final String key = domain.toString();
|
||||
|
||||
return keyCount.adjustOrPutValue(key, 1, 1) <= resultsPerKey;
|
||||
}
|
||||
|
@@ -7,4 +7,5 @@
|
||||
2025-05-04: Deploy qs, search and api-services.
|
||||
2025-05-05: Deploy executor partition 4.
|
||||
2025-05-05: Deploy control.
|
||||
2025-05-08: Deploy assistant.
|
||||
2025-05-08: Deploy assistant.
|
||||
2025-05-17: Redeploy all.
|
Reference in New Issue
Block a user