1
1
mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git synced 2025-10-05 21:22:39 +02:00
Files
MarginaliaSearch/code/functions/live-capture/java/nu/marginalia/rss/db/FeedDbReader.java
Viktor Lofgren 3bc99639a0 (feed-fetcher) Make feed fetcher requests conditional
Add `If-None-Match` and `If-Modified-Since` headers as appropriate to the feed fetcher's requests.  On well-configured web servers, this should short-circuit the request and reduce the amount of bandwidth and processing that is necessary.

A new table was added to the FeedDb to hold one etag per domain.

If-Modified-Since semantics are based on the creation date for the feed database, which should serve as a cutoff date for the earliest update we can have received.

This completes the changes for Issue #136.
2024-12-27 15:10:15 +01:00

177 lines
5.6 KiB
Java

package nu.marginalia.rss.db;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.rss.model.FeedDefinition;
import nu.marginalia.rss.model.FeedItems;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
import java.nio.file.Path;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.time.Instant;
import java.util.*;
import java.util.function.BiConsumer;
public class FeedDbReader implements AutoCloseable {
private static final Logger logger = LoggerFactory.getLogger(FeedDbReader.class);
private static final Gson gson = new GsonBuilder().create();
private final Connection connection;
public FeedDbReader(Path dbPath) throws SQLException {
String dbUrl = "jdbc:sqlite:" + dbPath.toAbsolutePath();
logger.info("Opening feed db at " + dbUrl);
connection = DriverManager.getConnection(dbUrl);
// Create table if it doesn't exist to avoid errors before any feeds have been fetched
try (var stmt = connection.createStatement()) {
stmt.executeUpdate("CREATE TABLE IF NOT EXISTS feed (domain TEXT PRIMARY KEY, feed JSON)");
stmt.executeUpdate("CREATE TABLE IF NOT EXISTS errors (domain TEXT PRIMARY KEY, cnt INT DEFAULT 0)");
stmt.executeUpdate("CREATE TABLE IF NOT EXISTS etags (domain TEXT PRIMARY KEY, etag TEXT)");
}
}
public List<FeedDefinition> getAllFeeds() {
List<FeedDefinition> ret = new ArrayList<>();
try (var stmt = connection.createStatement()) {
var rs = stmt.executeQuery("""
select
json_extract(feed, '$.domain') as domain,
json_extract(feed, '$.feedUrl') as url,
json_extract(feed, '$.updated') as updated
from feed
""");
while (rs.next()) {
ret.add(new FeedDefinition(
rs.getString("domain"),
rs.getString("url"),
rs.getString("updated")));
}
} catch (SQLException e) {
logger.error("Error getting all feeds", e);
}
return ret;
}
public Optional<String> getFeedAsJson(String domain) {
try (var stmt = connection.prepareStatement("SELECT FEED FROM feed WHERE DOMAIN = ?")) {
stmt.setString(1, domain);
var rs = stmt.executeQuery();
if (rs.next()) {
return Optional.of(rs.getString(1));
}
} catch (SQLException e) {
logger.error("Error getting feed for " + domain, e);
}
return Optional.empty();
}
public Map<String, Integer> getAllErrorCounts() {
Map<String, Integer> ret = new HashMap<>(100_000);
try (var stmt = connection.prepareStatement("SELECT domain, cnt FROM errors")) {
var rs = stmt.executeQuery();
while (rs.next()) {
ret.put(rs.getString(1), rs.getInt(2));
}
} catch (SQLException e) {
logger.error("Error getting errors", e);
}
return ret;
}
public FeedItems getFeed(EdgeDomain domain) {
try (var stmt = connection.prepareStatement("SELECT FEED FROM feed WHERE DOMAIN = ?")) {
stmt.setString(1, domain.toString());
var rs = stmt.executeQuery();
if (rs.next()) {
return deserialize(rs.getString(1));
}
} catch (SQLException e) {
logger.error("Error getting feed for " + domain, e);
}
return FeedItems.none();
}
@Nullable
public String getEtag(EdgeDomain domain) {
try (var stmt = connection.prepareStatement("SELECT etag FROM etags WHERE DOMAIN = ?")) {
stmt.setString(1, domain.toString());
var rs = stmt.executeQuery();
if (rs.next()) {
return rs.getString(1);
}
} catch (SQLException e) {
logger.error("Error getting etag for " + domain, e);
}
return null;
}
private FeedItems deserialize(String string) {
return gson.fromJson(string, FeedItems.class);
}
@Override
public void close() throws SQLException {
connection.close();
}
public void getLinksUpdatedSince(Instant since, BiConsumer<String, List<String>> consumer) {
try (var stmt = connection.prepareStatement("SELECT FEED FROM feed")) {
var rs = stmt.executeQuery();
while (rs.next()) {
FeedItems items = deserialize(rs.getString(1));
List<String> urls = new ArrayList<>();
for (var item : items.items()) {
if (item.getUpdateTimeZD().toInstant().isAfter(since)) {
urls.add(item.url());
}
}
if (!urls.isEmpty()) {
consumer.accept(items.domain(), new ArrayList<>(urls));
urls.clear();
}
}
} catch (SQLException e) {
logger.error("Error getting updated links", e);
}
}
public boolean hasData() {
try (var stmt = connection.prepareStatement("SELECT 1 FROM feed LIMIT 1")) {
var rs = stmt.executeQuery();
if (rs.next()) {
return rs.getBoolean(1);
}
else {
return false;
}
}
catch (SQLException ex) {
return false;
}
}
}