diff --git a/app/server/.gitignore b/app/server/.gitignore
index 09331c6..eebc7cd 100644
--- a/app/server/.gitignore
+++ b/app/server/.gitignore
@@ -110,4 +110,5 @@ fabric.properties
# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser
-lucene/
\ No newline at end of file
+lucene/
+feed_cache/
\ No newline at end of file
diff --git a/app/server/huntly-server/pom.xml b/app/server/huntly-server/pom.xml
index f7d860a..556f442 100644
--- a/app/server/huntly-server/pom.xml
+++ b/app/server/huntly-server/pom.xml
@@ -146,6 +146,11 @@
cn.shenyanchao.ik-analyzer
ik-analyzer
+
+ com.squareup.okhttp3
+ okhttp
+ 4.12.0
+
org.springframework.boot
diff --git a/app/server/huntly-server/src/main/java/com/huntly/server/connector/rss/FeedUtils.java b/app/server/huntly-server/src/main/java/com/huntly/server/connector/rss/FeedUtils.java
index e56ced8..31704db 100644
--- a/app/server/huntly-server/src/main/java/com/huntly/server/connector/rss/FeedUtils.java
+++ b/app/server/huntly-server/src/main/java/com/huntly/server/connector/rss/FeedUtils.java
@@ -8,55 +8,76 @@
import com.rometools.rome.io.FeedException;
import com.rometools.rome.io.SyndFeedInput;
import lombok.experimental.UtilityClass;
+import okhttp3.OkHttpClient;
+import okhttp3.Request;
+import okhttp3.Response;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import java.io.IOException;
import java.io.StringReader;
-import java.net.URI;
-import java.net.http.HttpClient;
-import java.net.http.HttpRequest;
-import java.net.http.HttpResponse;
import java.nio.charset.Charset;
-import java.time.Duration;
/**
* Utility methods related to feed handling
- * code from commafeed project
*/
@UtilityClass
public class FeedUtils {
- public static SyndFeed parseFeedUrl(String feedUrl, HttpClient client) {
- HttpRequest request = HttpRequest.newBuilder().GET().uri(URI.create(feedUrl))
+ /**
+ * Downloads the document at {@code feedUrl} with the supplied client and parses it into a feed.
+ *
+ * @param feedUrl address of the feed document to fetch
+ * @param client OkHttp client used to execute the GET request
+ * @return the feed built by {@link SyndFeedInput} from the downloaded XML
+ * @throws ConnectorFetchException if the server answers with a non-2xx status, the response has
+ * no body, or the cleaned XML text is {@code null}
+ * @throws RuntimeException wrapping the {@link IOException} raised while fetching or the
+ * {@link FeedException} raised while parsing
+ */
+ public static SyndFeed parseFeedUrl(String feedUrl, OkHttpClient client) {
+ Request request = new Request.Builder()
+ .url(feedUrl)
.build();
- HttpResponse response = null;
- try {
- response = client.send(request, HttpResponse.BodyHandlers.ofByteArray());
+ try (Response response = client.newCall(request).execute()) {
+ // An error page (404, 500, ...) is not a feed: fail with the status instead of an opaque parse error.
+ if (!response.isSuccessful()) {
+ throw new ConnectorFetchException("xml fetch failed for url: " + feedUrl + ", http status: " + response.code());
+ }
+ var body = response.body();
+ if (body == null) {
+ throw new ConnectorFetchException("xml response null for url: " + feedUrl);
+ }
+
+ byte[] xmlBytes = body.bytes();
+ // Decode with the charset guessed from the payload before stripping invalid XML characters.
+ Charset encoding = FeedUtils.guessEncoding(xmlBytes);
+ String xmlString = XmlUtils.removeInvalidXmlCharacters(new String(xmlBytes, encoding));
+ if (xmlString == null) {
+ throw new ConnectorFetchException("xml fetch failed for url: " + feedUrl);
+ }
+ return new SyndFeedInput().build(new StringReader(xmlString));
} catch (IOException e) {
throw new RuntimeException(e);
- } catch (InterruptedException e) {
- throw new RuntimeException(e);
- }
- var xmlBytes = response.body();
- Charset encoding = FeedUtils.guessEncoding(xmlBytes);
- String xmlString = XmlUtils.removeInvalidXmlCharacters(new String(xmlBytes, encoding));
- if (xmlString == null) {
- throw new ConnectorFetchException("xml fetch failed for url: " + feedUrl);
- }
-
- try {
- SyndFeed feed = new SyndFeedInput().build(new StringReader(xmlString));
- return feed;
} catch (FeedException e) {
throw new RuntimeException(e);
}
}
- public static SyndFeed parseFeedUrl(String feedUrl) {
- var client = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(60))
- .followRedirects(HttpClient.Redirect.ALWAYS).build();
- return parseFeedUrl(feedUrl, client);
- }
public static Charset guessEncoding(byte[] bytes) {
String extracted = extractDeclaredEncoding(bytes);
diff --git a/app/server/huntly-server/src/main/java/com/huntly/server/connector/rss/RSSConnector.java b/app/server/huntly-server/src/main/java/com/huntly/server/connector/rss/RSSConnector.java
index df71870..03ca261 100644
--- a/app/server/huntly-server/src/main/java/com/huntly/server/connector/rss/RSSConnector.java
+++ b/app/server/huntly-server/src/main/java/com/huntly/server/connector/rss/RSSConnector.java
@@ -5,12 +5,14 @@
import com.huntly.server.connector.ConnectorProperties;
import com.huntly.server.connector.InfoConnector;
import com.huntly.server.domain.exceptions.ConnectorFetchException;
+import com.huntly.server.util.HttpUtils;
import com.huntly.server.util.SiteUtils;
import com.rometools.rome.feed.synd.SyndCategory;
import com.rometools.rome.feed.synd.SyndContent;
import com.rometools.rome.feed.synd.SyndEntry;
import com.rometools.rome.feed.synd.SyndFeed;
import lombok.extern.slf4j.Slf4j;
+import okhttp3.OkHttpClient;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.StringUtils;
@@ -28,18 +30,16 @@
public class RSSConnector extends InfoConnector {
private final ConnectorProperties connectorProperties;
+ private final OkHttpClient okClient;
+
private final HttpClient client;
public RSSConnector(ConnectorProperties connectorProperties) {
this.connectorProperties = connectorProperties;
+ this.okClient = HttpUtils.buildFeedOkHttpClient(connectorProperties.getProxySetting());
this.client = buildHttpClient(connectorProperties);
}
- public RSSConnector(ConnectorProperties connectorProperties, HttpClient httpClient) {
- this.connectorProperties = connectorProperties;
- this.client = httpClient;
- }
-
@Override
public List fetchAllPages() {
return fetchNewestPages();
@@ -52,7 +52,7 @@ public List fetchNewestPages() {
}
try {
- SyndFeed feed = FeedUtils.parseFeedUrl(connectorProperties.getSubscribeUrl(), client);
+ SyndFeed feed = FeedUtils.parseFeedUrl(connectorProperties.getSubscribeUrl(), okClient);
var entries = feed.getEntries();
List pages = new ArrayList<>();
for (var entry : entries) {
diff --git a/app/server/huntly-server/src/main/java/com/huntly/server/domain/constant/AppConstants.java b/app/server/huntly-server/src/main/java/com/huntly/server/domain/constant/AppConstants.java
index 108c8a8..45e8c8d 100644
--- a/app/server/huntly-server/src/main/java/com/huntly/server/domain/constant/AppConstants.java
+++ b/app/server/huntly-server/src/main/java/com/huntly/server/domain/constant/AppConstants.java
@@ -9,6 +9,10 @@
public class AppConstants {
public static final String DEFAULT_LUCENE_DIR = "lucene";
+ public static final String HTTP_FEED_CACHE_DIR = "feed_cache";
+
+ public static final Long HTTP_FEED_CACHE_MAXSIZE = 50L * 1024L * 1024L; // 50 MB
+
public static final Integer DEFAULT_FETCH_INTERVAL_SECONDS = 600;
public static final Integer DEFAULT_COLD_DATA_KEEP_DAYS = 60;
diff --git a/app/server/huntly-server/src/main/java/com/huntly/server/service/ConnectorFetchService.java b/app/server/huntly-server/src/main/java/com/huntly/server/service/ConnectorFetchService.java
index 79adb90..0f0ac7b 100644
--- a/app/server/huntly-server/src/main/java/com/huntly/server/service/ConnectorFetchService.java
+++ b/app/server/huntly-server/src/main/java/com/huntly/server/service/ConnectorFetchService.java
@@ -147,7 +147,13 @@ private void fetchPages(Connector connector) {
}
}
- var savedPage = capturePageService.save(page);
+ Page savedPage = null;
+ // Avoid needless re-saves: reuse the existing RSS page when its title and connectedAt are unchanged.
+ if (isRssFetch && existPage != null && Objects.equals(existPage.getTitle(), page.getTitle()) && Objects.equals(existPage.getConnectedAt(), page.getConnectedAt())) {
+ savedPage = existPage;
+ } else {
+ savedPage = capturePageService.save(page);
+ }
if (isRssFetch && isExecuteFetch) {
pageArticleContentService.saveContent(savedPage.getId(), rawContent, ArticleContentCategory.RAW_CONTENT);
diff --git a/app/server/huntly-server/src/main/java/com/huntly/server/service/FeedsService.java b/app/server/huntly-server/src/main/java/com/huntly/server/service/FeedsService.java
index 6dd8c1c..8976eea 100644
--- a/app/server/huntly-server/src/main/java/com/huntly/server/service/FeedsService.java
+++ b/app/server/huntly-server/src/main/java/com/huntly/server/service/FeedsService.java
@@ -71,8 +71,10 @@ public Connector followFeed(String subscribeUrl) {
public PreviewFeedsInfo previewFeeds(String subscribeUrl) {
PreviewFeedsInfo feedsInfo = new PreviewFeedsInfo();
feedsInfo.setFeedUrl(subscribeUrl);
- var httpClient = HttpUtils.buildHttpClient(globalSettingService.getProxySetting());
- SyndFeed syndFeed = FeedUtils.parseFeedUrl(subscribeUrl, httpClient);
+ var proxySetting = globalSettingService.getProxySetting();
+ var httpClient = HttpUtils.buildHttpClient(proxySetting);
+ var feedClient = HttpUtils.buildFeedOkHttpClient(proxySetting);
+ SyndFeed syndFeed = FeedUtils.parseFeedUrl(subscribeUrl, feedClient);
if (syndFeed != null) {
feedsInfo.setSiteLink(syndFeed.getLink());
feedsInfo.setTitle(syndFeed.getTitle());
diff --git a/app/server/huntly-server/src/main/java/com/huntly/server/util/HttpUtils.java b/app/server/huntly-server/src/main/java/com/huntly/server/util/HttpUtils.java
index e9616d8..c42f1d0 100644
--- a/app/server/huntly-server/src/main/java/com/huntly/server/util/HttpUtils.java
+++ b/app/server/huntly-server/src/main/java/com/huntly/server/util/HttpUtils.java
@@ -1,14 +1,20 @@
package com.huntly.server.util;
+import com.huntly.server.domain.constant.AppConstants;
import com.huntly.server.domain.model.ProxySetting;
import lombok.experimental.UtilityClass;
+import okhttp3.Cache;
+import okhttp3.ConnectionSpec;
+import okhttp3.OkHttpClient;
import org.apache.commons.lang3.StringUtils;
import javax.net.ssl.KeyManager;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
+import java.io.File;
import java.net.InetSocketAddress;
+import java.net.Proxy;
import java.net.ProxySelector;
import java.net.http.HttpClient;
import java.security.KeyManagementException;
@@ -16,12 +22,38 @@
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.time.Duration;
+import java.util.Arrays;
/**
* @author lcomplete
*/
@UtilityClass
public class HttpUtils {
+ /**
+ * Single response cache shared by every feed client. OkHttp documents that having several
+ * {@link Cache} instances on the same directory at the same time is an error, so the cache is
+ * created once here instead of once per client.
+ */
+ private static final Cache FEED_CACHE = new Cache(
+ new File(AppConstants.HTTP_FEED_CACHE_DIR), AppConstants.HTTP_FEED_CACHE_MAXSIZE
+ );
+
+ /**
+ * Builds an OkHttp client for fetching feeds, backed by the shared on-disk response cache.
+ *
+ * @param proxySetting optional proxy configuration; an HTTP proxy is installed only when it is
+ * non-null and its host is not blank
+ * @param timeoutSeconds overall call timeout in seconds; when {@code null} no call timeout is set
+ * on the builder
+ * @return a client that follows redirects and accepts modern TLS, compatible TLS and cleartext
+ * connections
+ */
+ public static OkHttpClient buildFeedOkHttpClient(ProxySetting proxySetting, Integer timeoutSeconds) {
+ var builder = new OkHttpClient.Builder()
+ .cache(FEED_CACHE)
+ .connectionSpecs(Arrays.asList(ConnectionSpec.MODERN_TLS, ConnectionSpec.COMPATIBLE_TLS, ConnectionSpec.CLEARTEXT))
+ .followRedirects(true);
+ if (proxySetting != null && StringUtils.isNotBlank(proxySetting.getHost())) {
+ builder = builder.proxy(
+ new Proxy(
+ Proxy.Type.HTTP,
+ new InetSocketAddress(proxySetting.getHost(), proxySetting.getPort())
+ )
+ );
+ }
+ if (timeoutSeconds != null) {
+ builder = builder.callTimeout(Duration.ofSeconds(timeoutSeconds));
+ }
+ return builder.build();
+ }
+
+ /**
+ * Builds a feed client using a call timeout of 30 seconds.
+ *
+ * @param proxySetting optional proxy configuration, forwarded unchanged
+ * @return the client produced by {@link #buildFeedOkHttpClient(ProxySetting, Integer)}
+ */
+ public static OkHttpClient buildFeedOkHttpClient(ProxySetting proxySetting) {
+ final Integer defaultTimeoutSeconds = 30;
+ return buildFeedOkHttpClient(proxySetting, defaultTimeoutSeconds);
+ }
+
public static HttpClient buildHttpClient(ProxySetting proxySetting, Integer timeoutSeconds) {
// Configure SSLContext with a TrustManager that accepts any certificate
SSLContext sslContext = null;
@@ -31,7 +63,7 @@ public static HttpClient buildHttpClient(ProxySetting proxySetting, Integer time
} catch (NoSuchAlgorithmException | KeyManagementException e) {
throw new RuntimeException(e);
}
-
+
var clientBuilder = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(timeoutSeconds))
.sslContext(sslContext)
.followRedirects(HttpClient.Redirect.ALWAYS);