diff --git a/.gitignore b/.gitignore
index 3c66885..deec77a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@ target/
!**/src/test/**/target/
.kotlin
+logs/*
*.output.json
### IntelliJ IDEA ###
diff --git a/pom.xml b/pom.xml
index d4b92f0..4f2c756 100644
--- a/pom.xml
+++ b/pom.xml
@@ -24,5 +24,15 @@
2.14.0
compile
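+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+            <version>2.0.16</version>
+        </dependency>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+            <version>1.5.25</version>
+        </dependency>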
\ No newline at end of file
diff --git a/src/main/java/internal/hw/crawler/strategies/crawl/IthomeCrawlStrategy.java b/src/main/java/internal/hw/crawler/strategies/crawl/IthomeCrawlStrategy.java
index 844cf4a..2bbc11a 100644
--- a/src/main/java/internal/hw/crawler/strategies/crawl/IthomeCrawlStrategy.java
+++ b/src/main/java/internal/hw/crawler/strategies/crawl/IthomeCrawlStrategy.java
@@ -5,6 +5,8 @@ import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.net.URL;
import java.util.ArrayList;
@@ -15,6 +17,7 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class IthomeCrawlStrategy implements CrawlStrategy {
+ private static final Logger log = LoggerFactory.getLogger(IthomeCrawlStrategy.class);
private final Pattern idRegex = Pattern.compile("(\\d+)/(\\d+)/(\\d+)\\.htm");
@Override
@@ -47,7 +50,8 @@ public class IthomeCrawlStrategy implements CrawlStrategy {
URL articleUrl = new URL(href);
Document articleDoc = Jsoup.parse(articleUrl, 5000);
articles.add(parseSingle(articleUrl, articleDoc));
- } catch (Exception ignored) {
+ } catch (Exception e) {
+ log.warn("Failed to fetch article: {}", href, e);
}
}
return articles;
diff --git a/src/main/java/internal/hw/crawler/strategies/crawl/PeopleCnCrawlStrategy.java b/src/main/java/internal/hw/crawler/strategies/crawl/PeopleCnCrawlStrategy.java
index f166af5..0ce0f93 100644
--- a/src/main/java/internal/hw/crawler/strategies/crawl/PeopleCnCrawlStrategy.java
+++ b/src/main/java/internal/hw/crawler/strategies/crawl/PeopleCnCrawlStrategy.java
@@ -5,6 +5,8 @@ import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.net.URL;
import java.util.ArrayList;
@@ -16,6 +18,7 @@ import java.util.regex.Pattern;
import java.util.stream.Collectors;
public class PeopleCnCrawlStrategy implements CrawlStrategy {
+ private static final Logger log = LoggerFactory.getLogger(PeopleCnCrawlStrategy.class);
private final List supportedDomains = List.of("people.cn", "people.com.cn");
/* 示例 URL:http://env.people.com.cn/n1/2026/0530/c1010-40730688.html */
private final Pattern idRegex = Pattern.compile("(\\d+)/(\\d+)/c(\\d+)-(\\d+).html");
@@ -57,7 +60,8 @@ public class PeopleCnCrawlStrategy implements CrawlStrategy {
URL articleUrl = new URL(href);
Document articleDoc = Jsoup.parse(articleUrl, 5000);
articles.add(parseSingle(articleUrl, articleDoc));
- } catch (Exception ignored) {
+ } catch (Exception e) {
+ log.warn("Failed to fetch article: {}", href, e);
}
}
return articles;
diff --git a/src/main/resources/logback.xml b/src/main/resources/logback.xml
new file mode 100644
index 0000000..12920e1
--- /dev/null
+++ b/src/main/resources/logback.xml
@@ -0,0 +1,20 @@
+
+
+
+            <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
+
+
+
+
+        <file>logs/default.log</file>
+        <append>true</append>
+
+            <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
+
+
+
+
+
+
+
+