Browse Source

setup logging

master
283375 1 month ago
parent
commit
7ab5363932
Failed to extract signature
  1. 1
      .gitignore
  2. 10
      pom.xml
  3. 6
      src/main/java/internal/hw/crawler/strategies/crawl/IthomeCrawlStrategy.java
  4. 6
      src/main/java/internal/hw/crawler/strategies/crawl/PeopleCnCrawlStrategy.java
  5. 20
      src/main/resources/logback.xml

1
.gitignore

@@ -4,6 +4,7 @@ target/
!**/src/test/**/target/
.kotlin
logs/*
*.output.json
### IntelliJ IDEA ###

10
pom.xml

@@ -24,5 +24,15 @@
<version>2.14.0</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>2.0.16</version>
</dependency>
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
<version>1.5.25</version>
</dependency>
</dependencies>
</project>

6
src/main/java/internal/hw/crawler/strategies/crawl/IthomeCrawlStrategy.java

@@ -5,6 +5,8 @@ import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.URL;
import java.util.ArrayList;
@@ -15,6 +17,7 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class IthomeCrawlStrategy implements CrawlStrategy {
private static final Logger log = LoggerFactory.getLogger(IthomeCrawlStrategy.class);
private final Pattern idRegex = Pattern.compile("(\\d+)/(\\d+)/(\\d+)\\.htm");
@Override
@@ -47,7 +50,8 @@ public class IthomeCrawlStrategy implements CrawlStrategy {
URL articleUrl = new URL(href);
Document articleDoc = Jsoup.parse(articleUrl, 5000);
articles.add(parseSingle(articleUrl, articleDoc));
} catch (Exception ignored) {
} catch (Exception e) {
log.warn("Failed to fetch article: {}", href, e);
}
}
return articles;

6
src/main/java/internal/hw/crawler/strategies/crawl/PeopleCnCrawlStrategy.java

@@ -5,6 +5,8 @@ import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.URL;
import java.util.ArrayList;
@@ -16,6 +18,7 @@ import java.util.regex.Pattern;
import java.util.stream.Collectors;
public class PeopleCnCrawlStrategy implements CrawlStrategy {
private static final Logger log = LoggerFactory.getLogger(PeopleCnCrawlStrategy.class);
private final List<String> supportedDomains = List.of("people.cn", "people.com.cn");
/*
 * Example URL: http://env.people.com.cn/n1/2026/0530/c1010-40730688.html
 * The four capture groups are the digit runs around the "/", "/c" and "-"
 * separators (for the example: 2026, 0530, 1010, 40730688).
 * The dot before "html" is escaped so that it matches only a literal '.';
 * unescaped it would match any character (e.g. "...-40730688xhtml").
 */
private final Pattern idRegex = Pattern.compile("(\\d+)/(\\d+)/c(\\d+)-(\\d+)\\.html");
@@ -57,7 +60,8 @@ public class PeopleCnCrawlStrategy implements CrawlStrategy {
URL articleUrl = new URL(href);
Document articleDoc = Jsoup.parse(articleUrl, 5000);
articles.add(parseSingle(articleUrl, articleDoc));
} catch (Exception ignored) {
} catch (Exception e) {
log.warn("Failed to fetch article: {}", href, e);
}
}
return articles;

20
src/main/resources/logback.xml

@@ -0,0 +1,20 @@
<!--
  Logback configuration.
  Every event at INFO level or above is written to two destinations:
  the console (STDOUT) and a log file under logs/ (FILE).
-->
<configuration>
    <!-- Console output: time of day only, since console sessions are short-lived. -->
    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
        <encoder>
            <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
        </encoder>
    </appender>

    <!--
      File output: the active file is still logs/default.log, but it is now
      rolled so that it cannot grow without bound. A new archive is started
      each day and whenever the active file exceeds maxFileSize; archives
      older than maxHistory days, or beyond totalSizeCap in total, are deleted.
    -->
    <appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <file>logs/default.log</file>
        <append>true</append>
        <rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
            <!-- %d selects the daily period, %i numbers the size-triggered archives within a day. -->
            <fileNamePattern>logs/default.%d{yyyy-MM-dd}.%i.log</fileNamePattern>
            <maxFileSize>10MB</maxFileSize>
            <maxHistory>30</maxHistory>
            <totalSizeCap>1GB</totalSizeCap>
        </rollingPolicy>
        <encoder>
            <!-- Explicit charset so the file content does not depend on the platform default. -->
            <charset>UTF-8</charset>
            <!-- Full date in the file pattern because entries span multiple days. -->
            <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
        </encoder>
    </appender>

    <root level="INFO">
        <appender-ref ref="STDOUT" />
        <appender-ref ref="FILE" />
    </root>
</configuration>
Loading…
Cancel
Save