From 1066f75e91b974c5b4979b5580af57db8221d17a Mon Sep 17 00:00:00 2001
From: Hanminxi <1772454398@qq.com>
Date: Sun, 31 May 2026 00:05:26 +0800
Subject: [PATCH] =?UTF-8?q?=E4=B8=8A=E4=BC=A0=E6=96=87=E4=BB=B6=E8=87=B3?=
=?UTF-8?q?=20''?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
BaiduStrategy.java | 32 ++++++++++++++++++++++++++++++++
BingStrategy.java | 32 ++++++++++++++++++++++++++++++++
CrawlStrategy.java | 10 ++++++++++
HttpBinStrategy.java | 33 +++++++++++++++++++++++++++++++++
JjwxcStrategy.java | 33 +++++++++++++++++++++++++++++++++
5 files changed, 140 insertions(+)
create mode 100644 BaiduStrategy.java
create mode 100644 BingStrategy.java
create mode 100644 CrawlStrategy.java
create mode 100644 HttpBinStrategy.java
create mode 100644 JjwxcStrategy.java
diff --git a/BaiduStrategy.java b/BaiduStrategy.java
new file mode 100644
index 0000000..71273ab
--- /dev/null
+++ b/BaiduStrategy.java
@@ -0,0 +1,50 @@
+package strategy;
+
+import model.Article;
+import util.HttpUtil;
+import exception.SpiderException;
+
+/**
+ * {@link CrawlStrategy} targeting the Baidu home page.
+ *
+ * <p>The produced article carries the page's {@code <title>} text together with a
+ * fixed content string.
+ */
+public class BaiduStrategy implements CrawlStrategy {
+    /** Returns the source name stored on articles produced by this strategy. */
+    @Override
+    public String getName() {
+        return "百度";
+    }
+
+    /** Returns the address fetched by {@link #crawl()}. */
+    @Override
+    public String getUrl() {
+        return "https://www.baidu.com/";
+    }
+
+    /**
+     * Fetches {@link #getUrl()} and builds an {@link Article} from the response.
+     *
+     * @return an article with the extracted title, the fixed content string, and this
+     *     strategy's URL and name
+     * @throws SpiderException presumably raised by the {@code HttpUtil} helpers when
+     *     fetching fails; confirm against that class
+     */
+    @Override
+    public Article crawl() throws SpiderException {
+        // Second argument is presumably the charset used to decode the response.
+        String html = HttpUtil.get(getUrl(), "UTF-8");
+
+        // Delimiters are the literal title tags; a string literal may not span lines.
+        String title = HttpUtil.extractTagSafe(html, "<title>", "</title>");
+
+        Article article = new Article();
+        article.setTitle(title);
+        article.setContent("百度首页");
+        article.setUrl(getUrl());
+        article.setSource(getName());
+
+        return article;
+    }
+}
diff --git a/BingStrategy.java b/BingStrategy.java
new file mode 100644
index 0000000..1029b7a
--- /dev/null
+++ b/BingStrategy.java
@@ -0,0 +1,50 @@
+package strategy;
+
+import model.Article;
+import util.HttpUtil;
+import exception.SpiderException;
+
+/**
+ * {@link CrawlStrategy} targeting the Bing (cn.bing.com) home page.
+ *
+ * <p>The produced article carries the page's {@code <title>} text together with a
+ * fixed content string.
+ */
+public class BingStrategy implements CrawlStrategy {
+    /** Returns the source name stored on articles produced by this strategy. */
+    @Override
+    public String getName() {
+        return "必应搜索";
+    }
+
+    /** Returns the address fetched by {@link #crawl()}. */
+    @Override
+    public String getUrl() {
+        return "https://cn.bing.com/";
+    }
+
+    /**
+     * Fetches {@link #getUrl()} and builds an {@link Article} from the response.
+     *
+     * @return an article with the extracted title, the fixed content string, and this
+     *     strategy's URL and name
+     * @throws SpiderException presumably raised by the {@code HttpUtil} helpers when
+     *     fetching fails; confirm against that class
+     */
+    @Override
+    public Article crawl() throws SpiderException {
+        // Second argument is presumably the charset used to decode the response.
+        String html = HttpUtil.get(getUrl(), "UTF-8");
+
+        // Delimit the extraction with the literal title tags.
+        String title = HttpUtil.extractTagSafe(html, "<title>", "</title>");
+
+        Article article = new Article();
+        article.setTitle(title);
+        article.setContent("微软必应搜索引擎首页");
+        article.setUrl(getUrl());
+        article.setSource(getName());
+
+        return article;
+    }
+}
diff --git a/CrawlStrategy.java b/CrawlStrategy.java
new file mode 100644
index 0000000..78e1325
--- /dev/null
+++ b/CrawlStrategy.java
@@ -0,0 +1,25 @@
+package strategy;
+
+import model.Article;
+import exception.SpiderException;
+
+/**
+ * Contract for one crawl target: a named source, its address, and the operation
+ * that turns it into an {@link Article}.
+ */
+public interface CrawlStrategy {
+
+    /** Returns the name identifying this source. */
+    String getName();
+
+    /** Returns the address associated with this source. */
+    String getUrl();
+
+    /**
+     * Produces an {@link Article} for this source.
+     *
+     * @return the resulting article
+     * @throws SpiderException if the crawl cannot be completed
+     */
+    Article crawl() throws SpiderException;
+}
diff --git a/HttpBinStrategy.java b/HttpBinStrategy.java
new file mode 100644
index 0000000..8683ac9
--- /dev/null
+++ b/HttpBinStrategy.java
@@ -0,0 +1,52 @@
+package strategy;
+
+import model.Article;
+import util.HttpUtil;
+import exception.SpiderException;
+
+/**
+ * {@link CrawlStrategy} targeting the HttpBin sample HTML page.
+ *
+ * <p>Both the title and the content of the produced article are extracted from the
+ * fetched markup.
+ */
+public class HttpBinStrategy implements CrawlStrategy {
+    /** Returns the source name stored on articles produced by this strategy. */
+    @Override
+    public String getName() {
+        return "HttpBin";
+    }
+
+    /** Returns the address fetched by {@link #crawl()}. */
+    @Override
+    public String getUrl() {
+        return "https://httpbin.org/html";
+    }
+
+    /**
+     * Fetches {@link #getUrl()} and builds an {@link Article} from the response.
+     *
+     * @return an article with the extracted title and content, and this strategy's URL
+     *     and name
+     * @throws SpiderException presumably raised by the {@code HttpUtil} helpers when
+     *     fetching fails; confirm against that class
+     */
+    @Override
+    public Article crawl() throws SpiderException {
+        // Second argument is presumably the charset used to decode the response.
+        String html = HttpUtil.get(getUrl(), "UTF-8");
+
+        // NOTE(review): delimiters restored as the page's heading and paragraph tags;
+        // confirm these are the elements intended for title and content.
+        String title = HttpUtil.extractTagSafe(html, "<h1>", "</h1>");
+        String content = HttpUtil.extractTagSafe(html, "<p>", "</p>");
+
+        Article article = new Article();
+        article.setTitle(title);
+        article.setContent(content);
+        article.setUrl(getUrl());
+        article.setSource(getName());
+
+        return article;
+    }
+}
diff --git a/JjwxcStrategy.java b/JjwxcStrategy.java
new file mode 100644
index 0000000..e6fe34f
--- /dev/null
+++ b/JjwxcStrategy.java
@@ -0,0 +1,51 @@
+package strategy;
+
+import model.Article;
+import util.HttpUtil;
+import exception.SpiderException;
+
+/**
+ * {@link CrawlStrategy} targeting the Jjwxc (www.jjwxc.net) home page.
+ *
+ * <p>The produced article carries the page's {@code <title>} text together with a
+ * fixed description string.
+ */
+public class JjwxcStrategy implements CrawlStrategy {
+    /** Returns the source name stored on articles produced by this strategy. */
+    @Override
+    public String getName() {
+        return "晋江文学城";
+    }
+
+    /** Returns the address fetched by {@link #crawl()}. */
+    @Override
+    public String getUrl() {
+        return "https://www.jjwxc.net/";
+    }
+
+    /**
+     * Fetches {@link #getUrl()} and builds an {@link Article} from the response.
+     *
+     * @return an article with the extracted title, the fixed description, and this
+     *     strategy's URL and name
+     * @throws SpiderException presumably raised by the {@code HttpUtil} helpers when
+     *     fetching fails; confirm against that class
+     */
+    @Override
+    public Article crawl() throws SpiderException {
+        // Requested with GB18030 rather than UTF-8; presumably the page's own encoding.
+        String html = HttpUtil.get(getUrl(), "GB18030");
+
+        // Delimit the extraction with the literal title tags.
+        String title = HttpUtil.extractTagSafe(html, "<title>", "</title>");
+        String description = "晋江文学城(www.jjwxc.net)创立于2003年8月,是具备相当规模女性网络文学原创基地";
+
+        Article article = new Article();
+        article.setTitle(title);
+        article.setContent(description);
+        article.setUrl(getUrl());
+        article.setSource(getName());
+
+        return article;
+    }
+}