From 1066f75e91b974c5b4979b5580af57db8221d17a Mon Sep 17 00:00:00 2001
From: Hanminxi <1772454398@qq.com>
Date: Sun, 31 May 2026 00:05:26 +0800
Subject: [PATCH] =?UTF-8?q?=E4=B8=8A=E4=BC=A0=E6=96=87=E4=BB=B6=E8=87=B3?=
 =?UTF-8?q?=20''?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 BaiduStrategy.java   | 32 ++++++++++++++++++++++++++++++++
 BingStrategy.java    | 32 ++++++++++++++++++++++++++++++++
 CrawlStrategy.java   | 10 ++++++++++
 HttpBinStrategy.java | 33 +++++++++++++++++++++++++++++++++
 JjwxcStrategy.java   | 33 +++++++++++++++++++++++++++++++++
 5 files changed, 140 insertions(+)
 create mode 100644 BaiduStrategy.java
 create mode 100644 BingStrategy.java
 create mode 100644 CrawlStrategy.java
 create mode 100644 HttpBinStrategy.java
 create mode 100644 JjwxcStrategy.java

diff --git a/BaiduStrategy.java b/BaiduStrategy.java
new file mode 100644
index 0000000..71273ab
--- /dev/null
+++ b/BaiduStrategy.java
@@ -0,0 +1,32 @@
+package strategy;
+
+import model.Article;
+import util.HttpUtil;
+import exception.SpiderException;
+
+public class BaiduStrategy implements CrawlStrategy {
+    @Override
+    public String getName() {
+        return "百度";
+    }
+
+    @Override
+    public String getUrl() {
+        return "https://www.baidu.com/";
+    }
+
+    @Override
+    public Article crawl() throws SpiderException {
+        String html = HttpUtil.get(getUrl(), "UTF-8");
+
+        String title = HttpUtil.extractTagSafe(html, "<title>", "</title>");
+
+        Article article = new Article();
+        article.setTitle(title);
+        article.setContent("百度首页");
+        article.setUrl(getUrl());
+        article.setSource(getName());
+
+        return article;
+    }
+}
diff --git a/BingStrategy.java b/BingStrategy.java
new file mode 100644
index 0000000..1029b7a
--- /dev/null
+++ b/BingStrategy.java
@@ -0,0 +1,32 @@
+package strategy;
+
+import model.Article;
+import util.HttpUtil;
+import exception.SpiderException;
+
+public class BingStrategy implements CrawlStrategy {
+    @Override
+    public String getName() {
+        return "必应搜索";
+    }
+
+    @Override
+    public String getUrl() {
+        return "https://cn.bing.com/";
+    }
+
+    @Override
+    public Article crawl() throws SpiderException {
+        String html = HttpUtil.get(getUrl(), "UTF-8");
+
+        String title = HttpUtil.extractTagSafe(html, "<title>", "</title>");
+
+        Article article = new Article();
+        article.setTitle(title);
+        article.setContent("微软必应搜索引擎首页");
+        article.setUrl(getUrl());
+        article.setSource(getName());
+
+        return article;
+    }
+}
diff --git a/CrawlStrategy.java b/CrawlStrategy.java
new file mode 100644
index 0000000..78e1325
--- /dev/null
+++ b/CrawlStrategy.java
@@ -0,0 +1,10 @@
+package strategy;
+
+import model.Article;
+import exception.SpiderException;
+
+public interface CrawlStrategy {
+    String getName();
+    String getUrl();
+    Article crawl() throws SpiderException;
+}
diff --git a/HttpBinStrategy.java b/HttpBinStrategy.java
new file mode 100644
index 0000000..8683ac9
--- /dev/null
+++ b/HttpBinStrategy.java
@@ -0,0 +1,33 @@
+package strategy;
+
+import model.Article;
+import util.HttpUtil;
+import exception.SpiderException;
+
+public class HttpBinStrategy implements CrawlStrategy {
+    @Override
+    public String getName() {
+        return "HttpBin";
+    }
+
+    @Override
+    public String getUrl() {
+        return "https://httpbin.org/html";
+    }
+
+    @Override
+    public Article crawl() throws SpiderException {
+        String html = HttpUtil.get(getUrl(), "UTF-8");
+
+        String title = HttpUtil.extractTagSafe(html, "<h1>", "</h1>");
+        String content = HttpUtil.extractTagSafe(html, "<p>", "</p>");
+
+        Article article = new Article();
+        article.setTitle(title);
+        article.setContent(content);
+        article.setUrl(getUrl());
+        article.setSource(getName());
+
+        return article;
+    }
+}
diff --git a/JjwxcStrategy.java b/JjwxcStrategy.java
new file mode 100644
index 0000000..e6fe34f
--- /dev/null
+++ b/JjwxcStrategy.java
@@ -0,0 +1,33 @@
+package strategy;
+
+import model.Article;
+import util.HttpUtil;
+import exception.SpiderException;
+
+public class JjwxcStrategy implements CrawlStrategy {
+    @Override
+    public String getName() {
+        return "晋江文学城";
+    }
+
+    @Override
+    public String getUrl() {
+        return "https://www.jjwxc.net/";
+    }
+
+    @Override
+    public Article crawl() throws SpiderException {
+        String html = HttpUtil.get(getUrl(), "GB18030");
+
+        String title = HttpUtil.extractTagSafe(html, "<title>", "</title>");
+        String description = "晋江文学城(www.jjwxc.net)创立于2003年8月,是具备相当规模女性网络文学原创基地";
+
+        Article article = new Article();
+        article.setTitle(title);
+        article.setContent(description);
+        article.setUrl(getUrl());
+        article.setSource(getName());
+
+        return article;
+    }
+}