Browse Source

上传文件至 'project'

main
Hanminxi 3 weeks ago
parent
commit
03f92a0475
  1. 58
      project/Article.java
  2. BIN
      project/CrawlerController.class
  3. 122
      project/CrawlerController.java
  4. 26
      project/NetworkException.java
  5. 20
      project/ParseException.java
  6. 11
      project/SpiderException.java

58
project/Article.java

@ -0,0 +1,58 @@
package model;
/**
 * Mutable data holder for one crawled item: a title, its text content, the URL it
 * was found at, and a source label.
 *
 * <p>No field is validated; any of them may be {@code null}.
 */
public class Article {
    /** Maximum number of UTF-16 code units of content shown by {@link #toString()}. */
    private static final int PREVIEW_LENGTH = 100;

    private String title;
    private String content;
    private String url;
    private String source;

    /** Creates an article with every field left {@code null}. */
    public Article() {
    }

    /**
     * Creates an article with all four fields set.
     *
     * @param title   article title
     * @param content article text
     * @param url     article URL
     * @param source  source label, printed in brackets by {@link #toString()}
     */
    public Article(String title, String content, String url, String source) {
        this.title = title;
        this.content = content;
        this.url = url;
        this.source = source;
    }

    /** @return the title, possibly {@code null} */
    public String getTitle() {
        return title;
    }

    /** @param title the new title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the full, untruncated content, possibly {@code null} */
    public String getContent() {
        return content;
    }

    /** @param content the new content */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the URL, possibly {@code null} */
    public String getUrl() {
        return url;
    }

    /** @param url the new URL */
    public void setUrl(String url) {
        this.url = url;
    }

    /** @return the source label, possibly {@code null} */
    public String getSource() {
        return source;
    }

    /** @param source the new source label */
    public void setSource(String source) {
        this.source = source;
    }

    /**
     * Renders a three-line summary: bracketed source plus title, the URL, and a
     * preview of the content. Content longer than {@value #PREVIEW_LENGTH} UTF-16
     * code units is cut and suffixed with {@code "..."}; {@code null} fields are
     * printed as the text {@code "null"}.
     *
     * @return the summary text
     */
    @Override
    public String toString() {
        return "【" + source + "】" + title + "\n"
                + "链接:" + url + "\n"
                + "内容:" + previewOf(content);
    }

    /**
     * Shortens {@code text} to at most {@link #PREVIEW_LENGTH} code units plus an ellipsis.
     * Returns the argument unchanged (including {@code null}) when it already fits.
     */
    private static String previewOf(String text) {
        if (text == null || text.length() <= PREVIEW_LENGTH) {
            return text;
        }
        int end = PREVIEW_LENGTH;
        // Never cut between the two halves of a surrogate pair: that would leave a
        // lone high surrogate, which is not a valid character on its own.
        if (Character.isHighSurrogate(text.charAt(end - 1))
                && Character.isLowSurrogate(text.charAt(end))) {
            end--;
        }
        return text.substring(0, end) + "...";
    }
}

BIN
project/CrawlerController.class

Binary file not shown.

122
project/CrawlerController.java

@ -0,0 +1,122 @@
package controller;
import view.ConsoleView;
import model.Article;
import strategy.*;
import command.*;
import java.util.ArrayList;
import java.util.List;
/**
 * Drives the interactive crawler session: reads commands from a {@link ConsoleView},
 * maps each one to a {@link Command}, and keeps the list of collected articles.
 */
public class CrawlerController {
    private ConsoleView view;
    private List<Article> articles;
    private List<CrawlStrategy> strategies;

    /**
     * Builds the controller with a fresh view, an empty article list, and the four
     * built-in strategies.
     */
    public CrawlerController() {
        view = new ConsoleView();
        articles = new ArrayList<>();
        strategies = new ArrayList<>();
        // Order matters: the menu shortcuts "1".."4" address strategies by position.
        strategies.add(new JjwxcStrategy());
        strategies.add(new BaiduStrategy());
        strategies.add(new HttpBinStrategy());
        strategies.add(new BingStrategy());
    }

    /** @return the view used for all input and output */
    public ConsoleView getView() {
        return view;
    }

    /** @return the live internal list of collected articles (not a copy) */
    public List<Article> getArticles() {
        return articles;
    }

    /**
     * Appends one article to the collected list.
     *
     * @param article the article to store
     */
    public void addArticle(Article article) {
        articles.add(article);
    }

    /** Removes every collected article. */
    public void clearArticles() {
        articles.clear();
    }

    /** @return the names of the registered strategies, in registration order */
    public String[] getStrategyNames() {
        String[] result = new String[strategies.size()];
        int slot = 0;
        for (CrawlStrategy each : strategies) {
            result[slot++] = each.getName();
        }
        return result;
    }

    /**
     * Shows the welcome and help screens, then processes input lines until an
     * exit command is entered. Empty lines are skipped.
     */
    public void run() {
        view.showWelcome();
        view.showHelp();
        boolean keepGoing = true;
        while (keepGoing) {
            String line = view.getInput();
            if (!line.isEmpty()) {
                keepGoing = handle(line);
            }
        }
    }

    /**
     * Executes the action bound to one input line.
     *
     * @return {@code false} when the session should end, {@code true} otherwise
     */
    private boolean handle(String input) {
        switch (input) {
            case "1":
            case "jjwxc":
                crawlWith(0);
                return true;
            case "2":
            case "baidu":
                crawlWith(1);
                return true;
            case "3":
            case "httpbin":
                crawlWith(2);
                return true;
            case "4":
            case "bing":
                crawlWith(3);
                return true;
            case "all":
                crawlAll();
                return true;
            case "list":
                executeCommand(new ListCommand(this));
                return true;
            case "save":
                executeCommand(new SaveCommand(this));
                return true;
            case "help":
                executeCommand(new HelpCommand(this));
                return true;
            case "exit":
            case "quit":
                view.showGoodbye();
                return false;
            default:
                view.showError("未知命令: " + input + ",输入 help 查看帮助");
                return true;
        }
    }

    /** Runs a crawl command for the strategy registered at {@code index}. */
    private void crawlWith(int index) {
        executeCommand(new CrawlCommand(strategies.get(index), this));
    }

    /** Single point through which every command is executed. */
    private void executeCommand(Command command) {
        command.execute();
    }

    /** Runs every registered strategy in order, then reports the total article count. */
    private void crawlAll() {
        view.showMessage("\n开始爬取所有网站...\n");
        for (CrawlStrategy current : strategies) {
            executeCommand(new CrawlCommand(current, this));
        }
        int total = articles.size();
        view.showMessage("\n全部爬取完成!共 " + total + " 条数据");
    }
}

26
project/NetworkException.java

@ -0,0 +1,26 @@
package exception;
/**
 * A {@link SpiderException} for network-level failures, tagged with an
 * {@link ErrorType} that callers can read via {@link #getErrorType()}.
 */
public class NetworkException extends SpiderException {
    /** Failure category supplied at construction; never reassigned. */
    private final ErrorType errorType;

    /**
     * Creates an exception that wraps an underlying cause.
     *
     * @param message   detail message
     * @param errorType failure category
     * @param cause     the underlying throwable
     */
    public NetworkException(String message, ErrorType errorType, Throwable cause) {
        super(message, cause);
        this.errorType = errorType;
    }

    /**
     * Creates an exception without a cause.
     *
     * @param message   detail message
     * @param errorType failure category
     */
    public NetworkException(String message, ErrorType errorType) {
        super(message);
        this.errorType = errorType;
    }

    /** @return the failure category given to the constructor */
    public ErrorType getErrorType() {
        return errorType;
    }

    /** Categories of network failure this exception can carry. */
    public enum ErrorType {
        CONNECTION_TIMEOUT,
        CONNECTION_REFUSED,
        HOST_NOT_FOUND,
        RESPONSE_ERROR
    }
}

20
project/ParseException.java

@ -0,0 +1,20 @@
package exception;
/**
 * A {@link SpiderException} for parsing failures, tagged with an
 * {@link ErrorType} that callers can read via {@link #getErrorType()}.
 */
public class ParseException extends SpiderException {
    /** Categories of parse failure this exception can carry. */
    public enum ErrorType {
        INVALID_HTML,
        TAG_NOT_FOUND,
        REGEX_ERROR
    }

    /** Failure category supplied at construction; never reassigned. */
    private final ErrorType errorType;

    /**
     * Creates an exception without a cause.
     *
     * @param message   detail message
     * @param errorType failure category
     */
    public ParseException(String message, ErrorType errorType) {
        super(message);
        this.errorType = errorType;
    }

    /**
     * Creates an exception that wraps an underlying cause, so the original stack
     * trace is not lost when a lower-level error is translated into a parse error.
     *
     * @param message   detail message
     * @param errorType failure category
     * @param cause     the underlying throwable
     */
    public ParseException(String message, ErrorType errorType, Throwable cause) {
        super(message, cause);
        this.errorType = errorType;
    }

    /** @return the failure category given to the constructor */
    public ErrorType getErrorType() {
        return errorType;
    }
}

11
project/SpiderException.java

@ -0,0 +1,11 @@
package exception;
/**
 * Checked base exception for the crawler; subclasses add a failure category.
 */
public class SpiderException extends Exception {

    /**
     * Creates an exception that wraps an underlying cause.
     *
     * @param message detail message
     * @param cause   the underlying throwable
     */
    public SpiderException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception with a detail message and no cause.
     *
     * @param message detail message
     */
    public SpiderException(String message) {
        super(message);
    }
}
Loading…
Cancel
Save