6 changed files with 237 additions and 0 deletions
@ -0,0 +1,58 @@ |
|||||
|
package model; |
||||
|
|
||||
|
/**
 * Mutable data holder for an article: its title, content, URL and source name.
 *
 * <p>All fields may be {@code null}; no validation is performed by the
 * constructors or setters.
 */
public class Article {
    /** Maximum number of UTF-16 code units of content shown by {@link #toString()}. */
    private static final int PREVIEW_LENGTH = 100;

    private String title;
    private String content;
    private String url;
    private String source;

    /** Creates an article with every field left {@code null}. */
    public Article() {
    }

    /**
     * Creates a fully populated article.
     *
     * @param title   article title
     * @param content article body text
     * @param url     article URL
     * @param source  name of the source the article belongs to
     */
    public Article(String title, String content, String url, String source) {
        this.title = title;
        this.content = content;
        this.url = url;
        this.source = source;
    }

    /** @return the article title */
    public String getTitle() {
        return title;
    }

    /** @param title the new article title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the article body text */
    public String getContent() {
        return content;
    }

    /** @param content the new article body text */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the article URL */
    public String getUrl() {
        return url;
    }

    /** @param url the new article URL */
    public void setUrl(String url) {
        this.url = url;
    }

    /** @return the source name */
    public String getSource() {
        return source;
    }

    /** @param source the new source name */
    public void setSource(String source) {
        this.source = source;
    }

    /**
     * Renders the article as three lines: source and title, URL, and a content
     * preview. Content longer than {@value #PREVIEW_LENGTH} characters is cut
     * and suffixed with {@code "..."}; shorter or {@code null} content is
     * printed as is.
     *
     * @return the multi-line text representation
     */
    @Override
    public String toString() {
        return "【" + source + "】" + title + "\n"
                + "链接:" + url + "\n"
                + "内容:" + previewOf(content);
    }

    /**
     * Shortens {@code text} to at most {@link #PREVIEW_LENGTH} code units plus
     * an ellipsis, never cutting a surrogate pair in half.
     */
    private static String previewOf(String text) {
        if (text == null || text.length() <= PREVIEW_LENGTH) {
            return text;
        }
        int cut = PREVIEW_LENGTH;
        // A plain substring(0, 100) could keep a high surrogate and drop its low
        // surrogate, producing an unpaired code unit; back off by one instead.
        if (Character.isHighSurrogate(text.charAt(cut - 1))
                && Character.isLowSurrogate(text.charAt(cut))) {
            cut--;
        }
        return text.substring(0, cut) + "...";
    }
}
||||
Binary file not shown.
@ -0,0 +1,122 @@ |
|||||
|
package controller; |
||||
|
|
||||
|
import view.ConsoleView; |
||||
|
import model.Article; |
||||
|
import strategy.*; |
||||
|
import command.*; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class CrawlerController { |
||||
|
private ConsoleView view; |
||||
|
private List<Article> articles; |
||||
|
private List<CrawlStrategy> strategies; |
||||
|
|
||||
|
public CrawlerController() { |
||||
|
this.view = new ConsoleView(); |
||||
|
this.articles = new ArrayList<>(); |
||||
|
this.strategies = new ArrayList<>(); |
||||
|
|
||||
|
strategies.add(new JjwxcStrategy()); |
||||
|
strategies.add(new BaiduStrategy()); |
||||
|
strategies.add(new HttpBinStrategy()); |
||||
|
strategies.add(new BingStrategy()); |
||||
|
} |
||||
|
|
||||
|
public ConsoleView getView() { |
||||
|
return view; |
||||
|
} |
||||
|
|
||||
|
public List<Article> getArticles() { |
||||
|
return articles; |
||||
|
} |
||||
|
|
||||
|
public void addArticle(Article article) { |
||||
|
articles.add(article); |
||||
|
} |
||||
|
|
||||
|
public void clearArticles() { |
||||
|
articles.clear(); |
||||
|
} |
||||
|
|
||||
|
public String[] getStrategyNames() { |
||||
|
String[] names = new String[strategies.size()]; |
||||
|
for (int i = 0; i < strategies.size(); i++) { |
||||
|
names[i] = strategies.get(i).getName(); |
||||
|
} |
||||
|
return names; |
||||
|
} |
||||
|
|
||||
|
public void run() { |
||||
|
view.showWelcome(); |
||||
|
view.showHelp(); |
||||
|
|
||||
|
boolean running = true; |
||||
|
while (running) { |
||||
|
String input = view.getInput(); |
||||
|
|
||||
|
if (input.isEmpty()) { |
||||
|
continue; |
||||
|
} |
||||
|
|
||||
|
switch (input) { |
||||
|
case "1": |
||||
|
case "jjwxc": |
||||
|
executeCommand(new CrawlCommand(strategies.get(0), this)); |
||||
|
break; |
||||
|
|
||||
|
case "2": |
||||
|
case "baidu": |
||||
|
executeCommand(new CrawlCommand(strategies.get(1), this)); |
||||
|
break; |
||||
|
|
||||
|
case "3": |
||||
|
case "httpbin": |
||||
|
executeCommand(new CrawlCommand(strategies.get(2), this)); |
||||
|
break; |
||||
|
|
||||
|
case "4": |
||||
|
case "bing": |
||||
|
executeCommand(new CrawlCommand(strategies.get(3), this)); |
||||
|
break; |
||||
|
|
||||
|
case "all": |
||||
|
crawlAll(); |
||||
|
break; |
||||
|
|
||||
|
case "list": |
||||
|
executeCommand(new ListCommand(this)); |
||||
|
break; |
||||
|
|
||||
|
case "save": |
||||
|
executeCommand(new SaveCommand(this)); |
||||
|
break; |
||||
|
|
||||
|
case "help": |
||||
|
executeCommand(new HelpCommand(this)); |
||||
|
break; |
||||
|
|
||||
|
case "exit": |
||||
|
case "quit": |
||||
|
running = false; |
||||
|
view.showGoodbye(); |
||||
|
break; |
||||
|
|
||||
|
default: |
||||
|
view.showError("未知命令: " + input + ",输入 help 查看帮助"); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private void executeCommand(Command command) { |
||||
|
command.execute(); |
||||
|
} |
||||
|
|
||||
|
private void crawlAll() { |
||||
|
view.showMessage("\n开始爬取所有网站...\n"); |
||||
|
for (CrawlStrategy strategy : strategies) { |
||||
|
executeCommand(new CrawlCommand(strategy, this)); |
||||
|
} |
||||
|
view.showMessage("\n全部爬取完成!共 " + articles.size() + " 条数据"); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,26 @@ |
|||||
|
package exception; |
||||
|
|
||||
|
public class NetworkException extends SpiderException { |
||||
|
public enum ErrorType { |
||||
|
CONNECTION_TIMEOUT, |
||||
|
CONNECTION_REFUSED, |
||||
|
HOST_NOT_FOUND, |
||||
|
RESPONSE_ERROR |
||||
|
} |
||||
|
|
||||
|
private final ErrorType errorType; |
||||
|
|
||||
|
public NetworkException(String message, ErrorType errorType) { |
||||
|
super(message); |
||||
|
this.errorType = errorType; |
||||
|
} |
||||
|
|
||||
|
public NetworkException(String message, ErrorType errorType, Throwable cause) { |
||||
|
super(message, cause); |
||||
|
this.errorType = errorType; |
||||
|
} |
||||
|
|
||||
|
public ErrorType getErrorType() { |
||||
|
return errorType; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,20 @@ |
|||||
|
package exception; |
||||
|
|
||||
|
public class ParseException extends SpiderException { |
||||
|
public enum ErrorType { |
||||
|
INVALID_HTML, |
||||
|
TAG_NOT_FOUND, |
||||
|
REGEX_ERROR |
||||
|
} |
||||
|
|
||||
|
private final ErrorType errorType; |
||||
|
|
||||
|
public ParseException(String message, ErrorType errorType) { |
||||
|
super(message); |
||||
|
this.errorType = errorType; |
||||
|
} |
||||
|
|
||||
|
public ErrorType getErrorType() { |
||||
|
return errorType; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,11 @@ |
|||||
|
package exception; |
||||
|
|
||||
|
/**
 * Checked base exception of the {@code exception} package; {@code NetworkException}
 * and {@code ParseException} extend it.
 */
public class SpiderException extends Exception {
    // Exceptions are Serializable through Throwable; pin the version explicitly
    // so the serialized form does not depend on compiler-generated defaults.
    private static final long serialVersionUID = 1L;

    /**
     * Creates an exception without an underlying cause.
     *
     * @param message detail message
     */
    public SpiderException(String message) {
        super(message);
    }

    /**
     * Creates an exception that wraps an underlying cause.
     *
     * @param message detail message
     * @param cause   the lower-level exception being wrapped
     */
    public SpiderException(String message, Throwable cause) {
        super(message, cause);
    }
}
||||
Loading…
Reference in new issue