6 changed files with 237 additions and 0 deletions
@ -0,0 +1,58 @@ |
|||
package model; |
|||
|
|||
/**
 * Mutable data holder for one crawled item: a title, its body text, the URL
 * it came from and a label for the originating site.
 *
 * <p>All four properties may be {@code null}; no validation is performed.
 */
public class Article {
    /** Maximum number of UTF-16 code units of the content shown by {@link #toString()}. */
    private static final int PREVIEW_LENGTH = 100;

    /** Suffix appended to the content preview when the content was cut. */
    private static final String ELLIPSIS = "...";

    private String title;
    private String content;
    private String url;
    private String source;

    /** Creates an article whose properties are all {@code null}. */
    public Article() {
    }

    /**
     * Creates a fully populated article.
     *
     * @param title   the article title
     * @param content the article body text
     * @param url     the address the article was read from
     * @param source  the label of the originating site
     */
    public Article(String title, String content, String url, String source) {
        this.title = title;
        this.content = content;
        this.url = url;
        this.source = source;
    }

    /** @return the article title, possibly {@code null} */
    public String getTitle() {
        return title;
    }

    /** @param title the new article title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the article body text, possibly {@code null} */
    public String getContent() {
        return content;
    }

    /** @param content the new article body text */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the article URL, possibly {@code null} */
    public String getUrl() {
        return url;
    }

    /** @param url the new article URL */
    public void setUrl(String url) {
        this.url = url;
    }

    /** @return the label of the originating site, possibly {@code null} */
    public String getSource() {
        return source;
    }

    /** @param source the new label of the originating site */
    public void setSource(String source) {
        this.source = source;
    }

    /**
     * Renders the article as three lines: source and title, the URL, and a
     * preview of the content limited to {@value #PREVIEW_LENGTH} code units.
     *
     * @return the multi-line display form; {@code null} properties are
     *         rendered as the text {@code "null"}
     */
    @Override
    public String toString() {
        return "【" + source + "】" + title + "\n" +
                "链接:" + url + "\n" +
                "内容:" + preview(content);
    }

    /**
     * Shortens {@code text} to at most {@value #PREVIEW_LENGTH} code units
     * followed by an ellipsis; shorter or {@code null} text is returned as is.
     */
    private static String preview(String text) {
        if (text == null || text.length() <= PREVIEW_LENGTH) {
            return text;
        }
        int end = PREVIEW_LENGTH;
        // Cutting right after a high surrogate would leave half of a
        // supplementary character (e.g. an emoji) in the output, so back off
        // by one code unit and drop the whole character instead.
        if (Character.isHighSurrogate(text.charAt(end - 1))) {
            end--;
        }
        return text.substring(0, end) + ELLIPSIS;
    }
}
|||
Binary file not shown.
@ -0,0 +1,122 @@ |
|||
package controller; |
|||
|
|||
import view.ConsoleView; |
|||
import model.Article; |
|||
import strategy.*; |
|||
import command.*; |
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
public class CrawlerController { |
|||
private ConsoleView view; |
|||
private List<Article> articles; |
|||
private List<CrawlStrategy> strategies; |
|||
|
|||
public CrawlerController() { |
|||
this.view = new ConsoleView(); |
|||
this.articles = new ArrayList<>(); |
|||
this.strategies = new ArrayList<>(); |
|||
|
|||
strategies.add(new JjwxcStrategy()); |
|||
strategies.add(new BaiduStrategy()); |
|||
strategies.add(new HttpBinStrategy()); |
|||
strategies.add(new BingStrategy()); |
|||
} |
|||
|
|||
public ConsoleView getView() { |
|||
return view; |
|||
} |
|||
|
|||
public List<Article> getArticles() { |
|||
return articles; |
|||
} |
|||
|
|||
public void addArticle(Article article) { |
|||
articles.add(article); |
|||
} |
|||
|
|||
public void clearArticles() { |
|||
articles.clear(); |
|||
} |
|||
|
|||
public String[] getStrategyNames() { |
|||
String[] names = new String[strategies.size()]; |
|||
for (int i = 0; i < strategies.size(); i++) { |
|||
names[i] = strategies.get(i).getName(); |
|||
} |
|||
return names; |
|||
} |
|||
|
|||
public void run() { |
|||
view.showWelcome(); |
|||
view.showHelp(); |
|||
|
|||
boolean running = true; |
|||
while (running) { |
|||
String input = view.getInput(); |
|||
|
|||
if (input.isEmpty()) { |
|||
continue; |
|||
} |
|||
|
|||
switch (input) { |
|||
case "1": |
|||
case "jjwxc": |
|||
executeCommand(new CrawlCommand(strategies.get(0), this)); |
|||
break; |
|||
|
|||
case "2": |
|||
case "baidu": |
|||
executeCommand(new CrawlCommand(strategies.get(1), this)); |
|||
break; |
|||
|
|||
case "3": |
|||
case "httpbin": |
|||
executeCommand(new CrawlCommand(strategies.get(2), this)); |
|||
break; |
|||
|
|||
case "4": |
|||
case "bing": |
|||
executeCommand(new CrawlCommand(strategies.get(3), this)); |
|||
break; |
|||
|
|||
case "all": |
|||
crawlAll(); |
|||
break; |
|||
|
|||
case "list": |
|||
executeCommand(new ListCommand(this)); |
|||
break; |
|||
|
|||
case "save": |
|||
executeCommand(new SaveCommand(this)); |
|||
break; |
|||
|
|||
case "help": |
|||
executeCommand(new HelpCommand(this)); |
|||
break; |
|||
|
|||
case "exit": |
|||
case "quit": |
|||
running = false; |
|||
view.showGoodbye(); |
|||
break; |
|||
|
|||
default: |
|||
view.showError("未知命令: " + input + ",输入 help 查看帮助"); |
|||
} |
|||
} |
|||
} |
|||
|
|||
private void executeCommand(Command command) { |
|||
command.execute(); |
|||
} |
|||
|
|||
private void crawlAll() { |
|||
view.showMessage("\n开始爬取所有网站...\n"); |
|||
for (CrawlStrategy strategy : strategies) { |
|||
executeCommand(new CrawlCommand(strategy, this)); |
|||
} |
|||
view.showMessage("\n全部爬取完成!共 " + articles.size() + " 条数据"); |
|||
} |
|||
} |
|||
@ -0,0 +1,26 @@ |
|||
package exception; |
|||
|
|||
public class NetworkException extends SpiderException { |
|||
public enum ErrorType { |
|||
CONNECTION_TIMEOUT, |
|||
CONNECTION_REFUSED, |
|||
HOST_NOT_FOUND, |
|||
RESPONSE_ERROR |
|||
} |
|||
|
|||
private final ErrorType errorType; |
|||
|
|||
public NetworkException(String message, ErrorType errorType) { |
|||
super(message); |
|||
this.errorType = errorType; |
|||
} |
|||
|
|||
public NetworkException(String message, ErrorType errorType, Throwable cause) { |
|||
super(message, cause); |
|||
this.errorType = errorType; |
|||
} |
|||
|
|||
public ErrorType getErrorType() { |
|||
return errorType; |
|||
} |
|||
} |
|||
@ -0,0 +1,20 @@ |
|||
package exception; |
|||
|
|||
public class ParseException extends SpiderException { |
|||
public enum ErrorType { |
|||
INVALID_HTML, |
|||
TAG_NOT_FOUND, |
|||
REGEX_ERROR |
|||
} |
|||
|
|||
private final ErrorType errorType; |
|||
|
|||
public ParseException(String message, ErrorType errorType) { |
|||
super(message); |
|||
this.errorType = errorType; |
|||
} |
|||
|
|||
public ErrorType getErrorType() { |
|||
return errorType; |
|||
} |
|||
} |
|||
@ -0,0 +1,11 @@ |
|||
package exception; |
|||
|
|||
/**
 * Checked base exception of the {@code exception} package.
 */
public class SpiderException extends Exception {

    /**
     * Creates an exception that carries only a description.
     *
     * @param message human-readable description of the failure
     */
    public SpiderException(final String message) {
        super(message);
    }

    /**
     * Creates an exception that carries a description and the exception that
     * triggered it.
     *
     * @param message human-readable description of the failure
     * @param cause   the underlying exception
     */
    public SpiderException(final String message, final Throwable cause) {
        super(message, cause);
    }
}
|||
Loading…
Reference in new issue