You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
500 lines
27 KiB
500 lines
27 KiB
import org.jsoup.Jsoup;
|
|
import org.jsoup.nodes.Document;
|
|
import org.jsoup.nodes.Element;
|
|
import org.jsoup.select.Elements;
|
|
|
|
import java.io.*;
|
|
import java.time.LocalDateTime;
|
|
import java.time.format.DateTimeFormatter;
|
|
import java.util.*;
|
|
|
|
// ==================== 1. 异常体系 ====================
|
|
/**
 * Failure categories used by the crawler. Each constant carries a short
 * human-readable description that is exposed through {@link #getMessage()}.
 */
enum ErrorCode {
    /** "Connection failed". */
    CONNECTION_ERROR("连接失败"),
    /** "Parsing failed". */
    PARSE_ERROR("解析失败"),
    /** "Argument validation failed". */
    VALIDATION_ERROR("参数验证失败"),
    /** "I/O operation failed". */
    IO_ERROR("IO操作失败"),
    /** "Unknown error". */
    UNKNOWN_ERROR("未知错误"),
    /** "No data retrieved". */
    NO_RESULTS_ERROR("未获取到数据");

    private final String message;

    ErrorCode(String message) {
        this.message = message;
    }

    /**
     * Returns the description attached to this error category.
     *
     * @return the description text given at declaration
     */
    public String getMessage() {
        return message;
    }
}
|
|
|
|
class CrawlerException extends Exception {
|
|
private final ErrorCode errorCode;
|
|
public CrawlerException(ErrorCode errorCode, String message) { super(message); this.errorCode = errorCode; }
|
|
public CrawlerException(ErrorCode errorCode, String message, Throwable cause) { super(message, cause); this.errorCode = errorCode; }
|
|
public ErrorCode getErrorCode() { return errorCode; }
|
|
}
|
|
|
|
// ==================== 2. 数据模型 (Model) ====================
|
|
/**
 * Mutable data holder for one crawled book entry. All text fields start as
 * {@code null} and the rating starts as {@code 0.0} until a setter is called.
 */
class Book {
    private String title;
    private String authors;
    private String publisher;
    private String publishDate;
    private String price;
    private double rating;
    private String summary;
    private String url;

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getAuthors() {
        return authors;
    }

    public void setAuthors(String authors) {
        this.authors = authors;
    }

    public String getPublisher() {
        return publisher;
    }

    public void setPublisher(String publisher) {
        this.publisher = publisher;
    }

    public String getPublishDate() {
        return publishDate;
    }

    public void setPublishDate(String publishDate) {
        this.publishDate = publishDate;
    }

    public String getPrice() {
        return price;
    }

    public void setPrice(String price) {
        this.price = price;
    }

    public double getRating() {
        return rating;
    }

    public void setRating(double rating) {
        this.rating = rating;
    }

    public String getSummary() {
        return summary;
    }

    public void setSummary(String summary) {
        this.summary = summary;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }
}
|
|
|
|
/**
 * Mutable data holder for one crawled movie entry. All text fields start as
 * {@code null} and the rating starts as {@code 0.0} until a setter is called.
 */
class Movie {
    private String title;
    private String director;
    private String actors;
    private String year;
    private String type;
    private double rating;
    private String url;

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getDirector() {
        return director;
    }

    public void setDirector(String director) {
        this.director = director;
    }

    public String getActors() {
        return actors;
    }

    public void setActors(String actors) {
        this.actors = actors;
    }

    public String getYear() {
        return year;
    }

    public void setYear(String year) {
        this.year = year;
    }

    public String getType() {
        return type;
    }

    public void setType(String type) {
        this.type = type;
    }

    public double getRating() {
        return rating;
    }

    public void setRating(double rating) {
        this.rating = rating;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }
}
|
|
|
|
/**
 * Mutable data holder for one crawled music entry. All text fields start as
 * {@code null} and the rating starts as {@code 0.0} until a setter is called.
 */
class Music {
    private String title;
    private String artist;
    private String album;
    private String releaseDate;
    private double rating;
    private String url;

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getArtist() {
        return artist;
    }

    public void setArtist(String artist) {
        this.artist = artist;
    }

    public String getAlbum() {
        return album;
    }

    public void setAlbum(String album) {
        this.album = album;
    }

    public String getReleaseDate() {
        return releaseDate;
    }

    public void setReleaseDate(String releaseDate) {
        this.releaseDate = releaseDate;
    }

    public double getRating() {
        return rating;
    }

    public void setRating(double rating) {
        this.rating = rating;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }
}
|
|
|
|
class Article {
|
|
private String title; private String author; private String publishDate;
|
|
private String views; private String url;
|
|
|
|
public String getTitle() { return title; } public void setTitle(String title) { this.title = title; }
|
|
public String getAuthor() { return author; } public void setAuthor(String author) { this.author = author; }
|
|
public String getPublishDate() { return publishDate; } public void setPublishDate(String publishDate) { this.publishDate = publishDate; }
|
|
public String getViews() { return views; } public void setViews(String views) { this.views = views; }
|
|
public String getUrl() { return url; } public void setUrl(String url) { this.url = url; }
|
|
}
|
|
|
|
/**
 * Immutable summary of one crawl run: the source name, the collected items,
 * the elapsed time and the number of items.
 */
class CrawlResult {
    private final String source;
    private final List<Object> items;
    private final long crawlTime;
    private final int totalCount;

    /**
     * Creates a result from a snapshot of {@code items}. Later changes to the
     * caller's list do not affect this object, so {@link #getTotalCount()}
     * always matches {@link #getItems()}.
     *
     * @param source name of the crawled source
     * @param items collected items; must not be {@code null}
     * @param crawlTime elapsed time of the crawl (callers in this file pass milliseconds)
     * @throws NullPointerException if {@code items} is {@code null}
     */
    public CrawlResult(String source, List<Object> items, long crawlTime) {
        Objects.requireNonNull(items, "items");
        this.source = source;
        // Copy first, then wrap: the stored list can no longer drift from totalCount.
        this.items = Collections.unmodifiableList(new ArrayList<>(items));
        this.crawlTime = crawlTime;
        this.totalCount = this.items.size();
    }

    /** Returns the source name given at construction. */
    public String getSource() {
        return source;
    }

    /** Returns a read-only view of the collected items. */
    public List<Object> getItems() {
        return items;
    }

    /** Returns the elapsed time given at construction. */
    public long getCrawlTime() {
        return crawlTime;
    }

    /** Returns the number of items captured at construction. */
    public int getTotalCount() {
        return totalCount;
    }
}
|
|
|
|
// ==================== 3. 策略模式 (Strategy) ====================
|
|
interface CrawlStrategy {
|
|
List<Object> crawl(int pageCount) throws CrawlerException;
|
|
String getSourceName();
|
|
String getItemType();
|
|
}
|
|
|
|
class DoubanBookStrategy implements CrawlStrategy {
|
|
@Override
|
|
public List<Object> crawl(int pageCount) throws CrawlerException {
|
|
List<Object> books = new ArrayList<>();
|
|
try {
|
|
for (int page = 0; page < pageCount && books.size() < 35; page++) {
|
|
String url = "https://book.douban.com/tag/计算机?start=" + (page * 20);
|
|
System.out.println("正在爬取豆瓣读书: " + url);
|
|
Document doc = HttpUtil.getDocument(url);
|
|
Elements bookElements = doc.select(".subject-item");
|
|
for (Element element : bookElements) {
|
|
Book book = new Book();
|
|
Element titleElement = element.selectFirst(".info h2 a");
|
|
if (titleElement != null) { book.setTitle(titleElement.text().trim()); book.setUrl(titleElement.attr("href")); }
|
|
Element infoElement = element.selectFirst(".info .pub");
|
|
if (infoElement != null) {
|
|
String[] parts = infoElement.text().trim().split("/");
|
|
if (parts.length >= 4) {
|
|
book.setAuthors(parts[0].trim());
|
|
book.setPublisher(parts[1].trim());
|
|
book.setPublishDate(parts[2].trim());
|
|
book.setPrice(parts[3].trim());
|
|
}
|
|
}
|
|
Element ratingElement = element.selectFirst(".info .rating_nums");
|
|
if (ratingElement != null) {
|
|
try { book.setRating(Double.parseDouble(ratingElement.text().trim())); }
|
|
catch (NumberFormatException e) { book.setRating(0.0); }
|
|
}
|
|
if (book.getTitle() != null && !book.getTitle().isEmpty()) books.add(book);
|
|
if (books.size() >= 35) break;
|
|
}
|
|
Thread.sleep(800);
|
|
}
|
|
} catch (IOException e) { throw new CrawlerException(ErrorCode.CONNECTION_ERROR, "连接豆瓣失败", e); }
|
|
catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new CrawlerException(ErrorCode.UNKNOWN_ERROR, "爬取被中断", e); }
|
|
if (books.isEmpty()) throw new CrawlerException(ErrorCode.NO_RESULTS_ERROR, "未获取到豆瓣图书数据");
|
|
return books;
|
|
}
|
|
@Override public String getSourceName() { return "豆瓣读书"; }
|
|
@Override public String getItemType() { return "Book"; }
|
|
}
|
|
|
|
class DoubanMovieStrategy implements CrawlStrategy {
|
|
@Override
|
|
public List<Object> crawl(int pageCount) throws CrawlerException {
|
|
List<Object> movies = new ArrayList<>();
|
|
try {
|
|
for (int page = 0; page < pageCount && movies.size() < 35; page++) {
|
|
String url = "https://movie.douban.com/top250?start=" + (page * 25);
|
|
System.out.println("正在爬取豆瓣电影: " + url);
|
|
Document doc = HttpUtil.getDocument(url);
|
|
Elements movieElements = doc.select(".item");
|
|
for (Element element : movieElements) {
|
|
Movie movie = new Movie();
|
|
Element titleElement = element.selectFirst(".hd a");
|
|
if (titleElement != null) {
|
|
movie.setTitle(titleElement.text().replaceAll("\\s+", " ").trim());
|
|
movie.setUrl(titleElement.attr("href"));
|
|
}
|
|
Element infoElement = element.selectFirst(".bd p:first-child");
|
|
if (infoElement != null) {
|
|
String info = infoElement.text();
|
|
if (info.contains("导演")) {
|
|
String[] parts = info.split("\\s+");
|
|
movie.setDirector(parts[1]);
|
|
StringBuilder actors = new StringBuilder();
|
|
for (int i = 3; i < parts.length && i < 6; i++) actors.append(parts[i]).append(" ");
|
|
movie.setActors(actors.toString().trim());
|
|
}
|
|
}
|
|
Element ratingElement = element.selectFirst(".rating_num");
|
|
if (ratingElement != null) {
|
|
try { movie.setRating(Double.parseDouble(ratingElement.text().trim())); }
|
|
catch (NumberFormatException e) { movie.setRating(0.0); }
|
|
}
|
|
Element yearElement = element.selectFirst(".bd p:nth-child(2)");
|
|
if (yearElement != null) {
|
|
String text = yearElement.text();
|
|
int idx = text.indexOf("上映日期:");
|
|
if (idx > 0) movie.setYear(text.substring(idx + 5).trim().substring(0, 4));
|
|
}
|
|
if (movie.getTitle() != null && !movie.getTitle().isEmpty()) movies.add(movie);
|
|
if (movies.size() >= 35) break;
|
|
}
|
|
Thread.sleep(800);
|
|
}
|
|
} catch (IOException e) { throw new CrawlerException(ErrorCode.CONNECTION_ERROR, "连接豆瓣电影失败", e); }
|
|
catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new CrawlerException(ErrorCode.UNKNOWN_ERROR, "爬取被中断", e); }
|
|
if (movies.isEmpty()) throw new CrawlerException(ErrorCode.NO_RESULTS_ERROR, "未获取到豆瓣电影数据");
|
|
return movies;
|
|
}
|
|
@Override public String getSourceName() { return "豆瓣电影"; }
|
|
@Override public String getItemType() { return "Movie"; }
|
|
}
|
|
|
|
class DoubanMusicStrategy implements CrawlStrategy {
|
|
@Override
|
|
public List<Object> crawl(int pageCount) throws CrawlerException {
|
|
List<Object> musics = new ArrayList<>();
|
|
try {
|
|
for (int page = 0; page < pageCount && musics.size() < 35; page++) {
|
|
String url = "https://music.douban.com/chart";
|
|
System.out.println("正在爬取豆瓣音乐: " + url);
|
|
Document doc = HttpUtil.getDocument(url);
|
|
Elements musicElements = doc.select(".clearfix");
|
|
for (Element element : musicElements) {
|
|
Music music = new Music();
|
|
Element titleElement = element.selectFirst(".song-name a");
|
|
if (titleElement != null) { music.setTitle(titleElement.text().trim()); music.setUrl(titleElement.attr("href")); }
|
|
Element artistElement = element.selectFirst(".artist a");
|
|
if (artistElement != null) music.setArtist(artistElement.text().trim());
|
|
Element albumElement = element.selectFirst(".album a");
|
|
if (albumElement != null) music.setAlbum(albumElement.text().trim());
|
|
Element ratingElement = element.selectFirst(".rating_nums");
|
|
if (ratingElement != null) {
|
|
try { music.setRating(Double.parseDouble(ratingElement.text().trim())); }
|
|
catch (NumberFormatException e) { music.setRating(0.0); }
|
|
}
|
|
if (music.getTitle() != null && !music.getTitle().isEmpty()) musics.add(music);
|
|
if (musics.size() >= 35) break;
|
|
}
|
|
Thread.sleep(800);
|
|
}
|
|
} catch (IOException e) { throw new CrawlerException(ErrorCode.CONNECTION_ERROR, "连接豆瓣音乐失败", e); }
|
|
catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new CrawlerException(ErrorCode.UNKNOWN_ERROR, "爬取被中断", e); }
|
|
if (musics.isEmpty()) throw new CrawlerException(ErrorCode.NO_RESULTS_ERROR, "未获取到豆瓣音乐数据");
|
|
return musics;
|
|
}
|
|
@Override public String getSourceName() { return "豆瓣音乐"; }
|
|
@Override public String getItemType() { return "Music"; }
|
|
}
|
|
|
|
class CsdnBlogStrategy implements CrawlStrategy {
|
|
@Override
|
|
public List<Object> crawl(int pageCount) throws CrawlerException {
|
|
List<Object> articles = new ArrayList<>();
|
|
try {
|
|
for (int page = 1; page <= pageCount && articles.size() < 35; page++) {
|
|
String url = "https://www.csdn.net/nav/java?page=" + page;
|
|
System.out.println("正在爬取CSDN博客: " + url);
|
|
Document doc = HttpUtil.getDocument(url);
|
|
Elements articleElements = doc.select(".list-item");
|
|
for (Element element : articleElements) {
|
|
Article article = new Article();
|
|
Element titleElement = element.selectFirst(".title a");
|
|
if (titleElement != null) { article.setTitle(titleElement.text().trim()); article.setUrl(titleElement.attr("href")); }
|
|
Element authorElement = element.selectFirst(".name a");
|
|
if (authorElement != null) article.setAuthor(authorElement.text().trim());
|
|
Element timeElement = element.selectFirst(".time");
|
|
if (timeElement != null) article.setPublishDate(timeElement.text().trim());
|
|
Element viewElement = element.selectFirst(".view");
|
|
if (viewElement != null) article.setViews(viewElement.text().trim());
|
|
if (article.getTitle() != null && !article.getTitle().isEmpty()) articles.add(article);
|
|
if (articles.size() >= 35) break;
|
|
}
|
|
Thread.sleep(1000);
|
|
}
|
|
} catch (IOException e) { throw new CrawlerException(ErrorCode.CONNECTION_ERROR, "连接CSDN失败", e); }
|
|
catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new CrawlerException(ErrorCode.UNKNOWN_ERROR, "爬取被中断", e); }
|
|
if (articles.isEmpty()) throw new CrawlerException(ErrorCode.NO_RESULTS_ERROR, "未获取到CSDN文章数据");
|
|
return articles;
|
|
}
|
|
@Override public String getSourceName() { return "CSDN博客"; }
|
|
@Override public String getItemType() { return "Article"; }
|
|
}
|
|
|
|
// ==================== 4. 工具类 ====================
|
|
class HttpUtil {
|
|
private static final String[] USER_AGENTS = {
|
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
|
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15"
|
|
};
|
|
public static Document getDocument(String url) throws IOException {
|
|
String userAgent = USER_AGENTS[(int) (Math.random() * USER_AGENTS.length)];
|
|
return Jsoup.connect(url).userAgent(userAgent).timeout(15000).get();
|
|
}
|
|
}
|
|
|
|
// ==================== 5. Command模式 ====================
|
|
/**
 * Command-pattern contract: a unit of work that can be executed and that
 * reports a name for itself.
 */
interface Command {
    /** Runs the command. */
    void execute();

    /**
     * Returns a name identifying this command.
     *
     * @return the command name
     */
    String getCommandName();
}
|
|
|
|
class CrawlCommand implements Command {
|
|
private CrawlStrategy strategy;
|
|
private CLIView view;
|
|
private List<CrawlResult> results;
|
|
private int pageCount;
|
|
|
|
public CrawlCommand(CrawlStrategy strategy, CLIView view, List<CrawlResult> results, int pageCount) {
|
|
this.strategy = strategy; this.view = view; this.results = results; this.pageCount = pageCount;
|
|
}
|
|
|
|
@Override
|
|
public void execute() {
|
|
view.showMessage("开始爬取 " + strategy.getSourceName() + "...");
|
|
try {
|
|
long startTime = System.currentTimeMillis();
|
|
List<Object> items = strategy.crawl(pageCount);
|
|
long endTime = System.currentTimeMillis();
|
|
results.add(new CrawlResult(strategy.getSourceName(), items, endTime - startTime));
|
|
view.showMessage("爬取完成!获取 " + items.size() + " 条数据,耗时 " + (endTime - startTime) + "ms");
|
|
} catch (CrawlerException e) {
|
|
view.showError("爬取失败: " + e.getMessage());
|
|
}
|
|
}
|
|
|
|
@Override public String getCommandName() { return "CrawlCommand(" + strategy.getSourceName() + ")"; }
|
|
}
|
|
|
|
class SaveCommand implements Command {
|
|
private List<CrawlResult> results;
|
|
private CLIView view;
|
|
|
|
public SaveCommand(List<CrawlResult> results, CLIView view) {
|
|
this.results = results; this.view = view;
|
|
}
|
|
|
|
@Override
|
|
public void execute() {
|
|
if (results.isEmpty()) { view.showError("没有可保存的数据"); return; }
|
|
String fileName = "e:\\w14\\crawl_results_" + LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss")) + ".txt";
|
|
try (BufferedWriter writer = new BufferedWriter(new FileWriter(fileName))) {
|
|
writer.write("爬虫结果报告"); writer.newLine();
|
|
writer.write("生成时间: " + LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"))); writer.newLine();
|
|
writer.write("=".repeat(60)); writer.newLine();
|
|
|
|
for (CrawlResult result : results) {
|
|
writer.write("\n【来源】: " + result.getSource()); writer.newLine();
|
|
writer.write("【耗时】: " + result.getCrawlTime() + "ms"); writer.newLine();
|
|
writer.write("【数量】: " + result.getTotalCount() + " 条"); writer.newLine();
|
|
writer.write("-".repeat(60)); writer.newLine();
|
|
|
|
int index = 1;
|
|
for (Object item : result.getItems()) {
|
|
writer.write("\n" + index + ". ");
|
|
if (item instanceof Book) {
|
|
Book b = (Book) item;
|
|
writer.write(b.getTitle()); writer.newLine();
|
|
if (b.getAuthors() != null) writer.write(" 作者: " + b.getAuthors() + "\n");
|
|
if (b.getPublisher() != null) writer.write(" 出版社: " + b.getPublisher() + "\n");
|
|
if (b.getPublishDate() != null) writer.write(" 出版时间: " + b.getPublishDate() + "\n");
|
|
if (b.getRating() > 0) writer.write(" 评分: " + b.getRating() + "\n");
|
|
} else if (item instanceof Movie) {
|
|
Movie m = (Movie) item;
|
|
writer.write(m.getTitle()); writer.newLine();
|
|
if (m.getDirector() != null) writer.write(" 导演: " + m.getDirector() + "\n");
|
|
if (m.getActors() != null) writer.write(" 主演: " + m.getActors() + "\n");
|
|
if (m.getYear() != null) writer.write(" 年份: " + m.getYear() + "\n");
|
|
if (m.getRating() > 0) writer.write(" 评分: " + m.getRating() + "\n");
|
|
} else if (item instanceof Music) {
|
|
Music m = (Music) item;
|
|
writer.write(m.getTitle()); writer.newLine();
|
|
if (m.getArtist() != null) writer.write(" 歌手: " + m.getArtist() + "\n");
|
|
if (m.getAlbum() != null) writer.write(" 专辑: " + m.getAlbum() + "\n");
|
|
if (m.getRating() > 0) writer.write(" 评分: " + m.getRating() + "\n");
|
|
} else if (item instanceof Article) {
|
|
Article a = (Article) item;
|
|
writer.write(a.getTitle()); writer.newLine();
|
|
if (a.getAuthor() != null) writer.write(" 作者: " + a.getAuthor() + "\n");
|
|
if (a.getPublishDate() != null) writer.write(" 发布时间: " + a.getPublishDate() + "\n");
|
|
if (a.getViews() != null) writer.write(" 阅读量: " + a.getViews() + "\n");
|
|
}
|
|
index++;
|
|
}
|
|
writer.write("=".repeat(60)); writer.newLine();
|
|
}
|
|
view.showMessage("数据已保存到文件: " + fileName);
|
|
} catch (IOException e) {
|
|
view.showError("保存文件失败: " + e.getMessage());
|
|
}
|
|
}
|
|
|
|
@Override public String getCommandName() { return "SaveCommand"; }
|
|
}
|
|
|
|
// ==================== 6. MVC视图 (View) ====================
|
|
class CLIView {
|
|
private Scanner scanner;
|
|
public CLIView() { this.scanner = new Scanner(System.in); }
|
|
|
|
public int showMenu() {
|
|
System.out.println("\n========== 爬虫系统 v1.0 ==========");
|
|
System.out.println("1. 爬取豆瓣读书");
|
|
System.out.println("2. 爬取豆瓣电影");
|
|
System.out.println("3. 爬取豆瓣音乐");
|
|
System.out.println("4. 爬取CSDN博客");
|
|
System.out.println("5. 爬取所有网站");
|
|
System.out.println("6. 查看爬取结果");
|
|
System.out.println("7. 保存结果到文件");
|
|
System.out.println("0. 退出");
|
|
System.out.print("请输入选择: ");
|
|
try { return Integer.parseInt(scanner.nextLine()); } catch (NumberFormatException e) { return -1; }
|
|
}
|
|
|
|
public void showMessage(String message) { System.out.println("[信息] " + message); }
|
|
public void showError(String error) { System.err.println("[错误] " + error); }
|
|
|
|
public void displayResults(List<CrawlResult> results) {
|
|
if (results.isEmpty()) { System.out.println("暂无爬取结果"); return; }
|
|
System.out.println("\n========== 爬取结果 ==========");
|
|
for (CrawlResult result : results) {
|
|
System.out.println("\n【来源】: " + result.getSource());
|
|
System.out.println("【耗时】: " + result.getCrawlTime() + "ms | 【数量】: " + result.getTotalCount() + " 条");
|
|
System.out.println("-".repeat(50));
|
|
int index = 1;
|
|
for (Object item : result.getItems()) {
|
|
if (item instanceof Book) {
|
|
Book b = (Book) item;
|
|
System.out.println(index + ". " + b.getTitle());
|
|
if (b.getAuthors() != null) System.out.println(" 作者: " + b.getAuthors());
|
|
if (b.getRating() > 0) System.out.println(" 评分: " + b.getRating());
|
|
} else if (item instanceof Movie) {
|
|
Movie m = (Movie) item;
|
|
System.out.println(index + ". " + m.getTitle());
|
|
if (m.getDirector() != null) System.out.println(" 导演: " + m.getDirector());
|
|
if (m.getRating() > 0) System.out.println(" 评分: " + m.getRating());
|
|
} else if (item instanceof Music) {
|
|
Music m = (Music) item;
|
|
System.out.println(index + ". " + m.getTitle());
|
|
if (m.getArtist() != null) System.out.println(" 歌手: " + m.getArtist());
|
|
if (m.getRating() > 0) System.out.println(" 评分: " + m.getRating());
|
|
} else if (item instanceof Article) {
|
|
Article a = (Article) item;
|
|
System.out.println(index + ". " + a.getTitle());
|
|
if (a.getAuthor() != null) System.out.println(" 作者: " + a.getAuthor());
|
|
if (a.getViews() != null) System.out.println(" 阅读量: " + a.getViews());
|
|
}
|
|
if (index++ >= 10) { System.out.println(" ... (仅显示前10条)"); break; }
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// ==================== 7. MVC控制器 (Controller) ====================
|
|
class CrawlerController {
|
|
private CLIView view;
|
|
private List<CrawlResult> results;
|
|
|
|
public CrawlerController(CLIView view) {
|
|
this.view = view;
|
|
this.results = new ArrayList<>();
|
|
}
|
|
|
|
public void executeCrawlCommand(String source) {
|
|
List<Command> commands = new ArrayList<>();
|
|
int pageCount = 3; // 确保获取至少30条数据
|
|
|
|
switch (source.toLowerCase()) {
|
|
case "doubanbook": commands.add(new CrawlCommand(new DoubanBookStrategy(), view, results, pageCount)); break;
|
|
case "doubanmovie": commands.add(new CrawlCommand(new DoubanMovieStrategy(), view, results, pageCount)); break;
|
|
case "doubanmusic": commands.add(new CrawlCommand(new DoubanMusicStrategy(), view, results, pageCount)); break;
|
|
case "csdn": commands.add(new CrawlCommand(new CsdnBlogStrategy(), view, results, pageCount)); break;
|
|
case "all":
|
|
commands.add(new CrawlCommand(new DoubanBookStrategy(), view, results, pageCount));
|
|
commands.add(new CrawlCommand(new DoubanMovieStrategy(), view, results, pageCount));
|
|
commands.add(new CrawlCommand(new DoubanMusicStrategy(), view, results, pageCount));
|
|
commands.add(new CrawlCommand(new CsdnBlogStrategy(), view, results, pageCount));
|
|
break;
|
|
default: view.showError("未知的爬取源: " + source); return;
|
|
}
|
|
|
|
for (Command command : commands) {
|
|
command.execute();
|
|
}
|
|
}
|
|
|
|
public void saveResults() {
|
|
new SaveCommand(results, view).execute();
|
|
}
|
|
|
|
public List<CrawlResult> getResults() {
|
|
return new ArrayList<>(results);
|
|
}
|
|
}
|
|
|
|
// ==================== 8. 主程序入口 ====================
|
|
public class CrawlerApp {
|
|
public static void main(String[] args) {
|
|
CLIView view = new CLIView();
|
|
CrawlerController controller = new CrawlerController(view);
|
|
|
|
System.out.println("========== 爬虫系统 v1.0 自动运行模式 ==========");
|
|
System.out.println("开始爬取所有4个网站...\n");
|
|
|
|
controller.executeCrawlCommand("all");
|
|
|
|
System.out.println("\n========== 爬取结果汇总 ==========");
|
|
view.displayResults(controller.getResults());
|
|
|
|
System.out.println("\n========== 保存结果到文件 ==========");
|
|
controller.saveResults();
|
|
|
|
System.out.println("\n========== 程序结束 ==========");
|
|
}
|
|
}
|
|
|