Browse Source

上传文件至 'project/command'

main
LiuZihan 4 weeks ago
parent
commit
5063e2b8b5
  1. 47
      project/command/AutoCommand.java
  2. 8
      project/command/Command.java
  3. 197
      project/command/CrawlCommand.java
  4. 23
      project/command/ExitCommand.java
  5. 52
      project/command/ExportCommand.java

47
project/command/AutoCommand.java

@ -0,0 +1,47 @@
package com.example.moviecli.command;
import com.example.moviecli.repository.MovieRepository;
import com.example.moviecli.strategy.MovieStrategyFactory;
import com.example.moviecli.view.ConsoleView;
/**
 * Command named {@code "auto"} that runs a fixed batch of steps: three preset crawls,
 * followed by the list, stat and export commands.
 */
public class AutoCommand implements Command {

    /** URLs handed to the crawl command, in the order they are executed. */
    private static final String[] PRESET_URLS = {
        "https://movie.douban.com/top250", // Douban movie Top250
        "https://news.sina.com.cn/",       // Sina news (used in place of Maoyan)
        "https://book.douban.com/top250"   // Douban book Top50
    };

    private final ConsoleView view;
    private final MovieStrategyFactory factory;
    private final CrawlCommand crawlCommand;

    /**
     * Creates the command and the {@link CrawlCommand} it delegates the crawl steps to.
     *
     * @param view    console used for progress output
     * @param factory strategy factory passed on to the internal crawl command
     */
    public AutoCommand(ConsoleView view, MovieStrategyFactory factory) {
        this.view = view;
        this.factory = factory;
        this.crawlCommand = new CrawlCommand(view, factory);
    }

    @Override
    public String getName() {
        return "auto";
    }

    /**
     * Runs every preset step in sequence against the given repository.
     *
     * @param args       ignored by this command
     * @param repository repository passed to each delegated command
     */
    @Override
    public void execute(String[] args, MovieRepository repository) {
        view.printInfo("开始自动执行预设任务...");

        // Steps 1-3: crawl each preset source.
        for (String presetUrl : PRESET_URLS) {
            String[] crawlArgs = {"crawl", presetUrl};
            crawlCommand.execute(crawlArgs, repository);
        }

        // Step 4: list all collected data.
        ListCommand listStep = new ListCommand(view);
        listStep.execute(new String[]{"list"}, repository);

        // Step 5: print the score distribution statistics.
        StatCommand statStep = new StatCommand(view);
        statStep.execute(new String[]{"stat"}, repository);

        // Step 6: export everything to movies.csv.
        ExportCommand exportStep = new ExportCommand(view);
        exportStep.execute(new String[]{"export"}, repository);

        view.printSuccess("自动任务执行完毕!已生成三个独立 CSV 文件及总文件 movies.csv。");
    }
}

8
project/command/Command.java

@ -0,0 +1,8 @@
package com.example.moviecli.command;
import com.example.moviecli.repository.MovieRepository;
/**
 * A named action that operates on a {@link MovieRepository}.
 */
public interface Command {
/**
 * Returns the name of this command (for example {@code "crawl"} or {@code "exit"}).
 * NOTE(review): presumably this is the keyword a dispatcher matches user input against;
 * the dispatcher is not visible here, so confirm.
 *
 * @return the command name
 */
String getName();
/**
 * Runs the command.
 *
 * @param args       argument array; callers in this package put the command name at index 0
 *                   and any further arguments after it (e.g. {@code {"crawl", url}})
 * @param repository repository the command reads from or adds records to
 */
void execute(String[] args, MovieRepository repository);
}

197
project/command/CrawlCommand.java

@ -0,0 +1,197 @@
package com.example.moviecli.command;
import com.example.moviecli.model.Movie;
import com.example.moviecli.repository.MovieRepository;
import com.example.moviecli.strategy.MovieCrawlStrategy;
import com.example.moviecli.strategy.MovieStrategyFactory;
import com.example.moviecli.view.ConsoleView;
import com.example.moviecli.exception.CrawlFailedException;
import com.example.moviecli.exception.ParseFailedException;
import com.example.moviecli.exception.SaveFailedException;
import com.opencsv.CSVWriter;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.List;
/**
 * Command named {@code "crawl"}: downloads one page (or a fixed series of pages) for the given
 * URL, parses it with the strategy supplied by the {@link MovieStrategyFactory}, adds the parsed
 * records to the repository and writes them to a per-source CSV file.
 *
 * <p>Download, parse and save failures are reported through the {@link ConsoleView} and their
 * stack traces are printed; they are not rethrown to the caller.
 */
public class CrawlCommand implements Command {

    /** User agent sent to the three explicitly supported sites. */
    private static final String BROWSER_USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";
    /** User agent sent when crawling a URL that has no dedicated handler. */
    private static final String FALLBACK_USER_AGENT = "Mozilla/5.0";
    private static final int SITE_TIMEOUT_MS = 15000;
    private static final int FALLBACK_TIMEOUT_MS = 10000;
    /** Step of the {@code start} query parameter between consecutive Douban pages. */
    private static final int PAGE_SIZE = 25;
    /** Pause between two consecutive page requests. */
    private static final long PAGE_DELAY_MS = 1500;
    private static final String[] CSV_HEADER =
            {"Rank", "Title", "OriginalTitle", "Score", "Year", "Director"};

    private final ConsoleView view;
    private final MovieStrategyFactory factory;

    /**
     * @param view    console used for all progress and error output
     * @param factory source of the parsing strategy for a given URL
     */
    public CrawlCommand(ConsoleView view, MovieStrategyFactory factory) {
        this.view = view;
        this.factory = factory;
    }

    @Override
    public String getName() {
        return "crawl";
    }

    /**
     * Crawls the URL found at {@code args[1]}.
     *
     * <p>Prints a usage message when no URL is given, and an error when the factory has no
     * strategy for the URL. Otherwise dispatches on the URL to the matching crawl routine.
     *
     * @param args       {@code args[1]} is the URL to crawl
     * @param repository repository that receives every parsed record
     */
    @Override
    public void execute(String[] args, MovieRepository repository) {
        if (args.length < 2) {
            view.printError("用法: crawl <url>");
            view.printInfo("支持的 URL 示例:");
            view.printInfo(" https://movie.douban.com/top250");
            view.printInfo(" https://news.sina.com.cn/");
            view.printInfo(" https://book.douban.com/top250");
            return;
        }
        String url = args[1];
        MovieCrawlStrategy strategy = factory.getStrategy(url);
        if (strategy == null) {
            view.printError("不支持该 URL 的爬取策略: " + url);
            return;
        }
        if (url.contains("movie.douban.com/top250")) {
            crawlDoubanTop250(strategy, repository);
        } else if (url.contains("news.sina.com.cn")) {
            crawlSinaNews(strategy, repository);
        } else if (url.contains("book.douban.com/top250")) {
            crawlDoubanBookTop50(strategy, repository);
        } else {
            crawlSinglePage(url, strategy, repository);
        }
    }

    /** Douban movie Top250 (start = 0..225) -> douban_movies.csv. */
    private void crawlDoubanTop250(MovieCrawlStrategy strategy, MovieRepository repository) {
        List<Movie> movies = crawlPaged(
                "https://movie.douban.com/top250?start=", 250, "豆瓣电影爬取失败: ",
                strategy, repository);
        view.printSuccess("豆瓣电影 Top250 全部爬取完成,共 " + movies.size() + " 条记录。");
        saveToCsv(movies, "douban_movies.csv");
    }

    /** Sina news front page -> sina_news.csv. */
    private void crawlSinaNews(MovieCrawlStrategy strategy, MovieRepository repository) {
        String url = "https://news.sina.com.cn/";
        try {
            view.printInfo("正在爬取新浪新闻: " + url);
            List<Movie> news = strategy.parse(fetch(url, BROWSER_USER_AGENT, SITE_TIMEOUT_MS));
            repository.addAll(news);
            view.printSuccess("新浪新闻爬取完成,共 " + news.size() + " 条记录。");
            saveToCsv(news, "sina_news.csv");
        } catch (ParseFailedException e) {
            reportParseFailure(e);
        } catch (Exception e) {
            reportCrawlFailure("新浪新闻爬取失败: " + url, e);
        }
    }

    /** First two pages of the Douban book list (start = 0 and 25) -> douban_books.csv. */
    private void crawlDoubanBookTop50(MovieCrawlStrategy strategy, MovieRepository repository) {
        List<Movie> books = crawlPaged(
                "https://book.douban.com/top250?start=", 50, "豆瓣图书爬取失败: ",
                strategy, repository);
        view.printSuccess("豆瓣图书 Top50 爬取完成,共 " + books.size() + " 条记录。");
        saveToCsv(books, "douban_books.csv");
    }

    /** Fallback for URLs without a dedicated routine: one page -> unknown.csv. */
    private void crawlSinglePage(String url, MovieCrawlStrategy strategy, MovieRepository repository) {
        try {
            view.printInfo("正在爬取: " + url);
            List<Movie> movies = strategy.parse(fetch(url, FALLBACK_USER_AGENT, FALLBACK_TIMEOUT_MS));
            repository.addAll(movies);
            view.printSuccess("爬取完成!共 " + movies.size() + " 条记录。");
            saveToCsv(movies, "unknown.csv");
        } catch (ParseFailedException e) {
            reportParseFailure(e);
        } catch (Exception e) {
            reportCrawlFailure("爬取失败: " + url, e);
        }
    }

    /**
     * Crawls {@code pageUrlPrefix + start} for start = 0, 25, 50, ... while {@code start < limit}.
     *
     * <p>A page that fails to download or parse is reported and skipped; the remaining pages
     * are still attempted. If the thread is interrupted while pausing between pages, the
     * interrupt flag is restored and paging stops early.
     *
     * @param pageUrlPrefix URL up to and including {@code "start="}
     * @param limit         exclusive upper bound for the {@code start} offset
     * @param failureLabel  message prefix used when a page cannot be downloaded
     * @return every record parsed from the pages that succeeded
     */
    private List<Movie> crawlPaged(String pageUrlPrefix, int limit, String failureLabel,
                                   MovieCrawlStrategy strategy, MovieRepository repository) {
        List<Movie> collected = new ArrayList<>();
        for (int start = 0; start < limit; start += PAGE_SIZE) {
            String pageUrl = pageUrlPrefix + start;
            try {
                view.printInfo("正在爬取: " + pageUrl);
                List<Movie> pageMovies =
                        strategy.parse(fetch(pageUrl, BROWSER_USER_AGENT, SITE_TIMEOUT_MS));
                collected.addAll(pageMovies);
                repository.addAll(pageMovies);
                view.printInfo("已累计爬取 " + collected.size() + " 条...");
                // Only pause when another request follows; no need to wait after the last page.
                if (start + PAGE_SIZE < limit) {
                    Thread.sleep(PAGE_DELAY_MS);
                }
            } catch (InterruptedException e) {
                // Keep the interrupt visible to the caller instead of treating it as a
                // download failure and carrying on with the next page.
                Thread.currentThread().interrupt();
                view.printError("爬取被中断: " + pageUrl);
                break;
            } catch (ParseFailedException e) {
                reportParseFailure(e);
            } catch (Exception e) {
                reportCrawlFailure(failureLabel + pageUrl, e);
            }
        }
        return collected;
    }

    /** Downloads {@code url} with the given user agent and timeout and returns the parsed DOM. */
    private Document fetch(String url, String userAgent, int timeoutMs) throws java.io.IOException {
        return Jsoup.connect(url)
                .userAgent(userAgent)
                .timeout(timeoutMs)
                .get();
    }

    /** Prints a parse failure and its stack trace. */
    private void reportParseFailure(ParseFailedException e) {
        view.printError("解析失败: " + e.getMessage());
        e.printStackTrace();
    }

    /** Wraps {@code cause} in a {@link CrawlFailedException}, then prints its message and trace. */
    private void reportCrawlFailure(String message, Exception cause) {
        CrawlFailedException ex = new CrawlFailedException(message, cause);
        view.printError(ex.getMessage());
        ex.printStackTrace();
    }

    /**
     * Writes the given records to {@code filename} as CSV, replacing any existing file.
     * Does nothing except print a notice when {@code items} is empty.
     */
    private void saveToCsv(List<Movie> items, String filename) {
        if (items.isEmpty()) {
            view.printInfo("没有数据可保存到 " + filename);
            return;
        }
        // Encode explicitly as UTF-8: a plain FileWriter uses the platform default charset on
        // older JDKs, which can corrupt non-ASCII titles.
        try (CSVWriter writer = new CSVWriter(java.nio.file.Files.newBufferedWriter(
                java.nio.file.Paths.get(filename), java.nio.charset.StandardCharsets.UTF_8))) {
            writer.writeNext(CSV_HEADER);
            for (Movie m : items) {
                String[] line = {
                    String.valueOf(m.getRank()),
                    m.getTitle(),
                    m.getOriginalTitle(),
                    m.getScore(),
                    m.getYear(),
                    m.getDirector()
                };
                writer.writeNext(line);
            }
            view.printSuccess("已保存 " + items.size() + " 条记录到 " + filename);
        } catch (Exception e) {
            SaveFailedException ex = new SaveFailedException("保存 " + filename + " 失败", e);
            view.printError(ex.getMessage());
            ex.printStackTrace();
        }
    }
}

23
project/command/ExitCommand.java

@ -0,0 +1,23 @@
package com.example.moviecli.command;
import com.example.moviecli.repository.MovieRepository;
import com.example.moviecli.view.ConsoleView;
/**
 * Command named {@code "exit"}: prints a goodbye message and terminates the JVM.
 */
public class ExitCommand implements Command {

    private static final String NAME = "exit";

    private final ConsoleView view;

    /**
     * @param view console used to print the goodbye message
     */
    public ExitCommand(ConsoleView view) {
        this.view = view;
    }

    @Override
    public String getName() {
        return NAME;
    }

    /**
     * Prints the goodbye message and then calls {@link System#exit(int)} with status 0,
     * so this method does not return normally.
     *
     * @param args       ignored
     * @param repository ignored
     */
    @Override
    public void execute(String[] args, MovieRepository repository) {
        view.printSuccess("再见!");
        System.exit(0);
    }
}

52
project/command/ExportCommand.java

@ -0,0 +1,52 @@
package com.example.moviecli.command;
import com.example.moviecli.model.Movie;
import com.example.moviecli.repository.MovieRepository;
import com.example.moviecli.view.ConsoleView;
import com.example.moviecli.exception.SaveFailedException;
import com.opencsv.CSVWriter;
import java.io.FileWriter;
import java.util.List;
/**
 * Command named {@code "export"}: writes every record returned by the repository to the
 * relative path {@code movies.csv}, replacing any existing file.
 */
public class ExportCommand implements Command {

    private static final String OUTPUT_FILE = "movies.csv";
    private static final String[] CSV_HEADER =
            {"Rank", "Title", "OriginalTitle", "Score", "Year", "Director"};

    private final ConsoleView view;

    /**
     * @param view console used for success and error output
     */
    public ExportCommand(ConsoleView view) {
        this.view = view;
    }

    @Override
    public String getName() {
        return "export";
    }

    /**
     * Exports all repository records as CSV.
     *
     * <p>Prints an error and writes nothing when the repository is empty. A write failure is
     * wrapped in a {@link SaveFailedException}, reported through the view and its stack trace
     * printed; it is not rethrown.
     *
     * @param args       ignored by this command
     * @param repository source of the records to export
     */
    @Override
    public void execute(String[] args, MovieRepository repository) {
        List<Movie> movies = repository.getAll();
        if (movies.isEmpty()) {
            view.printError("没有数据可导出。");
            return;
        }
        // Encode explicitly as UTF-8: a plain FileWriter uses the platform default charset on
        // older JDKs, which can corrupt non-ASCII titles.
        try (CSVWriter writer = new CSVWriter(java.nio.file.Files.newBufferedWriter(
                java.nio.file.Paths.get(OUTPUT_FILE), java.nio.charset.StandardCharsets.UTF_8))) {
            writer.writeNext(CSV_HEADER);
            for (Movie m : movies) {
                String[] line = {
                    String.valueOf(m.getRank()),
                    m.getTitle(),
                    m.getOriginalTitle(),
                    m.getScore(),
                    m.getYear(),
                    m.getDirector()
                };
                writer.writeNext(line);
            }
            view.printSuccess("导出成功:" + OUTPUT_FILE);
        } catch (Exception e) {
            // Wrap the original exception in the project's custom exception type.
            SaveFailedException ex = new SaveFailedException("导出CSV文件失败", e);
            view.printError(ex.getMessage());
            ex.printStackTrace(); // print the trace so the custom exception is visible
        }
    }
}
Loading…
Cancel
Save