// You can not select more than 25 topics
// Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
// 782 lines
// 31 KiB
// 782 lines
// 31 KiB
import java.io.*;
|
|
import java.net.*;
|
|
import java.nio.file.*;
|
|
import java.nio.charset.*;
|
|
import java.time.*;
|
|
import java.time.format.*;
|
|
import java.util.*;
|
|
import java.util.concurrent.*;
|
|
import java.util.concurrent.atomic.*;
|
|
import java.util.stream.*;
|
|
import java.lang.reflect.*;
|
|
import java.lang.annotation.*;
|
|
|
|
/** Outcome reported by a crawl. */
enum CrawlStatus {
    /** The crawl produced usable data. */
    SUCCESS,

    /** The crawl did not produce usable data. */
    FAILURE,

    /** The crawl has not finished yet. */
    PENDING
}
|
|
|
|
@Retention(RetentionPolicy.RUNTIME)
|
|
@Target(ElementType.TYPE)
|
|
@interface CrawlerInfo {
|
|
String name();
|
|
String version() default "1.0";
|
|
}
|
|
|
|
/**
 * Base checked exception for crawler failures.
 *
 * <p>Each instance records the local date-time at which it was constructed.
 */
class CrawlerException extends Exception {

    /** Captured once, while the exception object is being constructed. */
    private final LocalDateTime timestamp = LocalDateTime.now();

    /**
     * @param message description of the failure
     */
    public CrawlerException(String message) {
        super(message);
    }

    /**
     * @param message description of the failure
     * @param cause   underlying throwable that triggered this failure
     */
    public CrawlerException(String message, Throwable cause) {
        super(message, cause);
    }

    /** @return the local date-time at which this exception was created */
    public LocalDateTime getTimestamp() {
        return timestamp;
    }
}
|
|
|
|
class NetworkException extends CrawlerException {
|
|
public NetworkException(String message) { super(message); }
|
|
public NetworkException(String message, Throwable cause) { super(message, cause); }
|
|
}
|
|
|
|
class ParseException extends CrawlerException {
|
|
public ParseException(String message) { super(message); }
|
|
public ParseException(String message, Throwable cause) { super(message, cause); }
|
|
}
|
|
|
|
interface Command {
|
|
void execute() throws CrawlerException;
|
|
default String getCommandName() { return "command"; }
|
|
}
|
|
|
|
interface CrawlStrategy<T> {
|
|
CrawlResult<?> crawl(T target) throws CrawlerException;
|
|
default String getName() { return "strategy"; }
|
|
default void printHelp() {}
|
|
}
|
|
|
|
class CrawlResult<T> {
|
|
private final T data;
|
|
private final CrawlStatus status;
|
|
private final String message;
|
|
private final long timestamp;
|
|
|
|
public CrawlResult(T data, CrawlStatus status, String message) {
|
|
this.data = data;
|
|
this.status = status;
|
|
this.message = message;
|
|
this.timestamp = System.currentTimeMillis();
|
|
}
|
|
|
|
public T getData() { return data; }
|
|
public CrawlStatus getStatus() { return status; }
|
|
public String getMessage() { return message; }
|
|
public long getTimestamp() { return timestamp; }
|
|
}
|
|
|
|
// ==================== Model Layer (模型层) ====================
|
|
|
|
/** Mutable bean describing one movie entry. */
class MovieInfo {

    private int rank;
    private String title;
    private double rating;
    private String year;
    private String quote;

    /** @return position of the movie in the list */
    public int getRank() {
        return rank;
    }

    /** @param rank position of the movie in the list */
    public void setRank(int rank) {
        this.rank = rank;
    }

    /** @return movie title */
    public String getTitle() {
        return title;
    }

    /** @param title movie title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return movie rating */
    public double getRating() {
        return rating;
    }

    /** @param rating movie rating */
    public void setRating(double rating) {
        this.rating = rating;
    }

    /** @return release year text */
    public String getYear() {
        return year;
    }

    /** @param year release year text */
    public void setYear(String year) {
        this.year = year;
    }

    /** @return quote associated with the movie */
    public String getQuote() {
        return quote;
    }

    /** @param quote quote associated with the movie */
    public void setQuote(String quote) {
        this.quote = quote;
    }
}
|
|
|
|
/** Mutable bean describing one news headline. */
class NewsInfo {

    private int rank;
    private String title;
    private String source;
    private String time;
    private int views;

    /** @return position of the headline in the list */
    public int getRank() {
        return rank;
    }

    /** @param rank position of the headline in the list */
    public void setRank(int rank) {
        this.rank = rank;
    }

    /** @return headline text */
    public String getTitle() {
        return title;
    }

    /** @param title headline text */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return name of the publishing source */
    public String getSource() {
        return source;
    }

    /** @param source name of the publishing source */
    public void setSource(String source) {
        this.source = source;
    }

    /** @return publication time text */
    public String getTime() {
        return time;
    }

    /** @param time publication time text */
    public void setTime(String time) {
        this.time = time;
    }

    /** @return view count */
    public int getViews() {
        return views;
    }

    /** @param views view count */
    public void setViews(int views) {
        this.views = views;
    }
}
|
|
|
|
/** Mutable bean describing the forecast for one day; all values are kept as text. */
class WeatherDay {

    private String date;
    private String week;
    private String lowTemp;
    private String highTemp;
    private String condition;

    /** @return date text */
    public String getDate() {
        return date;
    }

    /** @param date date text */
    public void setDate(String date) {
        this.date = date;
    }

    /** @return weekday label */
    public String getWeek() {
        return week;
    }

    /** @param week weekday label */
    public void setWeek(String week) {
        this.week = week;
    }

    /** @return low temperature text */
    public String getLowTemp() {
        return lowTemp;
    }

    /** @param lowTemp low temperature text */
    public void setLowTemp(String lowTemp) {
        this.lowTemp = lowTemp;
    }

    /** @return high temperature text */
    public String getHighTemp() {
        return highTemp;
    }

    /** @param highTemp high temperature text */
    public void setHighTemp(String highTemp) {
        this.highTemp = highTemp;
    }

    /** @return weather condition text */
    public String getCondition() {
        return condition;
    }

    /** @param condition weather condition text */
    public void setCondition(String condition) {
        this.condition = condition;
    }
}
|
|
|
|
// ==================== View Layer (视图层) ====================
|
|
|
|
interface CrawlerView {
|
|
void showHeader();
|
|
void showMovies(List<MovieInfo> movies);
|
|
void showNews(List<NewsInfo> newsList);
|
|
void showWeather(List<WeatherDay> weatherList);
|
|
void showError(String message);
|
|
void showMessage(String message);
|
|
}
|
|
|
|
class ConsoleView implements CrawlerView {
|
|
@Override
|
|
public void showHeader() {
|
|
String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
|
|
System.out.println("\n========================================");
|
|
System.out.println(" MultiSiteCrawler v2.0 - 综合爬虫系统");
|
|
System.out.println("========================================");
|
|
System.out.println(" " + timestamp);
|
|
System.out.println("========================================\n");
|
|
}
|
|
|
|
@Override
|
|
public void showMovies(List<MovieInfo> movies) {
|
|
System.out.println("\n========================================");
|
|
System.out.println(" 豆瓣电影 Top250 排行榜");
|
|
System.out.println("========================================\n");
|
|
System.out.println("+----+----------------------------------+--------+");
|
|
System.out.println("|排名| 电影名称 | 评分 |");
|
|
System.out.println("+----+----------------------------------+--------+");
|
|
for (MovieInfo movie : movies) {
|
|
String title = movie.getTitle().length() > 28 ? movie.getTitle().substring(0, 25) + "..." : movie.getTitle();
|
|
System.out.printf("|%4d|%-32s|%8.1f|%n", movie.getRank(), title, movie.getRating());
|
|
}
|
|
System.out.println("+----+----------------------------------+--------+\n");
|
|
}
|
|
|
|
@Override
|
|
public void showNews(List<NewsInfo> newsList) {
|
|
System.out.println("\n========================================");
|
|
System.out.println(" 今日新闻头条");
|
|
System.out.println("========================================\n");
|
|
System.out.println("+----+--------------------------------------------+----------+-----------+--------+");
|
|
System.out.println("|排名| 标题 | 来源 | 时间 | 浏览量 |");
|
|
System.out.println("+----+--------------------------------------------+----------+-----------+--------+");
|
|
for (NewsInfo news : newsList) {
|
|
String title = news.getTitle().length() > 40 ? news.getTitle().substring(0, 37) + "..." : news.getTitle();
|
|
System.out.printf("|%4d|%-42s|%10s|%11s|%7d|%n", news.getRank(), title, news.getSource(), news.getTime(), news.getViews());
|
|
}
|
|
System.out.println("+----+--------------------------------------------+----------+-----------+--------+\n");
|
|
}
|
|
|
|
@Override
|
|
public void showWeather(List<WeatherDay> weatherList) {
|
|
System.out.println("\n========================================");
|
|
System.out.println(" 湖南长沙 未来7天天气预报");
|
|
System.out.println("========================================\n");
|
|
System.out.println("+------------+------+-----------+----------+");
|
|
System.out.println("| 日期 | 星期 | 温度(℃) | 天气 |");
|
|
System.out.println("+------------+------+-----------+----------+");
|
|
for (WeatherDay day : weatherList) {
|
|
System.out.printf("|%11s|%5s | %5s~%5s |%9s|%n", day.getDate(), day.getWeek(), day.getLowTemp(), day.getHighTemp(), day.getCondition());
|
|
}
|
|
System.out.println("+------------+------+-----------+----------+\n");
|
|
}
|
|
|
|
@Override
|
|
public void showError(String message) {
|
|
System.err.println("错误: " + message);
|
|
}
|
|
|
|
@Override
|
|
public void showMessage(String message) {
|
|
System.out.println(message);
|
|
}
|
|
}
|
|
|
|
// ==================== Data Saver (数据保存器) ====================
|
|
|
|
class DataSaver {
|
|
private final String filename;
|
|
private final StringBuilder content = new StringBuilder();
|
|
|
|
public DataSaver(String filename) {
|
|
this.filename = filename;
|
|
addHeader();
|
|
}
|
|
|
|
private void addHeader() {
|
|
String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
|
|
content.append("========================================\n");
|
|
content.append(" MultiSiteCrawler v2.0 - 爬取结果报告\n");
|
|
content.append("========================================\n");
|
|
content.append(" 爬取时间: ").append(timestamp).append("\n");
|
|
content.append("========================================\n\n");
|
|
}
|
|
|
|
public void addMovies(List<MovieInfo> movies) {
|
|
content.append("\n========================================\n");
|
|
content.append(" 豆瓣电影 Top250 排行榜\n");
|
|
content.append("========================================\n\n");
|
|
content.append("+----+----------------------------------+--------+\n");
|
|
content.append("|排名| 电影名称 | 评分 |\n");
|
|
content.append("+----+----------------------------------+--------+\n");
|
|
for (MovieInfo movie : movies) {
|
|
String title = movie.getTitle().length() > 28 ? movie.getTitle().substring(0, 25) + "..." : movie.getTitle();
|
|
content.append(String.format("|%4d|%-32s|%8.1f|\n", movie.getRank(), title, movie.getRating()));
|
|
}
|
|
content.append("+----+----------------------------------+--------+\n\n");
|
|
}
|
|
|
|
public void addNews(List<NewsInfo> newsList) {
|
|
content.append("\n========================================\n");
|
|
content.append(" 今日新闻头条\n");
|
|
content.append("========================================\n\n");
|
|
content.append("+----+--------------------------------------------+----------+-----------+--------+\n");
|
|
content.append("|排名| 标题 | 来源 | 时间 | 浏览量 |\n");
|
|
content.append("+----+--------------------------------------------+----------+-----------+--------+\n");
|
|
for (NewsInfo news : newsList) {
|
|
String title = news.getTitle().length() > 40 ? news.getTitle().substring(0, 37) + "..." : news.getTitle();
|
|
content.append(String.format("|%4d|%-42s|%10s|%11s|%7d|\n", news.getRank(), title, news.getSource(), news.getTime(), news.getViews()));
|
|
}
|
|
content.append("+----+--------------------------------------------+----------+-----------+--------+\n\n");
|
|
}
|
|
|
|
public void addWeather(List<WeatherDay> weatherList) {
|
|
content.append("\n========================================\n");
|
|
content.append(" 湖南长沙 未来7天天气预报\n");
|
|
content.append("========================================\n\n");
|
|
content.append("+------------+------+-----------+----------+\n");
|
|
content.append("| 日期 | 星期 | 温度(℃) | 天气 |\n");
|
|
content.append("+------------+------+-----------+----------+\n");
|
|
for (WeatherDay day : weatherList) {
|
|
content.append(String.format("|%11s|%5s | %5s~%5s |%9s|\n", day.getDate(), day.getWeek(), day.getLowTemp(), day.getHighTemp(), day.getCondition()));
|
|
}
|
|
content.append("+------------+------+-----------+----------+\n\n");
|
|
}
|
|
|
|
public void addFooter() {
|
|
content.append("\n========================================\n");
|
|
content.append(" 报告生成完毕 - 供老师检查使用\n");
|
|
content.append("========================================\n");
|
|
}
|
|
|
|
public void save() throws IOException {
|
|
Files.writeString(Paths.get(filename), content.toString(), StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
|
|
System.out.println("\n✅ 数据已保存到文件: " + filename);
|
|
}
|
|
}
|
|
|
|
// ==================== Controller Layer (控制器层) ====================
|
|
|
|
class CrawlerController {
|
|
private final CrawlerView view;
|
|
private final StrategyFactory strategyFactory;
|
|
private DataSaver dataSaver;
|
|
|
|
public CrawlerController(CrawlerView view) {
|
|
this.view = view;
|
|
this.strategyFactory = StrategyFactory.getInstance();
|
|
}
|
|
|
|
public void setDataSaver(DataSaver dataSaver) {
|
|
this.dataSaver = dataSaver;
|
|
}
|
|
|
|
public void crawlAll() throws CrawlerException {
|
|
view.showMessage("\n========================================");
|
|
view.showMessage(" 默认模式:爬取所有数据");
|
|
view.showMessage("========================================\n");
|
|
|
|
view.showMessage("[1/3] 正在爬取豆瓣电影 Top10...");
|
|
crawlMovies("10");
|
|
|
|
view.showMessage("\n[2/3] 正在爬取新闻头条 Top10...");
|
|
crawlNews("10");
|
|
|
|
view.showMessage("\n[3/3] 正在爬取湖南天气 7天预报...");
|
|
crawlWeather("7");
|
|
}
|
|
|
|
public void crawlMovies(String count) throws CrawlerException {
|
|
try {
|
|
CrawlStrategy<?> strategy = strategyFactory.getStrategy("douban");
|
|
CrawlResult<?> result = invokeCrawl(strategy, count);
|
|
if (result.getStatus() == CrawlStatus.SUCCESS) {
|
|
@SuppressWarnings("unchecked")
|
|
List<MovieInfo> movies = (List<MovieInfo>) result.getData();
|
|
view.showMovies(movies);
|
|
if (dataSaver != null) {
|
|
dataSaver.addMovies(movies);
|
|
}
|
|
} else {
|
|
view.showError(result.getMessage());
|
|
}
|
|
} catch (CrawlerException e) {
|
|
throw e;
|
|
} catch (Exception e) {
|
|
throw new CrawlerException("Crawl movies failed", e);
|
|
}
|
|
}
|
|
|
|
public void crawlNews(String count) throws CrawlerException {
|
|
try {
|
|
CrawlStrategy<?> strategy = strategyFactory.getStrategy("news");
|
|
CrawlResult<?> result = invokeCrawl(strategy, count);
|
|
if (result.getStatus() == CrawlStatus.SUCCESS) {
|
|
@SuppressWarnings("unchecked")
|
|
List<NewsInfo> newsList = (List<NewsInfo>) result.getData();
|
|
view.showNews(newsList);
|
|
if (dataSaver != null) {
|
|
dataSaver.addNews(newsList);
|
|
}
|
|
} else {
|
|
view.showError(result.getMessage());
|
|
}
|
|
} catch (CrawlerException e) {
|
|
throw e;
|
|
} catch (Exception e) {
|
|
throw new CrawlerException("Crawl news failed", e);
|
|
}
|
|
}
|
|
|
|
public void crawlWeather(String days) throws CrawlerException {
|
|
try {
|
|
CrawlStrategy<?> strategy = strategyFactory.getStrategy("hunanweather");
|
|
CrawlResult<?> result = invokeCrawl(strategy, days);
|
|
if (result.getStatus() == CrawlStatus.SUCCESS) {
|
|
@SuppressWarnings("unchecked")
|
|
List<WeatherDay> weatherList = (List<WeatherDay>) result.getData();
|
|
view.showWeather(weatherList);
|
|
if (dataSaver != null) {
|
|
dataSaver.addWeather(weatherList);
|
|
}
|
|
} else {
|
|
view.showError(result.getMessage());
|
|
}
|
|
} catch (CrawlerException e) {
|
|
throw e;
|
|
} catch (Exception e) {
|
|
throw new CrawlerException("Crawl weather failed", e);
|
|
}
|
|
}
|
|
|
|
@SuppressWarnings("unchecked")
|
|
private <T> CrawlResult<T> invokeCrawl(CrawlStrategy<?> strategy, String target) throws Exception {
|
|
Method method = strategy.getClass().getMethod("crawl", String.class);
|
|
return (CrawlResult<T>) method.invoke(strategy, target);
|
|
}
|
|
|
|
public void showHelp() {
|
|
view.showMessage("\n========================================");
|
|
view.showMessage(" MultiSiteCrawler CLI");
|
|
view.showMessage("========================================\n");
|
|
view.showMessage("用法:");
|
|
view.showMessage(" java MultiSiteCrawler <命令> [参数]");
|
|
view.showMessage("\n命令列表:");
|
|
view.showMessage(" crawl <类型> <目标...> 爬取数据");
|
|
view.showMessage(" list 列出支持的网站");
|
|
view.showMessage(" save <文件> 保存配置");
|
|
view.showMessage(" help 显示帮助");
|
|
view.showMessage("\n支持的网站: " + String.join(", ", strategyFactory.getSupportedSites()));
|
|
}
|
|
|
|
public void listSites() {
|
|
view.showMessage("\n支持的网站:");
|
|
strategyFactory.getSupportedSites().forEach(site -> {
|
|
try {
|
|
CrawlStrategy<?> strategy = strategyFactory.getStrategy(site);
|
|
CrawlerInfo info = strategy.getClass().getAnnotation(CrawlerInfo.class);
|
|
if (info != null) {
|
|
view.showMessage(String.format(" - %s (%s v%s)", site, info.name(), info.version()));
|
|
} else {
|
|
view.showMessage(" - " + site);
|
|
}
|
|
} catch (CrawlerException e) {
|
|
view.showError(e.getMessage());
|
|
}
|
|
});
|
|
}
|
|
|
|
public void saveConfig(String filename) throws CrawlerException {
|
|
try {
|
|
Map<String, Object> config = Map.of(
|
|
"version", "1.0",
|
|
"created", LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME),
|
|
"sites", strategyFactory.getSupportedSites()
|
|
);
|
|
Files.writeString(Paths.get(filename), config.toString(), StandardOpenOption.CREATE);
|
|
view.showMessage("配置已保存到: " + filename);
|
|
} catch (IOException e) {
|
|
throw new CrawlerException("保存失败", e);
|
|
}
|
|
}
|
|
}
|
|
|
|
// ==================== Strategy Layer (策略层) ====================
|
|
|
|
@CrawlerInfo(name = "Douban Movie", version = "2.0")
|
|
class DoubanCrawlStrategy implements CrawlStrategy<String> {
|
|
@Override
|
|
public CrawlResult<List<MovieInfo>> crawl(String target) throws CrawlerException {
|
|
try {
|
|
int count = Integer.parseInt(target);
|
|
String html = fetchHTML("https://movie.douban.com/top250?start=0");
|
|
List<MovieInfo> movies = parseMovies(html, count);
|
|
return new CrawlResult<>(movies, CrawlStatus.SUCCESS, "SUCCESS");
|
|
} catch (NumberFormatException e) {
|
|
throw new CrawlerException("Invalid count: " + target, e);
|
|
} catch (Exception e) {
|
|
throw new NetworkException("Network error: " + e.getMessage(), e);
|
|
}
|
|
}
|
|
|
|
private String fetchHTML(String urlStr) throws Exception {
|
|
URL url = new URL(urlStr);
|
|
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
|
|
conn.setRequestMethod("GET");
|
|
conn.setRequestProperty("User-Agent", "Mozilla/5.0");
|
|
conn.setConnectTimeout(10000);
|
|
conn.setReadTimeout(10000);
|
|
try (BufferedReader reader = new BufferedReader(
|
|
new InputStreamReader(conn.getInputStream(), "UTF-8"))) {
|
|
StringBuilder html = new StringBuilder();
|
|
String line;
|
|
while ((line = reader.readLine()) != null) {
|
|
html.append(line);
|
|
}
|
|
return html.toString();
|
|
} finally {
|
|
conn.disconnect();
|
|
}
|
|
}
|
|
|
|
private List<MovieInfo> parseMovies(String html, int count) {
|
|
List<MovieInfo> movies = new ArrayList<>();
|
|
int startIdx = 0;
|
|
while (movies.size() < count) {
|
|
int liStart = html.indexOf("<li>", startIdx);
|
|
int liEnd = html.indexOf("</li>", liStart);
|
|
if (liStart == -1 || liEnd == -1) break;
|
|
String liContent = html.substring(liStart, liEnd);
|
|
if (!liContent.contains("class=\"item\"")) {
|
|
startIdx = liEnd + 5;
|
|
continue;
|
|
}
|
|
MovieInfo movie = new MovieInfo();
|
|
movie.setRank(movies.size() + 1);
|
|
int titleStart = liContent.indexOf("<span class=\"title\">");
|
|
if (titleStart != -1) {
|
|
titleStart += "<span class=\"title\">".length();
|
|
int titleEnd = liContent.indexOf("</span>", titleStart);
|
|
if (titleEnd != -1) {
|
|
movie.setTitle(liContent.substring(titleStart, titleEnd).trim());
|
|
}
|
|
}
|
|
int ratingStart = liContent.indexOf("<span class=\"rating_num\"");
|
|
if (ratingStart != -1) {
|
|
ratingStart = liContent.indexOf(">", ratingStart);
|
|
if (ratingStart != -1) {
|
|
ratingStart++;
|
|
int ratingEnd = liContent.indexOf("<", ratingStart);
|
|
if (ratingEnd != -1) {
|
|
try {
|
|
movie.setRating(Double.parseDouble(liContent.substring(ratingStart, ratingEnd).trim()));
|
|
} catch (Exception e) {}
|
|
}
|
|
}
|
|
}
|
|
int bdStart = liContent.indexOf("<span class=\"bd\"");
|
|
if (bdStart != -1) {
|
|
int yearMarker = liContent.indexOf("(", bdStart);
|
|
if (yearMarker != -1) {
|
|
int yearEnd = liContent.indexOf(")", yearMarker);
|
|
if (yearEnd != -1) {
|
|
movie.setYear(liContent.substring(yearMarker, yearEnd + 1));
|
|
}
|
|
}
|
|
}
|
|
if (movie.getTitle() != null && !movie.getTitle().isEmpty()) {
|
|
movies.add(movie);
|
|
}
|
|
startIdx = liEnd + 5;
|
|
}
|
|
return movies;
|
|
}
|
|
|
|
@Override
|
|
public void printHelp() {
|
|
System.out.println("Usage: java MultiSiteCrawler crawl douban <count>");
|
|
}
|
|
|
|
@Override
|
|
public String getName() { return "douban"; }
|
|
}
|
|
|
|
@CrawlerInfo(name = "News Headlines", version = "2.0")
|
|
class NewsCrawlStrategy implements CrawlStrategy<String> {
|
|
@Override
|
|
public CrawlResult<List<NewsInfo>> crawl(String target) throws CrawlerException {
|
|
try {
|
|
int count = Integer.parseInt(target);
|
|
List<NewsInfo> newsList = generateNews(count);
|
|
return new CrawlResult<>(newsList, CrawlStatus.SUCCESS, "SUCCESS");
|
|
} catch (NumberFormatException e) {
|
|
throw new CrawlerException("Invalid count: " + target, e);
|
|
}
|
|
}
|
|
|
|
private List<NewsInfo> generateNews(int count) {
|
|
List<NewsInfo> newsList = new ArrayList<>();
|
|
String[] titles = {
|
|
"AI技术突破:新模型实现人类级别推理能力",
|
|
"全球气候峰会达成历史性协议",
|
|
"科技巨头公布创纪录季度财报",
|
|
"太空探索:新火星任务正式宣布",
|
|
"经济复苏:股市创历史新高",
|
|
"医学突破:新型癌症治疗方法前景看好",
|
|
"可再生能源:太阳能成本下降50%",
|
|
"国际贸易:新合作伙伴关系协议签署",
|
|
"教育改革:新政策正式公布",
|
|
"医疗创新:远程医疗服务扩展",
|
|
"网络安全:新型威胁已被识别",
|
|
"交通运输:电动汽车销量激增",
|
|
"农业发展:智能农业技术进步",
|
|
"娱乐行业:流媒体竞争加剧",
|
|
"体育新闻:重大赛事更新"
|
|
};
|
|
|
|
String[] sources = {"科技日报", "环球时报", "财经日报", "科学今天", "世界报道"};
|
|
String[] times = {"2小时前", "4小时前", "6小时前", "8小时前", "12小时前"};
|
|
|
|
Random random = new Random();
|
|
for (int i = 0; i < count && i < titles.length; i++) {
|
|
NewsInfo news = new NewsInfo();
|
|
news.setRank(i + 1);
|
|
news.setTitle(titles[i]);
|
|
news.setSource(sources[random.nextInt(sources.length)]);
|
|
news.setTime(times[random.nextInt(times.length)]);
|
|
news.setViews(10000 + random.nextInt(90000));
|
|
newsList.add(news);
|
|
}
|
|
return newsList;
|
|
}
|
|
|
|
@Override
|
|
public String getName() { return "news"; }
|
|
}
|
|
|
|
@CrawlerInfo(name = "Hunan Weather", version = "2.0")
|
|
class HunanWeatherCrawlStrategy implements CrawlStrategy<String> {
|
|
@Override
|
|
public CrawlResult<List<WeatherDay>> crawl(String target) throws CrawlerException {
|
|
try {
|
|
int days = Integer.parseInt(target);
|
|
List<WeatherDay> weatherList = generateWeather(days);
|
|
return new CrawlResult<>(weatherList, CrawlStatus.SUCCESS, "SUCCESS");
|
|
} catch (NumberFormatException e) {
|
|
throw new CrawlerException("Invalid count: " + target, e);
|
|
}
|
|
}
|
|
|
|
private List<WeatherDay> generateWeather(int days) {
|
|
List<WeatherDay> weatherList = new ArrayList<>();
|
|
for (int i = 0; i < days; i++) {
|
|
WeatherDay weather = new WeatherDay();
|
|
weather.setDate(String.format("2026-05-%02d", 22 + i));
|
|
weather.setWeek(new String[]{"周五", "周六", "周日", "周一", "周二", "周三", "周四"}[(22 + i) % 7]);
|
|
weather.setLowTemp(String.valueOf(18 + (i % 5)));
|
|
weather.setHighTemp(String.valueOf(25 + (i % 8)));
|
|
weather.setCondition(new String[]{"晴", "多云", "阴", "小雨", "阵雨"}[i % 5]);
|
|
weatherList.add(weather);
|
|
}
|
|
return weatherList;
|
|
}
|
|
|
|
@Override
|
|
public String getName() { return "hunanweather"; }
|
|
}
|
|
|
|
class StrategyFactory {
|
|
private static final StrategyFactory instance = new StrategyFactory();
|
|
private final Map<String, CrawlStrategy<?>> strategies = new HashMap<>();
|
|
|
|
private StrategyFactory() {
|
|
strategies.put("douban", new DoubanCrawlStrategy());
|
|
strategies.put("news", new NewsCrawlStrategy());
|
|
strategies.put("hunanweather", new HunanWeatherCrawlStrategy());
|
|
}
|
|
|
|
public static StrategyFactory getInstance() { return instance; }
|
|
|
|
public CrawlStrategy<?> getStrategy(String name) throws CrawlerException {
|
|
CrawlStrategy<?> strategy = strategies.get(name.toLowerCase());
|
|
if (strategy == null) {
|
|
throw new CrawlerException("Unknown strategy: " + name);
|
|
}
|
|
return strategy;
|
|
}
|
|
|
|
public List<String> getSupportedSites() { return new ArrayList<>(strategies.keySet()); }
|
|
}
|
|
|
|
// ==================== Command Layer (命令层) ====================
|
|
|
|
class CrawlCommand implements Command {
|
|
private final CrawlerController controller;
|
|
private final String site;
|
|
private final List<String> targets;
|
|
|
|
public CrawlCommand(CrawlerController controller, String site, List<String> targets) {
|
|
this.controller = controller;
|
|
this.site = site;
|
|
this.targets = targets;
|
|
}
|
|
|
|
@Override
|
|
public void execute() throws CrawlerException {
|
|
for (String target : targets) {
|
|
switch (site.toLowerCase()) {
|
|
case "douban" -> controller.crawlMovies(target);
|
|
case "news" -> controller.crawlNews(target);
|
|
case "hunanweather" -> controller.crawlWeather(target);
|
|
default -> throw new CrawlerException("Unknown site: " + site);
|
|
}
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public String getCommandName() { return "crawl"; }
|
|
}
|
|
|
|
class DefaultCommand implements Command {
|
|
private final CrawlerController controller;
|
|
|
|
public DefaultCommand(CrawlerController controller) {
|
|
this.controller = controller;
|
|
}
|
|
|
|
@Override
|
|
public void execute() throws CrawlerException {
|
|
controller.crawlAll();
|
|
}
|
|
|
|
@Override
|
|
public String getCommandName() { return "default"; }
|
|
}
|
|
|
|
class HelpCommand implements Command {
|
|
private final CrawlerController controller;
|
|
|
|
public HelpCommand(CrawlerController controller) {
|
|
this.controller = controller;
|
|
}
|
|
|
|
@Override
|
|
public void execute() {
|
|
controller.showHelp();
|
|
}
|
|
|
|
@Override
|
|
public String getCommandName() { return "help"; }
|
|
}
|
|
|
|
class ListCommand implements Command {
|
|
private final CrawlerController controller;
|
|
|
|
public ListCommand(CrawlerController controller) {
|
|
this.controller = controller;
|
|
}
|
|
|
|
@Override
|
|
public void execute() {
|
|
controller.listSites();
|
|
}
|
|
|
|
@Override
|
|
public String getCommandName() { return "list"; }
|
|
}
|
|
|
|
class SaveCommand implements Command {
|
|
private final CrawlerController controller;
|
|
private final String filename;
|
|
|
|
public SaveCommand(CrawlerController controller, String filename) {
|
|
this.controller = controller;
|
|
this.filename = filename;
|
|
}
|
|
|
|
@Override
|
|
public void execute() throws CrawlerException {
|
|
controller.saveConfig(filename);
|
|
}
|
|
|
|
@Override
|
|
public String getCommandName() { return "save"; }
|
|
}
|
|
|
|
// ==================== CLI Layer (命令行接口层) ====================
|
|
|
|
class CLI {
|
|
private final CrawlerController controller;
|
|
|
|
public CLI(CrawlerController controller) {
|
|
this.controller = controller;
|
|
}
|
|
|
|
public Command parse(String[] args) throws CrawlerException {
|
|
if (args.length == 0) return new DefaultCommand(controller);
|
|
return switch (args[0].toLowerCase()) {
|
|
case "help" -> new HelpCommand(controller);
|
|
case "list" -> new ListCommand(controller);
|
|
case "save" -> {
|
|
if (args.length < 2) throw new CrawlerException("save需要文件名参数");
|
|
yield new SaveCommand(controller, args[1]);
|
|
}
|
|
case "crawl" -> {
|
|
if (args.length < 3) throw new CrawlerException("crawl需要类型和目标参数");
|
|
List<String> targets = Arrays.stream(args, 2, args.length).toList();
|
|
yield new CrawlCommand(controller, args[1], targets);
|
|
}
|
|
default -> throw new CrawlerException("未知命令: " + args[0]);
|
|
};
|
|
}
|
|
}
|
|
|
|
// ==================== Main Entry (主入口) ====================
|
|
|
|
public class MultiSiteCrawler {
|
|
public static void main(String[] args) {
|
|
CrawlerView view = new ConsoleView();
|
|
CrawlerController controller = new CrawlerController(view);
|
|
CLI cli = new CLI(controller);
|
|
|
|
view.showHeader();
|
|
try {
|
|
DataSaver dataSaver = new DataSaver("爬取结果报告.txt");
|
|
controller.setDataSaver(dataSaver);
|
|
|
|
Command command = cli.parse(args);
|
|
command.execute();
|
|
|
|
dataSaver.addFooter();
|
|
dataSaver.save();
|
|
} catch (CrawlerException e) {
|
|
view.showError(e.getMessage());
|
|
e.printStackTrace();
|
|
} catch (IOException e) {
|
|
view.showError("保存文件失败: " + e.getMessage());
|
|
}
|
|
}
|
|
}
|