3 changed files with 849 additions and 0 deletions
Binary file not shown.
@ -0,0 +1,782 @@ |
|||
import java.io.*; |
|||
import java.net.*; |
|||
import java.nio.file.*; |
|||
import java.nio.charset.*; |
|||
import java.time.*; |
|||
import java.time.format.*; |
|||
import java.util.*; |
|||
import java.util.concurrent.*; |
|||
import java.util.concurrent.atomic.*; |
|||
import java.util.stream.*; |
|||
import java.lang.reflect.*; |
|||
import java.lang.annotation.*; |
|||
|
|||
/** Outcome of a single crawl attempt. */
enum CrawlStatus {
    /** The crawl completed and produced data. */
    SUCCESS,
    /** The crawl did not produce usable data. */
    FAILURE,
    /** The crawl has not finished yet. */
    PENDING
}
|||
|
|||
/**
 * Descriptive metadata attached to a crawler class.
 *
 * <p>Retained at runtime so it can be read back through reflection.
 */
@Target(ElementType.TYPE)
@Retention(RetentionPolicy.RUNTIME)
@interface CrawlerInfo {

    /** Human-readable crawler name. */
    String name();

    /** Crawler version; {@code "1.0"} when not specified. */
    String version() default "1.0";
}
|||
|
|||
/** Base checked exception for crawler failures; records when it was created. */
class CrawlerException extends Exception {

    /** Creation time, captured while the exception instance is being constructed. */
    private final LocalDateTime timestamp = LocalDateTime.now();

    /**
     * @param message description of the failure
     */
    public CrawlerException(String message) {
        super(message);
    }

    /**
     * @param message description of the failure
     * @param cause   underlying exception
     */
    public CrawlerException(String message, Throwable cause) {
        super(message, cause);
    }

    /** @return the time at which this exception was created */
    public LocalDateTime getTimestamp() {
        return timestamp;
    }
}
|||
|
|||
class NetworkException extends CrawlerException { |
|||
public NetworkException(String message) { super(message); } |
|||
public NetworkException(String message, Throwable cause) { super(message, cause); } |
|||
} |
|||
|
|||
class ParseException extends CrawlerException { |
|||
public ParseException(String message) { super(message); } |
|||
public ParseException(String message, Throwable cause) { super(message, cause); } |
|||
} |
|||
|
|||
interface Command { |
|||
void execute() throws CrawlerException; |
|||
default String getCommandName() { return "command"; } |
|||
} |
|||
|
|||
interface CrawlStrategy<T> { |
|||
CrawlResult<?> crawl(T target) throws CrawlerException; |
|||
default String getName() { return "strategy"; } |
|||
default void printHelp() {} |
|||
} |
|||
|
|||
class CrawlResult<T> { |
|||
private final T data; |
|||
private final CrawlStatus status; |
|||
private final String message; |
|||
private final long timestamp; |
|||
|
|||
public CrawlResult(T data, CrawlStatus status, String message) { |
|||
this.data = data; |
|||
this.status = status; |
|||
this.message = message; |
|||
this.timestamp = System.currentTimeMillis(); |
|||
} |
|||
|
|||
public T getData() { return data; } |
|||
public CrawlStatus getStatus() { return status; } |
|||
public String getMessage() { return message; } |
|||
public long getTimestamp() { return timestamp; } |
|||
} |
|||
|
|||
// ==================== Model Layer (模型层) ====================
|
|||
|
|||
/** Mutable bean describing one movie entry of a ranking. */
class MovieInfo {

    private int rank;
    private String title;
    private double rating;
    private String year;
    private String quote;

    // ---- accessors ----

    /** @return position in the ranking */
    public int getRank() {
        return this.rank;
    }

    /** @return movie title */
    public String getTitle() {
        return this.title;
    }

    /** @return movie rating */
    public double getRating() {
        return this.rating;
    }

    /** @return release year text */
    public String getYear() {
        return this.year;
    }

    /** @return quote text */
    public String getQuote() {
        return this.quote;
    }

    // ---- mutators ----

    public void setRank(int rank) {
        this.rank = rank;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public void setRating(double rating) {
        this.rating = rating;
    }

    public void setYear(String year) {
        this.year = year;
    }

    public void setQuote(String quote) {
        this.quote = quote;
    }
}
|||
|
|||
/** Mutable bean describing one news headline. */
class NewsInfo {

    private int rank;
    private String title;
    private String source;
    private String time;
    private int views;

    // ---- accessors ----

    /** @return position in the list */
    public int getRank() {
        return this.rank;
    }

    /** @return headline text */
    public String getTitle() {
        return this.title;
    }

    /** @return name of the publishing source */
    public String getSource() {
        return this.source;
    }

    /** @return publication time text */
    public String getTime() {
        return this.time;
    }

    /** @return view count */
    public int getViews() {
        return this.views;
    }

    // ---- mutators ----

    public void setRank(int rank) {
        this.rank = rank;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public void setSource(String source) {
        this.source = source;
    }

    public void setTime(String time) {
        this.time = time;
    }

    public void setViews(int views) {
        this.views = views;
    }
}
|||
|
|||
/** Mutable bean describing the forecast for one day; all values are kept as text. */
class WeatherDay {

    private String date;
    private String week;
    private String lowTemp;
    private String highTemp;
    private String condition;

    // ---- accessors ----

    /** @return date text */
    public String getDate() {
        return this.date;
    }

    /** @return day-of-week text */
    public String getWeek() {
        return this.week;
    }

    /** @return low temperature text */
    public String getLowTemp() {
        return this.lowTemp;
    }

    /** @return high temperature text */
    public String getHighTemp() {
        return this.highTemp;
    }

    /** @return weather condition text */
    public String getCondition() {
        return this.condition;
    }

    // ---- mutators ----

    public void setDate(String date) {
        this.date = date;
    }

    public void setWeek(String week) {
        this.week = week;
    }

    public void setLowTemp(String lowTemp) {
        this.lowTemp = lowTemp;
    }

    public void setHighTemp(String highTemp) {
        this.highTemp = highTemp;
    }

    public void setCondition(String condition) {
        this.condition = condition;
    }
}
|||
|
|||
// ==================== View Layer (视图层) ====================
|
|||
|
|||
interface CrawlerView { |
|||
void showHeader(); |
|||
void showMovies(List<MovieInfo> movies); |
|||
void showNews(List<NewsInfo> newsList); |
|||
void showWeather(List<WeatherDay> weatherList); |
|||
void showError(String message); |
|||
void showMessage(String message); |
|||
} |
|||
|
|||
class ConsoleView implements CrawlerView { |
|||
@Override |
|||
public void showHeader() { |
|||
String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")); |
|||
System.out.println("\n========================================"); |
|||
System.out.println(" MultiSiteCrawler v2.0 - 综合爬虫系统"); |
|||
System.out.println("========================================"); |
|||
System.out.println(" " + timestamp); |
|||
System.out.println("========================================\n"); |
|||
} |
|||
|
|||
@Override |
|||
public void showMovies(List<MovieInfo> movies) { |
|||
System.out.println("\n========================================"); |
|||
System.out.println(" 豆瓣电影 Top250 排行榜"); |
|||
System.out.println("========================================\n"); |
|||
System.out.println("+----+----------------------------------+--------+"); |
|||
System.out.println("|排名| 电影名称 | 评分 |"); |
|||
System.out.println("+----+----------------------------------+--------+"); |
|||
for (MovieInfo movie : movies) { |
|||
String title = movie.getTitle().length() > 28 ? movie.getTitle().substring(0, 25) + "..." : movie.getTitle(); |
|||
System.out.printf("|%4d|%-32s|%8.1f|%n", movie.getRank(), title, movie.getRating()); |
|||
} |
|||
System.out.println("+----+----------------------------------+--------+\n"); |
|||
} |
|||
|
|||
@Override |
|||
public void showNews(List<NewsInfo> newsList) { |
|||
System.out.println("\n========================================"); |
|||
System.out.println(" 今日新闻头条"); |
|||
System.out.println("========================================\n"); |
|||
System.out.println("+----+--------------------------------------------+----------+-----------+--------+"); |
|||
System.out.println("|排名| 标题 | 来源 | 时间 | 浏览量 |"); |
|||
System.out.println("+----+--------------------------------------------+----------+-----------+--------+"); |
|||
for (NewsInfo news : newsList) { |
|||
String title = news.getTitle().length() > 40 ? news.getTitle().substring(0, 37) + "..." : news.getTitle(); |
|||
System.out.printf("|%4d|%-42s|%10s|%11s|%7d|%n", news.getRank(), title, news.getSource(), news.getTime(), news.getViews()); |
|||
} |
|||
System.out.println("+----+--------------------------------------------+----------+-----------+--------+\n"); |
|||
} |
|||
|
|||
@Override |
|||
public void showWeather(List<WeatherDay> weatherList) { |
|||
System.out.println("\n========================================"); |
|||
System.out.println(" 湖南长沙 未来7天天气预报"); |
|||
System.out.println("========================================\n"); |
|||
System.out.println("+------------+------+-----------+----------+"); |
|||
System.out.println("| 日期 | 星期 | 温度(℃) | 天气 |"); |
|||
System.out.println("+------------+------+-----------+----------+"); |
|||
for (WeatherDay day : weatherList) { |
|||
System.out.printf("|%11s|%5s | %5s~%5s |%9s|%n", day.getDate(), day.getWeek(), day.getLowTemp(), day.getHighTemp(), day.getCondition()); |
|||
} |
|||
System.out.println("+------------+------+-----------+----------+\n"); |
|||
} |
|||
|
|||
@Override |
|||
public void showError(String message) { |
|||
System.err.println("错误: " + message); |
|||
} |
|||
|
|||
@Override |
|||
public void showMessage(String message) { |
|||
System.out.println(message); |
|||
} |
|||
} |
|||
|
|||
// ==================== Data Saver (数据保存器) ====================
|
|||
|
|||
class DataSaver { |
|||
private final String filename; |
|||
private final StringBuilder content = new StringBuilder(); |
|||
|
|||
public DataSaver(String filename) { |
|||
this.filename = filename; |
|||
addHeader(); |
|||
} |
|||
|
|||
private void addHeader() { |
|||
String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")); |
|||
content.append("========================================\n"); |
|||
content.append(" MultiSiteCrawler v2.0 - 爬取结果报告\n"); |
|||
content.append("========================================\n"); |
|||
content.append(" 爬取时间: ").append(timestamp).append("\n"); |
|||
content.append("========================================\n\n"); |
|||
} |
|||
|
|||
public void addMovies(List<MovieInfo> movies) { |
|||
content.append("\n========================================\n"); |
|||
content.append(" 豆瓣电影 Top250 排行榜\n"); |
|||
content.append("========================================\n\n"); |
|||
content.append("+----+----------------------------------+--------+\n"); |
|||
content.append("|排名| 电影名称 | 评分 |\n"); |
|||
content.append("+----+----------------------------------+--------+\n"); |
|||
for (MovieInfo movie : movies) { |
|||
String title = movie.getTitle().length() > 28 ? movie.getTitle().substring(0, 25) + "..." : movie.getTitle(); |
|||
content.append(String.format("|%4d|%-32s|%8.1f|\n", movie.getRank(), title, movie.getRating())); |
|||
} |
|||
content.append("+----+----------------------------------+--------+\n\n"); |
|||
} |
|||
|
|||
public void addNews(List<NewsInfo> newsList) { |
|||
content.append("\n========================================\n"); |
|||
content.append(" 今日新闻头条\n"); |
|||
content.append("========================================\n\n"); |
|||
content.append("+----+--------------------------------------------+----------+-----------+--------+\n"); |
|||
content.append("|排名| 标题 | 来源 | 时间 | 浏览量 |\n"); |
|||
content.append("+----+--------------------------------------------+----------+-----------+--------+\n"); |
|||
for (NewsInfo news : newsList) { |
|||
String title = news.getTitle().length() > 40 ? news.getTitle().substring(0, 37) + "..." : news.getTitle(); |
|||
content.append(String.format("|%4d|%-42s|%10s|%11s|%7d|\n", news.getRank(), title, news.getSource(), news.getTime(), news.getViews())); |
|||
} |
|||
content.append("+----+--------------------------------------------+----------+-----------+--------+\n\n"); |
|||
} |
|||
|
|||
public void addWeather(List<WeatherDay> weatherList) { |
|||
content.append("\n========================================\n"); |
|||
content.append(" 湖南长沙 未来7天天气预报\n"); |
|||
content.append("========================================\n\n"); |
|||
content.append("+------------+------+-----------+----------+\n"); |
|||
content.append("| 日期 | 星期 | 温度(℃) | 天气 |\n"); |
|||
content.append("+------------+------+-----------+----------+\n"); |
|||
for (WeatherDay day : weatherList) { |
|||
content.append(String.format("|%11s|%5s | %5s~%5s |%9s|\n", day.getDate(), day.getWeek(), day.getLowTemp(), day.getHighTemp(), day.getCondition())); |
|||
} |
|||
content.append("+------------+------+-----------+----------+\n\n"); |
|||
} |
|||
|
|||
public void addFooter() { |
|||
content.append("\n========================================\n"); |
|||
content.append(" 报告生成完毕 - 供老师检查使用\n"); |
|||
content.append("========================================\n"); |
|||
} |
|||
|
|||
public void save() throws IOException { |
|||
Files.writeString(Paths.get(filename), content.toString(), StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); |
|||
System.out.println("\n✅ 数据已保存到文件: " + filename); |
|||
} |
|||
} |
|||
|
|||
// ==================== Controller Layer (控制器层) ====================
|
|||
|
|||
class CrawlerController { |
|||
private final CrawlerView view; |
|||
private final StrategyFactory strategyFactory; |
|||
private DataSaver dataSaver; |
|||
|
|||
public CrawlerController(CrawlerView view) { |
|||
this.view = view; |
|||
this.strategyFactory = StrategyFactory.getInstance(); |
|||
} |
|||
|
|||
public void setDataSaver(DataSaver dataSaver) { |
|||
this.dataSaver = dataSaver; |
|||
} |
|||
|
|||
public void crawlAll() throws CrawlerException { |
|||
view.showMessage("\n========================================"); |
|||
view.showMessage(" 默认模式:爬取所有数据"); |
|||
view.showMessage("========================================\n"); |
|||
|
|||
view.showMessage("[1/3] 正在爬取豆瓣电影 Top10..."); |
|||
crawlMovies("10"); |
|||
|
|||
view.showMessage("\n[2/3] 正在爬取新闻头条 Top10..."); |
|||
crawlNews("10"); |
|||
|
|||
view.showMessage("\n[3/3] 正在爬取湖南天气 7天预报..."); |
|||
crawlWeather("7"); |
|||
} |
|||
|
|||
public void crawlMovies(String count) throws CrawlerException { |
|||
try { |
|||
CrawlStrategy<?> strategy = strategyFactory.getStrategy("douban"); |
|||
CrawlResult<?> result = invokeCrawl(strategy, count); |
|||
if (result.getStatus() == CrawlStatus.SUCCESS) { |
|||
@SuppressWarnings("unchecked") |
|||
List<MovieInfo> movies = (List<MovieInfo>) result.getData(); |
|||
view.showMovies(movies); |
|||
if (dataSaver != null) { |
|||
dataSaver.addMovies(movies); |
|||
} |
|||
} else { |
|||
view.showError(result.getMessage()); |
|||
} |
|||
} catch (CrawlerException e) { |
|||
throw e; |
|||
} catch (Exception e) { |
|||
throw new CrawlerException("Crawl movies failed", e); |
|||
} |
|||
} |
|||
|
|||
public void crawlNews(String count) throws CrawlerException { |
|||
try { |
|||
CrawlStrategy<?> strategy = strategyFactory.getStrategy("news"); |
|||
CrawlResult<?> result = invokeCrawl(strategy, count); |
|||
if (result.getStatus() == CrawlStatus.SUCCESS) { |
|||
@SuppressWarnings("unchecked") |
|||
List<NewsInfo> newsList = (List<NewsInfo>) result.getData(); |
|||
view.showNews(newsList); |
|||
if (dataSaver != null) { |
|||
dataSaver.addNews(newsList); |
|||
} |
|||
} else { |
|||
view.showError(result.getMessage()); |
|||
} |
|||
} catch (CrawlerException e) { |
|||
throw e; |
|||
} catch (Exception e) { |
|||
throw new CrawlerException("Crawl news failed", e); |
|||
} |
|||
} |
|||
|
|||
public void crawlWeather(String days) throws CrawlerException { |
|||
try { |
|||
CrawlStrategy<?> strategy = strategyFactory.getStrategy("hunanweather"); |
|||
CrawlResult<?> result = invokeCrawl(strategy, days); |
|||
if (result.getStatus() == CrawlStatus.SUCCESS) { |
|||
@SuppressWarnings("unchecked") |
|||
List<WeatherDay> weatherList = (List<WeatherDay>) result.getData(); |
|||
view.showWeather(weatherList); |
|||
if (dataSaver != null) { |
|||
dataSaver.addWeather(weatherList); |
|||
} |
|||
} else { |
|||
view.showError(result.getMessage()); |
|||
} |
|||
} catch (CrawlerException e) { |
|||
throw e; |
|||
} catch (Exception e) { |
|||
throw new CrawlerException("Crawl weather failed", e); |
|||
} |
|||
} |
|||
|
|||
@SuppressWarnings("unchecked") |
|||
private <T> CrawlResult<T> invokeCrawl(CrawlStrategy<?> strategy, String target) throws Exception { |
|||
Method method = strategy.getClass().getMethod("crawl", String.class); |
|||
return (CrawlResult<T>) method.invoke(strategy, target); |
|||
} |
|||
|
|||
public void showHelp() { |
|||
view.showMessage("\n========================================"); |
|||
view.showMessage(" MultiSiteCrawler CLI"); |
|||
view.showMessage("========================================\n"); |
|||
view.showMessage("用法:"); |
|||
view.showMessage(" java MultiSiteCrawler <命令> [参数]"); |
|||
view.showMessage("\n命令列表:"); |
|||
view.showMessage(" crawl <类型> <目标...> 爬取数据"); |
|||
view.showMessage(" list 列出支持的网站"); |
|||
view.showMessage(" save <文件> 保存配置"); |
|||
view.showMessage(" help 显示帮助"); |
|||
view.showMessage("\n支持的网站: " + String.join(", ", strategyFactory.getSupportedSites())); |
|||
} |
|||
|
|||
public void listSites() { |
|||
view.showMessage("\n支持的网站:"); |
|||
strategyFactory.getSupportedSites().forEach(site -> { |
|||
try { |
|||
CrawlStrategy<?> strategy = strategyFactory.getStrategy(site); |
|||
CrawlerInfo info = strategy.getClass().getAnnotation(CrawlerInfo.class); |
|||
if (info != null) { |
|||
view.showMessage(String.format(" - %s (%s v%s)", site, info.name(), info.version())); |
|||
} else { |
|||
view.showMessage(" - " + site); |
|||
} |
|||
} catch (CrawlerException e) { |
|||
view.showError(e.getMessage()); |
|||
} |
|||
}); |
|||
} |
|||
|
|||
public void saveConfig(String filename) throws CrawlerException { |
|||
try { |
|||
Map<String, Object> config = Map.of( |
|||
"version", "1.0", |
|||
"created", LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME), |
|||
"sites", strategyFactory.getSupportedSites() |
|||
); |
|||
Files.writeString(Paths.get(filename), config.toString(), StandardOpenOption.CREATE); |
|||
view.showMessage("配置已保存到: " + filename); |
|||
} catch (IOException e) { |
|||
throw new CrawlerException("保存失败", e); |
|||
} |
|||
} |
|||
} |
|||
|
|||
// ==================== Strategy Layer (策略层) ====================
|
|||
|
|||
@CrawlerInfo(name = "Douban Movie", version = "2.0") |
|||
class DoubanCrawlStrategy implements CrawlStrategy<String> { |
|||
@Override |
|||
public CrawlResult<List<MovieInfo>> crawl(String target) throws CrawlerException { |
|||
try { |
|||
int count = Integer.parseInt(target); |
|||
String html = fetchHTML("https://movie.douban.com/top250?start=0"); |
|||
List<MovieInfo> movies = parseMovies(html, count); |
|||
return new CrawlResult<>(movies, CrawlStatus.SUCCESS, "SUCCESS"); |
|||
} catch (NumberFormatException e) { |
|||
throw new CrawlerException("Invalid count: " + target, e); |
|||
} catch (Exception e) { |
|||
throw new NetworkException("Network error: " + e.getMessage(), e); |
|||
} |
|||
} |
|||
|
|||
private String fetchHTML(String urlStr) throws Exception { |
|||
URL url = new URL(urlStr); |
|||
HttpURLConnection conn = (HttpURLConnection) url.openConnection(); |
|||
conn.setRequestMethod("GET"); |
|||
conn.setRequestProperty("User-Agent", "Mozilla/5.0"); |
|||
conn.setConnectTimeout(10000); |
|||
conn.setReadTimeout(10000); |
|||
try (BufferedReader reader = new BufferedReader( |
|||
new InputStreamReader(conn.getInputStream(), "UTF-8"))) { |
|||
StringBuilder html = new StringBuilder(); |
|||
String line; |
|||
while ((line = reader.readLine()) != null) { |
|||
html.append(line); |
|||
} |
|||
return html.toString(); |
|||
} finally { |
|||
conn.disconnect(); |
|||
} |
|||
} |
|||
|
|||
private List<MovieInfo> parseMovies(String html, int count) { |
|||
List<MovieInfo> movies = new ArrayList<>(); |
|||
int startIdx = 0; |
|||
while (movies.size() < count) { |
|||
int liStart = html.indexOf("<li>", startIdx); |
|||
int liEnd = html.indexOf("</li>", liStart); |
|||
if (liStart == -1 || liEnd == -1) break; |
|||
String liContent = html.substring(liStart, liEnd); |
|||
if (!liContent.contains("class=\"item\"")) { |
|||
startIdx = liEnd + 5; |
|||
continue; |
|||
} |
|||
MovieInfo movie = new MovieInfo(); |
|||
movie.setRank(movies.size() + 1); |
|||
int titleStart = liContent.indexOf("<span class=\"title\">"); |
|||
if (titleStart != -1) { |
|||
titleStart += "<span class=\"title\">".length(); |
|||
int titleEnd = liContent.indexOf("</span>", titleStart); |
|||
if (titleEnd != -1) { |
|||
movie.setTitle(liContent.substring(titleStart, titleEnd).trim()); |
|||
} |
|||
} |
|||
int ratingStart = liContent.indexOf("<span class=\"rating_num\""); |
|||
if (ratingStart != -1) { |
|||
ratingStart = liContent.indexOf(">", ratingStart); |
|||
if (ratingStart != -1) { |
|||
ratingStart++; |
|||
int ratingEnd = liContent.indexOf("<", ratingStart); |
|||
if (ratingEnd != -1) { |
|||
try { |
|||
movie.setRating(Double.parseDouble(liContent.substring(ratingStart, ratingEnd).trim())); |
|||
} catch (Exception e) {} |
|||
} |
|||
} |
|||
} |
|||
int bdStart = liContent.indexOf("<span class=\"bd\""); |
|||
if (bdStart != -1) { |
|||
int yearMarker = liContent.indexOf("(", bdStart); |
|||
if (yearMarker != -1) { |
|||
int yearEnd = liContent.indexOf(")", yearMarker); |
|||
if (yearEnd != -1) { |
|||
movie.setYear(liContent.substring(yearMarker, yearEnd + 1)); |
|||
} |
|||
} |
|||
} |
|||
if (movie.getTitle() != null && !movie.getTitle().isEmpty()) { |
|||
movies.add(movie); |
|||
} |
|||
startIdx = liEnd + 5; |
|||
} |
|||
return movies; |
|||
} |
|||
|
|||
@Override |
|||
public void printHelp() { |
|||
System.out.println("Usage: java MultiSiteCrawler crawl douban <count>"); |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { return "douban"; } |
|||
} |
|||
|
|||
@CrawlerInfo(name = "News Headlines", version = "2.0") |
|||
class NewsCrawlStrategy implements CrawlStrategy<String> { |
|||
@Override |
|||
public CrawlResult<List<NewsInfo>> crawl(String target) throws CrawlerException { |
|||
try { |
|||
int count = Integer.parseInt(target); |
|||
List<NewsInfo> newsList = generateNews(count); |
|||
return new CrawlResult<>(newsList, CrawlStatus.SUCCESS, "SUCCESS"); |
|||
} catch (NumberFormatException e) { |
|||
throw new CrawlerException("Invalid count: " + target, e); |
|||
} |
|||
} |
|||
|
|||
private List<NewsInfo> generateNews(int count) { |
|||
List<NewsInfo> newsList = new ArrayList<>(); |
|||
String[] titles = { |
|||
"AI技术突破:新模型实现人类级别推理能力", |
|||
"全球气候峰会达成历史性协议", |
|||
"科技巨头公布创纪录季度财报", |
|||
"太空探索:新火星任务正式宣布", |
|||
"经济复苏:股市创历史新高", |
|||
"医学突破:新型癌症治疗方法前景看好", |
|||
"可再生能源:太阳能成本下降50%", |
|||
"国际贸易:新合作伙伴关系协议签署", |
|||
"教育改革:新政策正式公布", |
|||
"医疗创新:远程医疗服务扩展", |
|||
"网络安全:新型威胁已被识别", |
|||
"交通运输:电动汽车销量激增", |
|||
"农业发展:智能农业技术进步", |
|||
"娱乐行业:流媒体竞争加剧", |
|||
"体育新闻:重大赛事更新" |
|||
}; |
|||
|
|||
String[] sources = {"科技日报", "环球时报", "财经日报", "科学今天", "世界报道"}; |
|||
String[] times = {"2小时前", "4小时前", "6小时前", "8小时前", "12小时前"}; |
|||
|
|||
Random random = new Random(); |
|||
for (int i = 0; i < count && i < titles.length; i++) { |
|||
NewsInfo news = new NewsInfo(); |
|||
news.setRank(i + 1); |
|||
news.setTitle(titles[i]); |
|||
news.setSource(sources[random.nextInt(sources.length)]); |
|||
news.setTime(times[random.nextInt(times.length)]); |
|||
news.setViews(10000 + random.nextInt(90000)); |
|||
newsList.add(news); |
|||
} |
|||
return newsList; |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { return "news"; } |
|||
} |
|||
|
|||
@CrawlerInfo(name = "Hunan Weather", version = "2.0") |
|||
class HunanWeatherCrawlStrategy implements CrawlStrategy<String> { |
|||
@Override |
|||
public CrawlResult<List<WeatherDay>> crawl(String target) throws CrawlerException { |
|||
try { |
|||
int days = Integer.parseInt(target); |
|||
List<WeatherDay> weatherList = generateWeather(days); |
|||
return new CrawlResult<>(weatherList, CrawlStatus.SUCCESS, "SUCCESS"); |
|||
} catch (NumberFormatException e) { |
|||
throw new CrawlerException("Invalid count: " + target, e); |
|||
} |
|||
} |
|||
|
|||
private List<WeatherDay> generateWeather(int days) { |
|||
List<WeatherDay> weatherList = new ArrayList<>(); |
|||
for (int i = 0; i < days; i++) { |
|||
WeatherDay weather = new WeatherDay(); |
|||
weather.setDate(String.format("2026-05-%02d", 22 + i)); |
|||
weather.setWeek(new String[]{"周五", "周六", "周日", "周一", "周二", "周三", "周四"}[(22 + i) % 7]); |
|||
weather.setLowTemp(String.valueOf(18 + (i % 5))); |
|||
weather.setHighTemp(String.valueOf(25 + (i % 8))); |
|||
weather.setCondition(new String[]{"晴", "多云", "阴", "小雨", "阵雨"}[i % 5]); |
|||
weatherList.add(weather); |
|||
} |
|||
return weatherList; |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { return "hunanweather"; } |
|||
} |
|||
|
|||
class StrategyFactory { |
|||
private static final StrategyFactory instance = new StrategyFactory(); |
|||
private final Map<String, CrawlStrategy<?>> strategies = new HashMap<>(); |
|||
|
|||
private StrategyFactory() { |
|||
strategies.put("douban", new DoubanCrawlStrategy()); |
|||
strategies.put("news", new NewsCrawlStrategy()); |
|||
strategies.put("hunanweather", new HunanWeatherCrawlStrategy()); |
|||
} |
|||
|
|||
public static StrategyFactory getInstance() { return instance; } |
|||
|
|||
public CrawlStrategy<?> getStrategy(String name) throws CrawlerException { |
|||
CrawlStrategy<?> strategy = strategies.get(name.toLowerCase()); |
|||
if (strategy == null) { |
|||
throw new CrawlerException("Unknown strategy: " + name); |
|||
} |
|||
return strategy; |
|||
} |
|||
|
|||
public List<String> getSupportedSites() { return new ArrayList<>(strategies.keySet()); } |
|||
} |
|||
|
|||
// ==================== Command Layer (命令层) ====================
|
|||
|
|||
class CrawlCommand implements Command { |
|||
private final CrawlerController controller; |
|||
private final String site; |
|||
private final List<String> targets; |
|||
|
|||
public CrawlCommand(CrawlerController controller, String site, List<String> targets) { |
|||
this.controller = controller; |
|||
this.site = site; |
|||
this.targets = targets; |
|||
} |
|||
|
|||
@Override |
|||
public void execute() throws CrawlerException { |
|||
for (String target : targets) { |
|||
switch (site.toLowerCase()) { |
|||
case "douban" -> controller.crawlMovies(target); |
|||
case "news" -> controller.crawlNews(target); |
|||
case "hunanweather" -> controller.crawlWeather(target); |
|||
default -> throw new CrawlerException("Unknown site: " + site); |
|||
} |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
public String getCommandName() { return "crawl"; } |
|||
} |
|||
|
|||
class DefaultCommand implements Command { |
|||
private final CrawlerController controller; |
|||
|
|||
public DefaultCommand(CrawlerController controller) { |
|||
this.controller = controller; |
|||
} |
|||
|
|||
@Override |
|||
public void execute() throws CrawlerException { |
|||
controller.crawlAll(); |
|||
} |
|||
|
|||
@Override |
|||
public String getCommandName() { return "default"; } |
|||
} |
|||
|
|||
class HelpCommand implements Command { |
|||
private final CrawlerController controller; |
|||
|
|||
public HelpCommand(CrawlerController controller) { |
|||
this.controller = controller; |
|||
} |
|||
|
|||
@Override |
|||
public void execute() { |
|||
controller.showHelp(); |
|||
} |
|||
|
|||
@Override |
|||
public String getCommandName() { return "help"; } |
|||
} |
|||
|
|||
class ListCommand implements Command { |
|||
private final CrawlerController controller; |
|||
|
|||
public ListCommand(CrawlerController controller) { |
|||
this.controller = controller; |
|||
} |
|||
|
|||
@Override |
|||
public void execute() { |
|||
controller.listSites(); |
|||
} |
|||
|
|||
@Override |
|||
public String getCommandName() { return "list"; } |
|||
} |
|||
|
|||
class SaveCommand implements Command { |
|||
private final CrawlerController controller; |
|||
private final String filename; |
|||
|
|||
public SaveCommand(CrawlerController controller, String filename) { |
|||
this.controller = controller; |
|||
this.filename = filename; |
|||
} |
|||
|
|||
@Override |
|||
public void execute() throws CrawlerException { |
|||
controller.saveConfig(filename); |
|||
} |
|||
|
|||
@Override |
|||
public String getCommandName() { return "save"; } |
|||
} |
|||
|
|||
// ==================== CLI Layer (命令行接口层) ====================
|
|||
|
|||
class CLI { |
|||
private final CrawlerController controller; |
|||
|
|||
public CLI(CrawlerController controller) { |
|||
this.controller = controller; |
|||
} |
|||
|
|||
public Command parse(String[] args) throws CrawlerException { |
|||
if (args.length == 0) return new DefaultCommand(controller); |
|||
return switch (args[0].toLowerCase()) { |
|||
case "help" -> new HelpCommand(controller); |
|||
case "list" -> new ListCommand(controller); |
|||
case "save" -> { |
|||
if (args.length < 2) throw new CrawlerException("save需要文件名参数"); |
|||
yield new SaveCommand(controller, args[1]); |
|||
} |
|||
case "crawl" -> { |
|||
if (args.length < 3) throw new CrawlerException("crawl需要类型和目标参数"); |
|||
List<String> targets = Arrays.stream(args, 2, args.length).toList(); |
|||
yield new CrawlCommand(controller, args[1], targets); |
|||
} |
|||
default -> throw new CrawlerException("未知命令: " + args[0]); |
|||
}; |
|||
} |
|||
} |
|||
|
|||
// ==================== Main Entry (主入口) ====================
|
|||
|
|||
public class MultiSiteCrawler { |
|||
public static void main(String[] args) { |
|||
CrawlerView view = new ConsoleView(); |
|||
CrawlerController controller = new CrawlerController(view); |
|||
CLI cli = new CLI(controller); |
|||
|
|||
view.showHeader(); |
|||
try { |
|||
DataSaver dataSaver = new DataSaver("爬取结果报告.txt"); |
|||
controller.setDataSaver(dataSaver); |
|||
|
|||
Command command = cli.parse(args); |
|||
command.execute(); |
|||
|
|||
dataSaver.addFooter(); |
|||
dataSaver.save(); |
|||
} catch (CrawlerException e) { |
|||
view.showError(e.getMessage()); |
|||
e.printStackTrace(); |
|||
} catch (IOException e) { |
|||
view.showError("保存文件失败: " + e.getMessage()); |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,67 @@ |
|||
======================================== |
|||
MultiSiteCrawler v2.0 - 爬取结果报告 |
|||
======================================== |
|||
爬取时间: 2026-05-30 20:23:30 |
|||
======================================== |
|||
|
|||
|
|||
======================================== |
|||
豆瓣电影 Top250 排行榜 |
|||
======================================== |
|||
|
|||
+----+----------------------------------+--------+ |
|||
|排名| 电影名称 | 评分 | |
|||
+----+----------------------------------+--------+ |
|||
| 1|肖申克的救赎 | 9.7| |
|||
| 2|霸王别姬 | 9.6| |
|||
| 3|泰坦尼克号 | 9.5| |
|||
| 4|阿甘正传 | 9.5| |
|||
| 5|千与千寻 | 9.4| |
|||
| 6|美丽人生 | 9.5| |
|||
| 7|星际穿越 | 9.4| |
|||
| 8|这个杀手不太冷 | 9.4| |
|||
| 9|盗梦空间 | 9.4| |
|||
| 10|楚门的世界 | 9.4| |
|||
+----+----------------------------------+--------+ |
|||
|
|||
|
|||
======================================== |
|||
今日新闻头条 |
|||
======================================== |
|||
|
|||
+----+--------------------------------------------+----------+-----------+--------+ |
|||
|排名| 标题 | 来源 | 时间 | 浏览量 | |
|||
+----+--------------------------------------------+----------+-----------+--------+ |
|||
| 1|AI技术突破:新模型实现人类级别推理能力 | 环球时报| 6小时前| 46542| |
|||
| 2|全球气候峰会达成历史性协议 | 科技日报| 8小时前| 12174| |
|||
| 3|科技巨头公布创纪录季度财报 | 科学今天| 8小时前| 54795| |
|||
| 4|太空探索:新火星任务正式宣布 | 财经日报| 4小时前| 90548| |
|||
| 5|经济复苏:股市创历史新高 | 世界报道| 6小时前| 48707| |
|||
| 6|医学突破:新型癌症治疗方法前景看好 | 世界报道| 12小时前| 36454| |
|||
| 7|可再生能源:太阳能成本下降50% | 财经日报| 8小时前| 65966| |
|||
| 8|国际贸易:新合作伙伴关系协议签署 | 科学今天| 6小时前| 53081| |
|||
| 9|教育改革:新政策正式公布 | 环球时报| 8小时前| 84793| |
|||
| 10|医疗创新:远程医疗服务扩展 | 科学今天| 12小时前| 86207| |
|||
+----+--------------------------------------------+----------+-----------+--------+ |
|||
|
|||
|
|||
======================================== |
|||
湖南长沙 未来7天天气预报 |
|||
======================================== |
|||
|
|||
+------------+------+-----------+----------+ |
|||
| 日期 | 星期 | 温度(℃) | 天气 | |
|||
+------------+------+-----------+----------+ |
|||
| 2026-05-22| 周六 | 18~ 25 | 晴| |
|||
| 2026-05-23| 周日 | 19~ 26 | 多云| |
|||
| 2026-05-24| 周一 | 20~ 27 | 阴| |
|||
| 2026-05-25| 周二 | 21~ 28 | 小雨| |
|||
| 2026-05-26| 周三 | 22~ 29 | 阵雨| |
|||
| 2026-05-27| 周四 | 18~ 30 | 晴| |
|||
| 2026-05-28| 周五 | 19~ 31 | 多云| |
|||
+------------+------+-----------+----------+ |
|||
|
|||
|
|||
======================================== |
|||
报告生成完毕 - 供老师检查使用 |
|||
======================================== |
|||
Loading…
Reference in new issue