3 changed files with 849 additions and 0 deletions
Binary file not shown.
@ -0,0 +1,782 @@ |
|||||
|
import java.io.*; |
||||
|
import java.net.*; |
||||
|
import java.nio.file.*; |
||||
|
import java.nio.charset.*; |
||||
|
import java.time.*; |
||||
|
import java.time.format.*; |
||||
|
import java.util.*; |
||||
|
import java.util.concurrent.*; |
||||
|
import java.util.concurrent.atomic.*; |
||||
|
import java.util.stream.*; |
||||
|
import java.lang.reflect.*; |
||||
|
import java.lang.annotation.*; |
||||
|
|
||||
|
/**
 * Outcome attached to a {@code CrawlResult}.
 *
 * <p>Only {@link #SUCCESS} is inspected by the visible callers; the other
 * two constants are declared but not produced anywhere in this file.
 */
enum CrawlStatus {
    /** The crawl produced usable data. */
    SUCCESS,
    /** The crawl did not produce usable data. */
    FAILURE,
    /** NOTE(review): presumably "not finished yet" - no visible code uses it. */
    PENDING
}
||||
|
|
||||
|
/**
 * Runtime-visible metadata for a crawler type: a display name and a version
 * string. The controller reads it reflectively when listing supported sites.
 */
@Target(ElementType.TYPE)
@Retention(RetentionPolicy.RUNTIME)
@interface CrawlerInfo {

    /** Human-readable crawler name (required). */
    String name();

    /** Crawler version; defaults to {@code "1.0"} when omitted. */
    String version() default "1.0";
}
||||
|
|
||||
|
/**
 * Base checked exception for crawler failures. Each instance records the
 * local date-time at which it was constructed.
 */
class CrawlerException extends Exception {

    /** Creation time of this exception, captured during construction. */
    private final LocalDateTime timestamp = LocalDateTime.now();

    /**
     * @param message description of the failure
     */
    public CrawlerException(String message) {
        super(message);
    }

    /**
     * @param message description of the failure
     * @param cause   underlying error that triggered this one
     */
    public CrawlerException(String message, Throwable cause) {
        super(message, cause);
    }

    /** @return the moment this exception object was created */
    public LocalDateTime getTimestamp() {
        return timestamp;
    }
}
||||
|
|
||||
|
class NetworkException extends CrawlerException { |
||||
|
public NetworkException(String message) { super(message); } |
||||
|
public NetworkException(String message, Throwable cause) { super(message, cause); } |
||||
|
} |
||||
|
|
||||
|
class ParseException extends CrawlerException { |
||||
|
public ParseException(String message) { super(message); } |
||||
|
public ParseException(String message, Throwable cause) { super(message, cause); } |
||||
|
} |
||||
|
|
||||
|
interface Command { |
||||
|
void execute() throws CrawlerException; |
||||
|
default String getCommandName() { return "command"; } |
||||
|
} |
||||
|
|
||||
|
interface CrawlStrategy<T> { |
||||
|
CrawlResult<?> crawl(T target) throws CrawlerException; |
||||
|
default String getName() { return "strategy"; } |
||||
|
default void printHelp() {} |
||||
|
} |
||||
|
|
||||
|
class CrawlResult<T> { |
||||
|
private final T data; |
||||
|
private final CrawlStatus status; |
||||
|
private final String message; |
||||
|
private final long timestamp; |
||||
|
|
||||
|
public CrawlResult(T data, CrawlStatus status, String message) { |
||||
|
this.data = data; |
||||
|
this.status = status; |
||||
|
this.message = message; |
||||
|
this.timestamp = System.currentTimeMillis(); |
||||
|
} |
||||
|
|
||||
|
public T getData() { return data; } |
||||
|
public CrawlStatus getStatus() { return status; } |
||||
|
public String getMessage() { return message; } |
||||
|
public long getTimestamp() { return timestamp; } |
||||
|
} |
||||
|
|
||||
|
// ==================== Model Layer (模型层) ====================
|
||||
|
|
||||
|
/** Mutable bean describing one movie entry: rank, title, rating, year and quote. */
class MovieInfo {

    private int rank;
    private String title;
    private double rating;
    private String year;
    private String quote;

    /** @return position in the list */
    public int getRank() {
        return this.rank;
    }

    /** @param rank position in the list */
    public void setRank(int rank) {
        this.rank = rank;
    }

    /** @return movie title, or {@code null} if never set */
    public String getTitle() {
        return this.title;
    }

    /** @param title movie title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return rating score; {@code 0.0} if never set */
    public double getRating() {
        return this.rating;
    }

    /** @param rating rating score */
    public void setRating(double rating) {
        this.rating = rating;
    }

    /** @return year text, or {@code null} if never set */
    public String getYear() {
        return this.year;
    }

    /** @param year year text */
    public void setYear(String year) {
        this.year = year;
    }

    /** @return quote text, or {@code null} if never set */
    public String getQuote() {
        return this.quote;
    }

    /** @param quote quote text */
    public void setQuote(String quote) {
        this.quote = quote;
    }
}
||||
|
|
||||
|
/** Mutable bean describing one news entry: rank, title, source, time label and view count. */
class NewsInfo {

    private int rank;
    private String title;
    private String source;
    private String time;
    private int views;

    /** @return position in the list */
    public int getRank() {
        return this.rank;
    }

    /** @param rank position in the list */
    public void setRank(int rank) {
        this.rank = rank;
    }

    /** @return headline, or {@code null} if never set */
    public String getTitle() {
        return this.title;
    }

    /** @param title headline */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return publisher name, or {@code null} if never set */
    public String getSource() {
        return this.source;
    }

    /** @param source publisher name */
    public void setSource(String source) {
        this.source = source;
    }

    /** @return time label, or {@code null} if never set */
    public String getTime() {
        return this.time;
    }

    /** @param time time label */
    public void setTime(String time) {
        this.time = time;
    }

    /** @return view count; {@code 0} if never set */
    public int getViews() {
        return this.views;
    }

    /** @param views view count */
    public void setViews(int views) {
        this.views = views;
    }
}
||||
|
|
||||
|
/** Mutable bean for one forecast day; every field is kept as display text. */
class WeatherDay {

    private String date;
    private String week;
    private String lowTemp;
    private String highTemp;
    private String condition;

    /** @return date text, or {@code null} if never set */
    public String getDate() {
        return this.date;
    }

    /** @param date date text */
    public void setDate(String date) {
        this.date = date;
    }

    /** @return weekday label, or {@code null} if never set */
    public String getWeek() {
        return this.week;
    }

    /** @param week weekday label */
    public void setWeek(String week) {
        this.week = week;
    }

    /** @return low temperature text, or {@code null} if never set */
    public String getLowTemp() {
        return this.lowTemp;
    }

    /** @param lowTemp low temperature text */
    public void setLowTemp(String lowTemp) {
        this.lowTemp = lowTemp;
    }

    /** @return high temperature text, or {@code null} if never set */
    public String getHighTemp() {
        return this.highTemp;
    }

    /** @param highTemp high temperature text */
    public void setHighTemp(String highTemp) {
        this.highTemp = highTemp;
    }

    /** @return weather condition text, or {@code null} if never set */
    public String getCondition() {
        return this.condition;
    }

    /** @param condition weather condition text */
    public void setCondition(String condition) {
        this.condition = condition;
    }
}
||||
|
|
||||
|
// ==================== View Layer (视图层) ====================
|
||||
|
|
||||
|
interface CrawlerView { |
||||
|
void showHeader(); |
||||
|
void showMovies(List<MovieInfo> movies); |
||||
|
void showNews(List<NewsInfo> newsList); |
||||
|
void showWeather(List<WeatherDay> weatherList); |
||||
|
void showError(String message); |
||||
|
void showMessage(String message); |
||||
|
} |
||||
|
|
||||
|
class ConsoleView implements CrawlerView { |
||||
|
@Override |
||||
|
public void showHeader() { |
||||
|
String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")); |
||||
|
System.out.println("\n========================================"); |
||||
|
System.out.println(" MultiSiteCrawler v2.0 - 综合爬虫系统"); |
||||
|
System.out.println("========================================"); |
||||
|
System.out.println(" " + timestamp); |
||||
|
System.out.println("========================================\n"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void showMovies(List<MovieInfo> movies) { |
||||
|
System.out.println("\n========================================"); |
||||
|
System.out.println(" 豆瓣电影 Top250 排行榜"); |
||||
|
System.out.println("========================================\n"); |
||||
|
System.out.println("+----+----------------------------------+--------+"); |
||||
|
System.out.println("|排名| 电影名称 | 评分 |"); |
||||
|
System.out.println("+----+----------------------------------+--------+"); |
||||
|
for (MovieInfo movie : movies) { |
||||
|
String title = movie.getTitle().length() > 28 ? movie.getTitle().substring(0, 25) + "..." : movie.getTitle(); |
||||
|
System.out.printf("|%4d|%-32s|%8.1f|%n", movie.getRank(), title, movie.getRating()); |
||||
|
} |
||||
|
System.out.println("+----+----------------------------------+--------+\n"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void showNews(List<NewsInfo> newsList) { |
||||
|
System.out.println("\n========================================"); |
||||
|
System.out.println(" 今日新闻头条"); |
||||
|
System.out.println("========================================\n"); |
||||
|
System.out.println("+----+--------------------------------------------+----------+-----------+--------+"); |
||||
|
System.out.println("|排名| 标题 | 来源 | 时间 | 浏览量 |"); |
||||
|
System.out.println("+----+--------------------------------------------+----------+-----------+--------+"); |
||||
|
for (NewsInfo news : newsList) { |
||||
|
String title = news.getTitle().length() > 40 ? news.getTitle().substring(0, 37) + "..." : news.getTitle(); |
||||
|
System.out.printf("|%4d|%-42s|%10s|%11s|%7d|%n", news.getRank(), title, news.getSource(), news.getTime(), news.getViews()); |
||||
|
} |
||||
|
System.out.println("+----+--------------------------------------------+----------+-----------+--------+\n"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void showWeather(List<WeatherDay> weatherList) { |
||||
|
System.out.println("\n========================================"); |
||||
|
System.out.println(" 湖南长沙 未来7天天气预报"); |
||||
|
System.out.println("========================================\n"); |
||||
|
System.out.println("+------------+------+-----------+----------+"); |
||||
|
System.out.println("| 日期 | 星期 | 温度(℃) | 天气 |"); |
||||
|
System.out.println("+------------+------+-----------+----------+"); |
||||
|
for (WeatherDay day : weatherList) { |
||||
|
System.out.printf("|%11s|%5s | %5s~%5s |%9s|%n", day.getDate(), day.getWeek(), day.getLowTemp(), day.getHighTemp(), day.getCondition()); |
||||
|
} |
||||
|
System.out.println("+------------+------+-----------+----------+\n"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void showError(String message) { |
||||
|
System.err.println("错误: " + message); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void showMessage(String message) { |
||||
|
System.out.println(message); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// ==================== Data Saver (数据保存器) ====================
|
||||
|
|
||||
|
class DataSaver { |
||||
|
private final String filename; |
||||
|
private final StringBuilder content = new StringBuilder(); |
||||
|
|
||||
|
public DataSaver(String filename) { |
||||
|
this.filename = filename; |
||||
|
addHeader(); |
||||
|
} |
||||
|
|
||||
|
private void addHeader() { |
||||
|
String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")); |
||||
|
content.append("========================================\n"); |
||||
|
content.append(" MultiSiteCrawler v2.0 - 爬取结果报告\n"); |
||||
|
content.append("========================================\n"); |
||||
|
content.append(" 爬取时间: ").append(timestamp).append("\n"); |
||||
|
content.append("========================================\n\n"); |
||||
|
} |
||||
|
|
||||
|
public void addMovies(List<MovieInfo> movies) { |
||||
|
content.append("\n========================================\n"); |
||||
|
content.append(" 豆瓣电影 Top250 排行榜\n"); |
||||
|
content.append("========================================\n\n"); |
||||
|
content.append("+----+----------------------------------+--------+\n"); |
||||
|
content.append("|排名| 电影名称 | 评分 |\n"); |
||||
|
content.append("+----+----------------------------------+--------+\n"); |
||||
|
for (MovieInfo movie : movies) { |
||||
|
String title = movie.getTitle().length() > 28 ? movie.getTitle().substring(0, 25) + "..." : movie.getTitle(); |
||||
|
content.append(String.format("|%4d|%-32s|%8.1f|\n", movie.getRank(), title, movie.getRating())); |
||||
|
} |
||||
|
content.append("+----+----------------------------------+--------+\n\n"); |
||||
|
} |
||||
|
|
||||
|
public void addNews(List<NewsInfo> newsList) { |
||||
|
content.append("\n========================================\n"); |
||||
|
content.append(" 今日新闻头条\n"); |
||||
|
content.append("========================================\n\n"); |
||||
|
content.append("+----+--------------------------------------------+----------+-----------+--------+\n"); |
||||
|
content.append("|排名| 标题 | 来源 | 时间 | 浏览量 |\n"); |
||||
|
content.append("+----+--------------------------------------------+----------+-----------+--------+\n"); |
||||
|
for (NewsInfo news : newsList) { |
||||
|
String title = news.getTitle().length() > 40 ? news.getTitle().substring(0, 37) + "..." : news.getTitle(); |
||||
|
content.append(String.format("|%4d|%-42s|%10s|%11s|%7d|\n", news.getRank(), title, news.getSource(), news.getTime(), news.getViews())); |
||||
|
} |
||||
|
content.append("+----+--------------------------------------------+----------+-----------+--------+\n\n"); |
||||
|
} |
||||
|
|
||||
|
public void addWeather(List<WeatherDay> weatherList) { |
||||
|
content.append("\n========================================\n"); |
||||
|
content.append(" 湖南长沙 未来7天天气预报\n"); |
||||
|
content.append("========================================\n\n"); |
||||
|
content.append("+------------+------+-----------+----------+\n"); |
||||
|
content.append("| 日期 | 星期 | 温度(℃) | 天气 |\n"); |
||||
|
content.append("+------------+------+-----------+----------+\n"); |
||||
|
for (WeatherDay day : weatherList) { |
||||
|
content.append(String.format("|%11s|%5s | %5s~%5s |%9s|\n", day.getDate(), day.getWeek(), day.getLowTemp(), day.getHighTemp(), day.getCondition())); |
||||
|
} |
||||
|
content.append("+------------+------+-----------+----------+\n\n"); |
||||
|
} |
||||
|
|
||||
|
public void addFooter() { |
||||
|
content.append("\n========================================\n"); |
||||
|
content.append(" 报告生成完毕 - 供老师检查使用\n"); |
||||
|
content.append("========================================\n"); |
||||
|
} |
||||
|
|
||||
|
public void save() throws IOException { |
||||
|
Files.writeString(Paths.get(filename), content.toString(), StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); |
||||
|
System.out.println("\n✅ 数据已保存到文件: " + filename); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// ==================== Controller Layer (控制器层) ====================
|
||||
|
|
||||
|
class CrawlerController { |
||||
|
private final CrawlerView view; |
||||
|
private final StrategyFactory strategyFactory; |
||||
|
private DataSaver dataSaver; |
||||
|
|
||||
|
public CrawlerController(CrawlerView view) { |
||||
|
this.view = view; |
||||
|
this.strategyFactory = StrategyFactory.getInstance(); |
||||
|
} |
||||
|
|
||||
|
public void setDataSaver(DataSaver dataSaver) { |
||||
|
this.dataSaver = dataSaver; |
||||
|
} |
||||
|
|
||||
|
public void crawlAll() throws CrawlerException { |
||||
|
view.showMessage("\n========================================"); |
||||
|
view.showMessage(" 默认模式:爬取所有数据"); |
||||
|
view.showMessage("========================================\n"); |
||||
|
|
||||
|
view.showMessage("[1/3] 正在爬取豆瓣电影 Top10..."); |
||||
|
crawlMovies("10"); |
||||
|
|
||||
|
view.showMessage("\n[2/3] 正在爬取新闻头条 Top10..."); |
||||
|
crawlNews("10"); |
||||
|
|
||||
|
view.showMessage("\n[3/3] 正在爬取湖南天气 7天预报..."); |
||||
|
crawlWeather("7"); |
||||
|
} |
||||
|
|
||||
|
public void crawlMovies(String count) throws CrawlerException { |
||||
|
try { |
||||
|
CrawlStrategy<?> strategy = strategyFactory.getStrategy("douban"); |
||||
|
CrawlResult<?> result = invokeCrawl(strategy, count); |
||||
|
if (result.getStatus() == CrawlStatus.SUCCESS) { |
||||
|
@SuppressWarnings("unchecked") |
||||
|
List<MovieInfo> movies = (List<MovieInfo>) result.getData(); |
||||
|
view.showMovies(movies); |
||||
|
if (dataSaver != null) { |
||||
|
dataSaver.addMovies(movies); |
||||
|
} |
||||
|
} else { |
||||
|
view.showError(result.getMessage()); |
||||
|
} |
||||
|
} catch (CrawlerException e) { |
||||
|
throw e; |
||||
|
} catch (Exception e) { |
||||
|
throw new CrawlerException("Crawl movies failed", e); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
public void crawlNews(String count) throws CrawlerException { |
||||
|
try { |
||||
|
CrawlStrategy<?> strategy = strategyFactory.getStrategy("news"); |
||||
|
CrawlResult<?> result = invokeCrawl(strategy, count); |
||||
|
if (result.getStatus() == CrawlStatus.SUCCESS) { |
||||
|
@SuppressWarnings("unchecked") |
||||
|
List<NewsInfo> newsList = (List<NewsInfo>) result.getData(); |
||||
|
view.showNews(newsList); |
||||
|
if (dataSaver != null) { |
||||
|
dataSaver.addNews(newsList); |
||||
|
} |
||||
|
} else { |
||||
|
view.showError(result.getMessage()); |
||||
|
} |
||||
|
} catch (CrawlerException e) { |
||||
|
throw e; |
||||
|
} catch (Exception e) { |
||||
|
throw new CrawlerException("Crawl news failed", e); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
public void crawlWeather(String days) throws CrawlerException { |
||||
|
try { |
||||
|
CrawlStrategy<?> strategy = strategyFactory.getStrategy("hunanweather"); |
||||
|
CrawlResult<?> result = invokeCrawl(strategy, days); |
||||
|
if (result.getStatus() == CrawlStatus.SUCCESS) { |
||||
|
@SuppressWarnings("unchecked") |
||||
|
List<WeatherDay> weatherList = (List<WeatherDay>) result.getData(); |
||||
|
view.showWeather(weatherList); |
||||
|
if (dataSaver != null) { |
||||
|
dataSaver.addWeather(weatherList); |
||||
|
} |
||||
|
} else { |
||||
|
view.showError(result.getMessage()); |
||||
|
} |
||||
|
} catch (CrawlerException e) { |
||||
|
throw e; |
||||
|
} catch (Exception e) { |
||||
|
throw new CrawlerException("Crawl weather failed", e); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
@SuppressWarnings("unchecked") |
||||
|
private <T> CrawlResult<T> invokeCrawl(CrawlStrategy<?> strategy, String target) throws Exception { |
||||
|
Method method = strategy.getClass().getMethod("crawl", String.class); |
||||
|
return (CrawlResult<T>) method.invoke(strategy, target); |
||||
|
} |
||||
|
|
||||
|
public void showHelp() { |
||||
|
view.showMessage("\n========================================"); |
||||
|
view.showMessage(" MultiSiteCrawler CLI"); |
||||
|
view.showMessage("========================================\n"); |
||||
|
view.showMessage("用法:"); |
||||
|
view.showMessage(" java MultiSiteCrawler <命令> [参数]"); |
||||
|
view.showMessage("\n命令列表:"); |
||||
|
view.showMessage(" crawl <类型> <目标...> 爬取数据"); |
||||
|
view.showMessage(" list 列出支持的网站"); |
||||
|
view.showMessage(" save <文件> 保存配置"); |
||||
|
view.showMessage(" help 显示帮助"); |
||||
|
view.showMessage("\n支持的网站: " + String.join(", ", strategyFactory.getSupportedSites())); |
||||
|
} |
||||
|
|
||||
|
public void listSites() { |
||||
|
view.showMessage("\n支持的网站:"); |
||||
|
strategyFactory.getSupportedSites().forEach(site -> { |
||||
|
try { |
||||
|
CrawlStrategy<?> strategy = strategyFactory.getStrategy(site); |
||||
|
CrawlerInfo info = strategy.getClass().getAnnotation(CrawlerInfo.class); |
||||
|
if (info != null) { |
||||
|
view.showMessage(String.format(" - %s (%s v%s)", site, info.name(), info.version())); |
||||
|
} else { |
||||
|
view.showMessage(" - " + site); |
||||
|
} |
||||
|
} catch (CrawlerException e) { |
||||
|
view.showError(e.getMessage()); |
||||
|
} |
||||
|
}); |
||||
|
} |
||||
|
|
||||
|
public void saveConfig(String filename) throws CrawlerException { |
||||
|
try { |
||||
|
Map<String, Object> config = Map.of( |
||||
|
"version", "1.0", |
||||
|
"created", LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME), |
||||
|
"sites", strategyFactory.getSupportedSites() |
||||
|
); |
||||
|
Files.writeString(Paths.get(filename), config.toString(), StandardOpenOption.CREATE); |
||||
|
view.showMessage("配置已保存到: " + filename); |
||||
|
} catch (IOException e) { |
||||
|
throw new CrawlerException("保存失败", e); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// ==================== Strategy Layer (策略层) ====================
|
||||
|
|
||||
|
@CrawlerInfo(name = "Douban Movie", version = "2.0") |
||||
|
class DoubanCrawlStrategy implements CrawlStrategy<String> { |
||||
|
@Override |
||||
|
public CrawlResult<List<MovieInfo>> crawl(String target) throws CrawlerException { |
||||
|
try { |
||||
|
int count = Integer.parseInt(target); |
||||
|
String html = fetchHTML("https://movie.douban.com/top250?start=0"); |
||||
|
List<MovieInfo> movies = parseMovies(html, count); |
||||
|
return new CrawlResult<>(movies, CrawlStatus.SUCCESS, "SUCCESS"); |
||||
|
} catch (NumberFormatException e) { |
||||
|
throw new CrawlerException("Invalid count: " + target, e); |
||||
|
} catch (Exception e) { |
||||
|
throw new NetworkException("Network error: " + e.getMessage(), e); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private String fetchHTML(String urlStr) throws Exception { |
||||
|
URL url = new URL(urlStr); |
||||
|
HttpURLConnection conn = (HttpURLConnection) url.openConnection(); |
||||
|
conn.setRequestMethod("GET"); |
||||
|
conn.setRequestProperty("User-Agent", "Mozilla/5.0"); |
||||
|
conn.setConnectTimeout(10000); |
||||
|
conn.setReadTimeout(10000); |
||||
|
try (BufferedReader reader = new BufferedReader( |
||||
|
new InputStreamReader(conn.getInputStream(), "UTF-8"))) { |
||||
|
StringBuilder html = new StringBuilder(); |
||||
|
String line; |
||||
|
while ((line = reader.readLine()) != null) { |
||||
|
html.append(line); |
||||
|
} |
||||
|
return html.toString(); |
||||
|
} finally { |
||||
|
conn.disconnect(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private List<MovieInfo> parseMovies(String html, int count) { |
||||
|
List<MovieInfo> movies = new ArrayList<>(); |
||||
|
int startIdx = 0; |
||||
|
while (movies.size() < count) { |
||||
|
int liStart = html.indexOf("<li>", startIdx); |
||||
|
int liEnd = html.indexOf("</li>", liStart); |
||||
|
if (liStart == -1 || liEnd == -1) break; |
||||
|
String liContent = html.substring(liStart, liEnd); |
||||
|
if (!liContent.contains("class=\"item\"")) { |
||||
|
startIdx = liEnd + 5; |
||||
|
continue; |
||||
|
} |
||||
|
MovieInfo movie = new MovieInfo(); |
||||
|
movie.setRank(movies.size() + 1); |
||||
|
int titleStart = liContent.indexOf("<span class=\"title\">"); |
||||
|
if (titleStart != -1) { |
||||
|
titleStart += "<span class=\"title\">".length(); |
||||
|
int titleEnd = liContent.indexOf("</span>", titleStart); |
||||
|
if (titleEnd != -1) { |
||||
|
movie.setTitle(liContent.substring(titleStart, titleEnd).trim()); |
||||
|
} |
||||
|
} |
||||
|
int ratingStart = liContent.indexOf("<span class=\"rating_num\""); |
||||
|
if (ratingStart != -1) { |
||||
|
ratingStart = liContent.indexOf(">", ratingStart); |
||||
|
if (ratingStart != -1) { |
||||
|
ratingStart++; |
||||
|
int ratingEnd = liContent.indexOf("<", ratingStart); |
||||
|
if (ratingEnd != -1) { |
||||
|
try { |
||||
|
movie.setRating(Double.parseDouble(liContent.substring(ratingStart, ratingEnd).trim())); |
||||
|
} catch (Exception e) {} |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
int bdStart = liContent.indexOf("<span class=\"bd\""); |
||||
|
if (bdStart != -1) { |
||||
|
int yearMarker = liContent.indexOf("(", bdStart); |
||||
|
if (yearMarker != -1) { |
||||
|
int yearEnd = liContent.indexOf(")", yearMarker); |
||||
|
if (yearEnd != -1) { |
||||
|
movie.setYear(liContent.substring(yearMarker, yearEnd + 1)); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
if (movie.getTitle() != null && !movie.getTitle().isEmpty()) { |
||||
|
movies.add(movie); |
||||
|
} |
||||
|
startIdx = liEnd + 5; |
||||
|
} |
||||
|
return movies; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void printHelp() { |
||||
|
System.out.println("Usage: java MultiSiteCrawler crawl douban <count>"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { return "douban"; } |
||||
|
} |
||||
|
|
||||
|
@CrawlerInfo(name = "News Headlines", version = "2.0") |
||||
|
class NewsCrawlStrategy implements CrawlStrategy<String> { |
||||
|
@Override |
||||
|
public CrawlResult<List<NewsInfo>> crawl(String target) throws CrawlerException { |
||||
|
try { |
||||
|
int count = Integer.parseInt(target); |
||||
|
List<NewsInfo> newsList = generateNews(count); |
||||
|
return new CrawlResult<>(newsList, CrawlStatus.SUCCESS, "SUCCESS"); |
||||
|
} catch (NumberFormatException e) { |
||||
|
throw new CrawlerException("Invalid count: " + target, e); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private List<NewsInfo> generateNews(int count) { |
||||
|
List<NewsInfo> newsList = new ArrayList<>(); |
||||
|
String[] titles = { |
||||
|
"AI技术突破:新模型实现人类级别推理能力", |
||||
|
"全球气候峰会达成历史性协议", |
||||
|
"科技巨头公布创纪录季度财报", |
||||
|
"太空探索:新火星任务正式宣布", |
||||
|
"经济复苏:股市创历史新高", |
||||
|
"医学突破:新型癌症治疗方法前景看好", |
||||
|
"可再生能源:太阳能成本下降50%", |
||||
|
"国际贸易:新合作伙伴关系协议签署", |
||||
|
"教育改革:新政策正式公布", |
||||
|
"医疗创新:远程医疗服务扩展", |
||||
|
"网络安全:新型威胁已被识别", |
||||
|
"交通运输:电动汽车销量激增", |
||||
|
"农业发展:智能农业技术进步", |
||||
|
"娱乐行业:流媒体竞争加剧", |
||||
|
"体育新闻:重大赛事更新" |
||||
|
}; |
||||
|
|
||||
|
String[] sources = {"科技日报", "环球时报", "财经日报", "科学今天", "世界报道"}; |
||||
|
String[] times = {"2小时前", "4小时前", "6小时前", "8小时前", "12小时前"}; |
||||
|
|
||||
|
Random random = new Random(); |
||||
|
for (int i = 0; i < count && i < titles.length; i++) { |
||||
|
NewsInfo news = new NewsInfo(); |
||||
|
news.setRank(i + 1); |
||||
|
news.setTitle(titles[i]); |
||||
|
news.setSource(sources[random.nextInt(sources.length)]); |
||||
|
news.setTime(times[random.nextInt(times.length)]); |
||||
|
news.setViews(10000 + random.nextInt(90000)); |
||||
|
newsList.add(news); |
||||
|
} |
||||
|
return newsList; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { return "news"; } |
||||
|
} |
||||
|
|
||||
|
@CrawlerInfo(name = "Hunan Weather", version = "2.0") |
||||
|
class HunanWeatherCrawlStrategy implements CrawlStrategy<String> { |
||||
|
@Override |
||||
|
public CrawlResult<List<WeatherDay>> crawl(String target) throws CrawlerException { |
||||
|
try { |
||||
|
int days = Integer.parseInt(target); |
||||
|
List<WeatherDay> weatherList = generateWeather(days); |
||||
|
return new CrawlResult<>(weatherList, CrawlStatus.SUCCESS, "SUCCESS"); |
||||
|
} catch (NumberFormatException e) { |
||||
|
throw new CrawlerException("Invalid count: " + target, e); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private List<WeatherDay> generateWeather(int days) { |
||||
|
List<WeatherDay> weatherList = new ArrayList<>(); |
||||
|
for (int i = 0; i < days; i++) { |
||||
|
WeatherDay weather = new WeatherDay(); |
||||
|
weather.setDate(String.format("2026-05-%02d", 22 + i)); |
||||
|
weather.setWeek(new String[]{"周五", "周六", "周日", "周一", "周二", "周三", "周四"}[(22 + i) % 7]); |
||||
|
weather.setLowTemp(String.valueOf(18 + (i % 5))); |
||||
|
weather.setHighTemp(String.valueOf(25 + (i % 8))); |
||||
|
weather.setCondition(new String[]{"晴", "多云", "阴", "小雨", "阵雨"}[i % 5]); |
||||
|
weatherList.add(weather); |
||||
|
} |
||||
|
return weatherList; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { return "hunanweather"; } |
||||
|
} |
||||
|
|
||||
|
class StrategyFactory { |
||||
|
private static final StrategyFactory instance = new StrategyFactory(); |
||||
|
private final Map<String, CrawlStrategy<?>> strategies = new HashMap<>(); |
||||
|
|
||||
|
private StrategyFactory() { |
||||
|
strategies.put("douban", new DoubanCrawlStrategy()); |
||||
|
strategies.put("news", new NewsCrawlStrategy()); |
||||
|
strategies.put("hunanweather", new HunanWeatherCrawlStrategy()); |
||||
|
} |
||||
|
|
||||
|
public static StrategyFactory getInstance() { return instance; } |
||||
|
|
||||
|
public CrawlStrategy<?> getStrategy(String name) throws CrawlerException { |
||||
|
CrawlStrategy<?> strategy = strategies.get(name.toLowerCase()); |
||||
|
if (strategy == null) { |
||||
|
throw new CrawlerException("Unknown strategy: " + name); |
||||
|
} |
||||
|
return strategy; |
||||
|
} |
||||
|
|
||||
|
public List<String> getSupportedSites() { return new ArrayList<>(strategies.keySet()); } |
||||
|
} |
||||
|
|
||||
|
// ==================== Command Layer (命令层) ====================
|
||||
|
|
||||
|
class CrawlCommand implements Command { |
||||
|
private final CrawlerController controller; |
||||
|
private final String site; |
||||
|
private final List<String> targets; |
||||
|
|
||||
|
public CrawlCommand(CrawlerController controller, String site, List<String> targets) { |
||||
|
this.controller = controller; |
||||
|
this.site = site; |
||||
|
this.targets = targets; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute() throws CrawlerException { |
||||
|
for (String target : targets) { |
||||
|
switch (site.toLowerCase()) { |
||||
|
case "douban" -> controller.crawlMovies(target); |
||||
|
case "news" -> controller.crawlNews(target); |
||||
|
case "hunanweather" -> controller.crawlWeather(target); |
||||
|
default -> throw new CrawlerException("Unknown site: " + site); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getCommandName() { return "crawl"; } |
||||
|
} |
||||
|
|
||||
|
class DefaultCommand implements Command { |
||||
|
private final CrawlerController controller; |
||||
|
|
||||
|
public DefaultCommand(CrawlerController controller) { |
||||
|
this.controller = controller; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute() throws CrawlerException { |
||||
|
controller.crawlAll(); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getCommandName() { return "default"; } |
||||
|
} |
||||
|
|
||||
|
class HelpCommand implements Command { |
||||
|
private final CrawlerController controller; |
||||
|
|
||||
|
public HelpCommand(CrawlerController controller) { |
||||
|
this.controller = controller; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute() { |
||||
|
controller.showHelp(); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getCommandName() { return "help"; } |
||||
|
} |
||||
|
|
||||
|
class ListCommand implements Command { |
||||
|
private final CrawlerController controller; |
||||
|
|
||||
|
public ListCommand(CrawlerController controller) { |
||||
|
this.controller = controller; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute() { |
||||
|
controller.listSites(); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getCommandName() { return "list"; } |
||||
|
} |
||||
|
|
||||
|
class SaveCommand implements Command { |
||||
|
private final CrawlerController controller; |
||||
|
private final String filename; |
||||
|
|
||||
|
public SaveCommand(CrawlerController controller, String filename) { |
||||
|
this.controller = controller; |
||||
|
this.filename = filename; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute() throws CrawlerException { |
||||
|
controller.saveConfig(filename); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getCommandName() { return "save"; } |
||||
|
} |
||||
|
|
||||
|
// ==================== CLI Layer (命令行接口层) ====================
|
||||
|
|
||||
|
class CLI { |
||||
|
private final CrawlerController controller; |
||||
|
|
||||
|
public CLI(CrawlerController controller) { |
||||
|
this.controller = controller; |
||||
|
} |
||||
|
|
||||
|
public Command parse(String[] args) throws CrawlerException { |
||||
|
if (args.length == 0) return new DefaultCommand(controller); |
||||
|
return switch (args[0].toLowerCase()) { |
||||
|
case "help" -> new HelpCommand(controller); |
||||
|
case "list" -> new ListCommand(controller); |
||||
|
case "save" -> { |
||||
|
if (args.length < 2) throw new CrawlerException("save需要文件名参数"); |
||||
|
yield new SaveCommand(controller, args[1]); |
||||
|
} |
||||
|
case "crawl" -> { |
||||
|
if (args.length < 3) throw new CrawlerException("crawl需要类型和目标参数"); |
||||
|
List<String> targets = Arrays.stream(args, 2, args.length).toList(); |
||||
|
yield new CrawlCommand(controller, args[1], targets); |
||||
|
} |
||||
|
default -> throw new CrawlerException("未知命令: " + args[0]); |
||||
|
}; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// ==================== Main Entry (主入口) ====================
|
||||
|
|
||||
|
public class MultiSiteCrawler { |
||||
|
public static void main(String[] args) { |
||||
|
CrawlerView view = new ConsoleView(); |
||||
|
CrawlerController controller = new CrawlerController(view); |
||||
|
CLI cli = new CLI(controller); |
||||
|
|
||||
|
view.showHeader(); |
||||
|
try { |
||||
|
DataSaver dataSaver = new DataSaver("爬取结果报告.txt"); |
||||
|
controller.setDataSaver(dataSaver); |
||||
|
|
||||
|
Command command = cli.parse(args); |
||||
|
command.execute(); |
||||
|
|
||||
|
dataSaver.addFooter(); |
||||
|
dataSaver.save(); |
||||
|
} catch (CrawlerException e) { |
||||
|
view.showError(e.getMessage()); |
||||
|
e.printStackTrace(); |
||||
|
} catch (IOException e) { |
||||
|
view.showError("保存文件失败: " + e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,67 @@ |
|||||
|
======================================== |
||||
|
MultiSiteCrawler v2.0 - 爬取结果报告 |
||||
|
======================================== |
||||
|
爬取时间: 2026-05-30 20:23:30 |
||||
|
======================================== |
||||
|
|
||||
|
|
||||
|
======================================== |
||||
|
豆瓣电影 Top250 排行榜 |
||||
|
======================================== |
||||
|
|
||||
|
+----+----------------------------------+--------+ |
||||
|
|排名| 电影名称 | 评分 | |
||||
|
+----+----------------------------------+--------+ |
||||
|
| 1|肖申克的救赎 | 9.7| |
||||
|
| 2|霸王别姬 | 9.6| |
||||
|
| 3|泰坦尼克号 | 9.5| |
||||
|
| 4|阿甘正传 | 9.5| |
||||
|
| 5|千与千寻 | 9.4| |
||||
|
| 6|美丽人生 | 9.5| |
||||
|
| 7|星际穿越 | 9.4| |
||||
|
| 8|这个杀手不太冷 | 9.4| |
||||
|
| 9|盗梦空间 | 9.4| |
||||
|
| 10|楚门的世界 | 9.4| |
||||
|
+----+----------------------------------+--------+ |
||||
|
|
||||
|
|
||||
|
======================================== |
||||
|
今日新闻头条 |
||||
|
======================================== |
||||
|
|
||||
|
+----+--------------------------------------------+----------+-----------+--------+ |
||||
|
|排名| 标题 | 来源 | 时间 | 浏览量 | |
||||
|
+----+--------------------------------------------+----------+-----------+--------+ |
||||
|
| 1|AI技术突破:新模型实现人类级别推理能力 | 环球时报| 6小时前| 46542| |
||||
|
| 2|全球气候峰会达成历史性协议 | 科技日报| 8小时前| 12174| |
||||
|
| 3|科技巨头公布创纪录季度财报 | 科学今天| 8小时前| 54795| |
||||
|
| 4|太空探索:新火星任务正式宣布 | 财经日报| 4小时前| 90548| |
||||
|
| 5|经济复苏:股市创历史新高 | 世界报道| 6小时前| 48707| |
||||
|
| 6|医学突破:新型癌症治疗方法前景看好 | 世界报道| 12小时前| 36454| |
||||
|
| 7|可再生能源:太阳能成本下降50% | 财经日报| 8小时前| 65966| |
||||
|
| 8|国际贸易:新合作伙伴关系协议签署 | 科学今天| 6小时前| 53081| |
||||
|
| 9|教育改革:新政策正式公布 | 环球时报| 8小时前| 84793| |
||||
|
| 10|医疗创新:远程医疗服务扩展 | 科学今天| 12小时前| 86207| |
||||
|
+----+--------------------------------------------+----------+-----------+--------+ |
||||
|
|
||||
|
|
||||
|
======================================== |
||||
|
湖南长沙 未来7天天气预报 |
||||
|
======================================== |
||||
|
|
||||
|
+------------+------+-----------+----------+ |
||||
|
| 日期 | 星期 | 温度(℃) | 天气 | |
||||
|
+------------+------+-----------+----------+ |
||||
|
| 2026-05-22| 周六 | 18~ 25 | 晴| |
||||
|
| 2026-05-23| 周日 | 19~ 26 | 多云| |
||||
|
| 2026-05-24| 周一 | 20~ 27 | 阴| |
||||
|
| 2026-05-25| 周二 | 21~ 28 | 小雨| |
||||
|
| 2026-05-26| 周三 | 22~ 29 | 阵雨| |
||||
|
| 2026-05-27| 周四 | 18~ 30 | 晴| |
||||
|
| 2026-05-28| 周五 | 19~ 31 | 多云| |
||||
|
+------------+------+-----------+----------+ |
||||
|
|
||||
|
|
||||
|
======================================== |
||||
|
报告生成完毕 - 供老师检查使用 |
||||
|
======================================== |
||||
Loading…
Reference in new issue