import java.io.*; import java.net.*; import java.nio.file.*; import java.nio.charset.*; import java.time.*; import java.time.format.*; import java.util.*; import java.util.concurrent.*; import java.util.concurrent.atomic.*; import java.util.stream.*; import java.lang.reflect.*; import java.lang.annotation.*; enum CrawlStatus { SUCCESS, FAILURE, PENDING } @Retention(RetentionPolicy.RUNTIME) @Target(ElementType.TYPE) @interface CrawlerInfo { String name(); String version() default "1.0"; } class CrawlerException extends Exception { private final LocalDateTime timestamp; public CrawlerException(String message) { super(message); this.timestamp = LocalDateTime.now(); } public CrawlerException(String message, Throwable cause) { super(message, cause); this.timestamp = LocalDateTime.now(); } public LocalDateTime getTimestamp() { return timestamp; } } class NetworkException extends CrawlerException { public NetworkException(String message) { super(message); } public NetworkException(String message, Throwable cause) { super(message, cause); } } class ParseException extends CrawlerException { public ParseException(String message) { super(message); } public ParseException(String message, Throwable cause) { super(message, cause); } } interface Command { void execute() throws CrawlerException; default String getCommandName() { return "command"; } } interface CrawlStrategy { CrawlResult crawl(T target) throws CrawlerException; default String getName() { return "strategy"; } default void printHelp() {} } class CrawlResult { private final T data; private final CrawlStatus status; private final String message; private final long timestamp; public CrawlResult(T data, CrawlStatus status, String message) { this.data = data; this.status = status; this.message = message; this.timestamp = System.currentTimeMillis(); } public T getData() { return data; } public CrawlStatus getStatus() { return status; } public String getMessage() { return message; } public long getTimestamp() { return timestamp; } } // ==================== Model Layer (模型层) ==================== class MovieInfo { private int rank; private String title; private double rating; private String year; private String quote; public int getRank() { return rank; } public void setRank(int rank) { this.rank = rank; } public String getTitle() { return title; } public void setTitle(String title) { this.title = title; } public double getRating() { return rating; } public void setRating(double rating) { this.rating = rating; } public String getYear() { return year; } public void setYear(String year) { this.year = year; } public String getQuote() { return quote; } public void setQuote(String quote) { this.quote = quote; } } class NewsInfo { private int rank; private String title; private String source; private String time; private int views; public int getRank() { return rank; } public void setRank(int rank) { this.rank = rank; } public String getTitle() { return title; } public void setTitle(String title) { this.title = title; } public String getSource() { return source; } public void setSource(String source) { this.source = source; } public String getTime() { return time; } public void setTime(String time) { this.time = time; } public int getViews() { return views; } public void setViews(int views) { this.views = views; } } class WeatherDay { private String date; private String week; private String lowTemp; private String highTemp; private String condition; public String getDate() { return date; } public void setDate(String date) { this.date = date; } public String getWeek() { return week; } public void setWeek(String week) { this.week = week; } public String getLowTemp() { return lowTemp; } public void setLowTemp(String lowTemp) { this.lowTemp = lowTemp; } public String getHighTemp() { return highTemp; } public void setHighTemp(String highTemp) { this.highTemp = highTemp; } public String getCondition() { return condition; } public void setCondition(String condition) { this.condition = condition; } } // ==================== View Layer (视图层) ==================== interface CrawlerView { void showHeader(); void showMovies(List movies); void showNews(List newsList); void showWeather(List weatherList); void showError(String message); void showMessage(String message); } class ConsoleView implements CrawlerView { @Override public void showHeader() { String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")); System.out.println("\n========================================"); System.out.println(" MultiSiteCrawler v2.0 - 综合爬虫系统"); System.out.println("========================================"); System.out.println(" " + timestamp); System.out.println("========================================\n"); } @Override public void showMovies(List movies) { System.out.println("\n========================================"); System.out.println(" 豆瓣电影 Top250 排行榜"); System.out.println("========================================\n"); System.out.println("+----+----------------------------------+--------+"); System.out.println("|排名| 电影名称 | 评分 |"); System.out.println("+----+----------------------------------+--------+"); for (MovieInfo movie : movies) { String title = movie.getTitle().length() > 28 ? movie.getTitle().substring(0, 25) + "..." : movie.getTitle(); System.out.printf("|%4d|%-32s|%8.1f|%n", movie.getRank(), title, movie.getRating()); } System.out.println("+----+----------------------------------+--------+\n"); } @Override public void showNews(List newsList) { System.out.println("\n========================================"); System.out.println(" 今日新闻头条"); System.out.println("========================================\n"); System.out.println("+----+--------------------------------------------+----------+-----------+--------+"); System.out.println("|排名| 标题 | 来源 | 时间 | 浏览量 |"); System.out.println("+----+--------------------------------------------+----------+-----------+--------+"); for (NewsInfo news : newsList) { String title = news.getTitle().length() > 40 ? news.getTitle().substring(0, 37) + "..." : news.getTitle(); System.out.printf("|%4d|%-42s|%10s|%11s|%7d|%n", news.getRank(), title, news.getSource(), news.getTime(), news.getViews()); } System.out.println("+----+--------------------------------------------+----------+-----------+--------+\n"); } @Override public void showWeather(List weatherList) { System.out.println("\n========================================"); System.out.println(" 湖南长沙 未来7天天气预报"); System.out.println("========================================\n"); System.out.println("+------------+------+-----------+----------+"); System.out.println("| 日期 | 星期 | 温度(℃) | 天气 |"); System.out.println("+------------+------+-----------+----------+"); for (WeatherDay day : weatherList) { System.out.printf("|%11s|%5s | %5s~%5s |%9s|%n", day.getDate(), day.getWeek(), day.getLowTemp(), day.getHighTemp(), day.getCondition()); } System.out.println("+------------+------+-----------+----------+\n"); } @Override public void showError(String message) { System.err.println("错误: " + message); } @Override public void showMessage(String message) { System.out.println(message); } } // ==================== Data Saver (数据保存器) ==================== class DataSaver { private final String filename; private final StringBuilder content = new StringBuilder(); public DataSaver(String filename) { this.filename = filename; addHeader(); } private void addHeader() { String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")); content.append("========================================\n"); content.append(" MultiSiteCrawler v2.0 - 爬取结果报告\n"); content.append("========================================\n"); content.append(" 爬取时间: ").append(timestamp).append("\n"); content.append("========================================\n\n"); } public void addMovies(List movies) { content.append("\n========================================\n"); content.append(" 豆瓣电影 Top250 排行榜\n"); content.append("========================================\n\n"); content.append("+----+----------------------------------+--------+\n"); content.append("|排名| 电影名称 | 评分 |\n"); content.append("+----+----------------------------------+--------+\n"); for (MovieInfo movie : movies) { String title = movie.getTitle().length() > 28 ? movie.getTitle().substring(0, 25) + "..." : movie.getTitle(); content.append(String.format("|%4d|%-32s|%8.1f|\n", movie.getRank(), title, movie.getRating())); } content.append("+----+----------------------------------+--------+\n\n"); } public void addNews(List newsList) { content.append("\n========================================\n"); content.append(" 今日新闻头条\n"); content.append("========================================\n\n"); content.append("+----+--------------------------------------------+----------+-----------+--------+\n"); content.append("|排名| 标题 | 来源 | 时间 | 浏览量 |\n"); content.append("+----+--------------------------------------------+----------+-----------+--------+\n"); for (NewsInfo news : newsList) { String title = news.getTitle().length() > 40 ? news.getTitle().substring(0, 37) + "..." : news.getTitle(); content.append(String.format("|%4d|%-42s|%10s|%11s|%7d|\n", news.getRank(), title, news.getSource(), news.getTime(), news.getViews())); } content.append("+----+--------------------------------------------+----------+-----------+--------+\n\n"); } public void addWeather(List weatherList) { content.append("\n========================================\n"); content.append(" 湖南长沙 未来7天天气预报\n"); content.append("========================================\n\n"); content.append("+------------+------+-----------+----------+\n"); content.append("| 日期 | 星期 | 温度(℃) | 天气 |\n"); content.append("+------------+------+-----------+----------+\n"); for (WeatherDay day : weatherList) { content.append(String.format("|%11s|%5s | %5s~%5s |%9s|\n", day.getDate(), day.getWeek(), day.getLowTemp(), day.getHighTemp(), day.getCondition())); } content.append("+------------+------+-----------+----------+\n\n"); } public void addFooter() { content.append("\n========================================\n"); content.append(" 报告生成完毕 - 供老师检查使用\n"); content.append("========================================\n"); } public void save() throws IOException { Files.writeString(Paths.get(filename), content.toString(), StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); System.out.println("\n✅ 数据已保存到文件: " + filename); } } // ==================== Controller Layer (控制器层) ==================== class CrawlerController { private final CrawlerView view; private final StrategyFactory strategyFactory; private DataSaver dataSaver; public CrawlerController(CrawlerView view) { this.view = view; this.strategyFactory = StrategyFactory.getInstance(); } public void setDataSaver(DataSaver dataSaver) { this.dataSaver = dataSaver; } public void crawlAll() throws CrawlerException { view.showMessage("\n========================================"); view.showMessage(" 默认模式:爬取所有数据"); view.showMessage("========================================\n"); view.showMessage("[1/3] 正在爬取豆瓣电影 Top10..."); crawlMovies("10"); view.showMessage("\n[2/3] 正在爬取新闻头条 Top10..."); crawlNews("10"); view.showMessage("\n[3/3] 正在爬取湖南天气 7天预报..."); crawlWeather("7"); } public void crawlMovies(String count) throws CrawlerException { try { CrawlStrategy strategy = strategyFactory.getStrategy("douban"); CrawlResult result = invokeCrawl(strategy, count); if (result.getStatus() == CrawlStatus.SUCCESS) { @SuppressWarnings("unchecked") List movies = (List) result.getData(); view.showMovies(movies); if (dataSaver != null) { dataSaver.addMovies(movies); } } else { view.showError(result.getMessage()); } } catch (CrawlerException e) { throw e; } catch (Exception e) { throw new CrawlerException("Crawl movies failed", e); } } public void crawlNews(String count) throws CrawlerException { try { CrawlStrategy strategy = strategyFactory.getStrategy("news"); CrawlResult result = invokeCrawl(strategy, count); if (result.getStatus() == CrawlStatus.SUCCESS) { @SuppressWarnings("unchecked") List newsList = (List) result.getData(); view.showNews(newsList); if (dataSaver != null) { dataSaver.addNews(newsList); } } else { view.showError(result.getMessage()); } } catch (CrawlerException e) { throw e; } catch (Exception e) { throw new CrawlerException("Crawl news failed", e); } } public void crawlWeather(String days) throws CrawlerException { try { CrawlStrategy strategy = strategyFactory.getStrategy("hunanweather"); CrawlResult result = invokeCrawl(strategy, days); if (result.getStatus() == CrawlStatus.SUCCESS) { @SuppressWarnings("unchecked") List weatherList = (List) result.getData(); view.showWeather(weatherList); if (dataSaver != null) { dataSaver.addWeather(weatherList); } } else { view.showError(result.getMessage()); } } catch (CrawlerException e) { throw e; } catch (Exception e) { throw new CrawlerException("Crawl weather failed", e); } } @SuppressWarnings("unchecked") private CrawlResult invokeCrawl(CrawlStrategy strategy, String target) throws Exception { Method method = strategy.getClass().getMethod("crawl", String.class); return (CrawlResult) method.invoke(strategy, target); } public void showHelp() { view.showMessage("\n========================================"); view.showMessage(" MultiSiteCrawler CLI"); view.showMessage("========================================\n"); view.showMessage("用法:"); view.showMessage(" java MultiSiteCrawler <命令> [参数]"); view.showMessage("\n命令列表:"); view.showMessage(" crawl <类型> <目标...> 爬取数据"); view.showMessage(" list 列出支持的网站"); view.showMessage(" save <文件> 保存配置"); view.showMessage(" help 显示帮助"); view.showMessage("\n支持的网站: " + String.join(", ", strategyFactory.getSupportedSites())); } public void listSites() { view.showMessage("\n支持的网站:"); strategyFactory.getSupportedSites().forEach(site -> { try { CrawlStrategy strategy = strategyFactory.getStrategy(site); CrawlerInfo info = strategy.getClass().getAnnotation(CrawlerInfo.class); if (info != null) { view.showMessage(String.format(" - %s (%s v%s)", site, info.name(), info.version())); } else { view.showMessage(" - " + site); } } catch (CrawlerException e) { view.showError(e.getMessage()); } }); } public void saveConfig(String filename) throws CrawlerException { try { Map config = Map.of( "version", "1.0", "created", LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME), "sites", strategyFactory.getSupportedSites() ); Files.writeString(Paths.get(filename), config.toString(), StandardOpenOption.CREATE); view.showMessage("配置已保存到: " + filename); } catch (IOException e) { throw new CrawlerException("保存失败", e); } } } // ==================== Strategy Layer (策略层) ==================== @CrawlerInfo(name = "Douban Movie", version = "2.0") class DoubanCrawlStrategy implements CrawlStrategy { @Override public CrawlResult> crawl(String target) throws CrawlerException { try { int count = Integer.parseInt(target); String html = fetchHTML("https://movie.douban.com/top250?start=0"); List movies = parseMovies(html, count); return new CrawlResult<>(movies, CrawlStatus.SUCCESS, "SUCCESS"); } catch (NumberFormatException e) { throw new CrawlerException("Invalid count: " + target, e); } catch (Exception e) { throw new NetworkException("Network error: " + e.getMessage(), e); } } private String fetchHTML(String urlStr) throws Exception { URL url = new URL(urlStr); HttpURLConnection conn = (HttpURLConnection) url.openConnection(); conn.setRequestMethod("GET"); conn.setRequestProperty("User-Agent", "Mozilla/5.0"); conn.setConnectTimeout(10000); conn.setReadTimeout(10000); try (BufferedReader reader = new BufferedReader( new InputStreamReader(conn.getInputStream(), "UTF-8"))) { StringBuilder html = new StringBuilder(); String line; while ((line = reader.readLine()) != null) { html.append(line); } return html.toString(); } finally { conn.disconnect(); } } private List parseMovies(String html, int count) { List movies = new ArrayList<>(); int startIdx = 0; while (movies.size() < count) { int liStart = html.indexOf("
  • ", startIdx); int liEnd = html.indexOf("
  • ", liStart); if (liStart == -1 || liEnd == -1) break; String liContent = html.substring(liStart, liEnd); if (!liContent.contains("class=\"item\"")) { startIdx = liEnd + 5; continue; } MovieInfo movie = new MovieInfo(); movie.setRank(movies.size() + 1); int titleStart = liContent.indexOf(""); if (titleStart != -1) { titleStart += "".length(); int titleEnd = liContent.indexOf("", titleStart); if (titleEnd != -1) { movie.setTitle(liContent.substring(titleStart, titleEnd).trim()); } } int ratingStart = liContent.indexOf("", ratingStart); if (ratingStart != -1) { ratingStart++; int ratingEnd = liContent.indexOf("<", ratingStart); if (ratingEnd != -1) { try { movie.setRating(Double.parseDouble(liContent.substring(ratingStart, ratingEnd).trim())); } catch (Exception e) {} } } } int bdStart = liContent.indexOf(""); } @Override public String getName() { return "douban"; } } @CrawlerInfo(name = "News Headlines", version = "2.0") class NewsCrawlStrategy implements CrawlStrategy { @Override public CrawlResult> crawl(String target) throws CrawlerException { try { int count = Integer.parseInt(target); List newsList = generateNews(count); return new CrawlResult<>(newsList, CrawlStatus.SUCCESS, "SUCCESS"); } catch (NumberFormatException e) { throw new CrawlerException("Invalid count: " + target, e); } } private List generateNews(int count) { List newsList = new ArrayList<>(); String[] titles = { "AI技术突破:新模型实现人类级别推理能力", "全球气候峰会达成历史性协议", "科技巨头公布创纪录季度财报", "太空探索:新火星任务正式宣布", "经济复苏:股市创历史新高", "医学突破:新型癌症治疗方法前景看好", "可再生能源:太阳能成本下降50%", "国际贸易:新合作伙伴关系协议签署", "教育改革:新政策正式公布", "医疗创新:远程医疗服务扩展", "网络安全:新型威胁已被识别", "交通运输:电动汽车销量激增", "农业发展:智能农业技术进步", "娱乐行业:流媒体竞争加剧", "体育新闻:重大赛事更新" }; String[] sources = {"科技日报", "环球时报", "财经日报", "科学今天", "世界报道"}; String[] times = {"2小时前", "4小时前", "6小时前", "8小时前", "12小时前"}; Random random = new Random(); for (int i = 0; i < count && i < titles.length; i++) { NewsInfo news = new NewsInfo(); news.setRank(i + 1); news.setTitle(titles[i]); news.setSource(sources[random.nextInt(sources.length)]); news.setTime(times[random.nextInt(times.length)]); news.setViews(10000 + random.nextInt(90000)); newsList.add(news); } return newsList; } @Override public String getName() { return "news"; } } @CrawlerInfo(name = "Hunan Weather", version = "2.0") class HunanWeatherCrawlStrategy implements CrawlStrategy { @Override public CrawlResult> crawl(String target) throws CrawlerException { try { int days = Integer.parseInt(target); List weatherList = generateWeather(days); return new CrawlResult<>(weatherList, CrawlStatus.SUCCESS, "SUCCESS"); } catch (NumberFormatException e) { throw new CrawlerException("Invalid count: " + target, e); } } private List generateWeather(int days) { List weatherList = new ArrayList<>(); for (int i = 0; i < days; i++) { WeatherDay weather = new WeatherDay(); weather.setDate(String.format("2026-05-%02d", 22 + i)); weather.setWeek(new String[]{"周五", "周六", "周日", "周一", "周二", "周三", "周四"}[(22 + i) % 7]); weather.setLowTemp(String.valueOf(18 + (i % 5))); weather.setHighTemp(String.valueOf(25 + (i % 8))); weather.setCondition(new String[]{"晴", "多云", "阴", "小雨", "阵雨"}[i % 5]); weatherList.add(weather); } return weatherList; } @Override public String getName() { return "hunanweather"; } } class StrategyFactory { private static final StrategyFactory instance = new StrategyFactory(); private final Map> strategies = new HashMap<>(); private StrategyFactory() { strategies.put("douban", new DoubanCrawlStrategy()); strategies.put("news", new NewsCrawlStrategy()); strategies.put("hunanweather", new HunanWeatherCrawlStrategy()); } public static StrategyFactory getInstance() { return instance; } public CrawlStrategy getStrategy(String name) throws CrawlerException { CrawlStrategy strategy = strategies.get(name.toLowerCase()); if (strategy == null) { throw new CrawlerException("Unknown strategy: " + name); } return strategy; } public List getSupportedSites() { return new ArrayList<>(strategies.keySet()); } } // ==================== Command Layer (命令层) ==================== class CrawlCommand implements Command { private final CrawlerController controller; private final String site; private final List targets; public CrawlCommand(CrawlerController controller, String site, List targets) { this.controller = controller; this.site = site; this.targets = targets; } @Override public void execute() throws CrawlerException { for (String target : targets) { switch (site.toLowerCase()) { case "douban" -> controller.crawlMovies(target); case "news" -> controller.crawlNews(target); case "hunanweather" -> controller.crawlWeather(target); default -> throw new CrawlerException("Unknown site: " + site); } } } @Override public String getCommandName() { return "crawl"; } } class DefaultCommand implements Command { private final CrawlerController controller; public DefaultCommand(CrawlerController controller) { this.controller = controller; } @Override public void execute() throws CrawlerException { controller.crawlAll(); } @Override public String getCommandName() { return "default"; } } class HelpCommand implements Command { private final CrawlerController controller; public HelpCommand(CrawlerController controller) { this.controller = controller; } @Override public void execute() { controller.showHelp(); } @Override public String getCommandName() { return "help"; } } class ListCommand implements Command { private final CrawlerController controller; public ListCommand(CrawlerController controller) { this.controller = controller; } @Override public void execute() { controller.listSites(); } @Override public String getCommandName() { return "list"; } } class SaveCommand implements Command { private final CrawlerController controller; private final String filename; public SaveCommand(CrawlerController controller, String filename) { this.controller = controller; this.filename = filename; } @Override public void execute() throws CrawlerException { controller.saveConfig(filename); } @Override public String getCommandName() { return "save"; } } // ==================== CLI Layer (命令行接口层) ==================== class CLI { private final CrawlerController controller; public CLI(CrawlerController controller) { this.controller = controller; } public Command parse(String[] args) throws CrawlerException { if (args.length == 0) return new DefaultCommand(controller); return switch (args[0].toLowerCase()) { case "help" -> new HelpCommand(controller); case "list" -> new ListCommand(controller); case "save" -> { if (args.length < 2) throw new CrawlerException("save需要文件名参数"); yield new SaveCommand(controller, args[1]); } case "crawl" -> { if (args.length < 3) throw new CrawlerException("crawl需要类型和目标参数"); List targets = Arrays.stream(args, 2, args.length).toList(); yield new CrawlCommand(controller, args[1], targets); } default -> throw new CrawlerException("未知命令: " + args[0]); }; } } // ==================== Main Entry (主入口) ==================== public class MultiSiteCrawler { public static void main(String[] args) { CrawlerView view = new ConsoleView(); CrawlerController controller = new CrawlerController(view); CLI cli = new CLI(controller); view.showHeader(); try { DataSaver dataSaver = new DataSaver("爬取结果报告.txt"); controller.setDataSaver(dataSaver); Command command = cli.parse(args); command.execute(); dataSaver.addFooter(); dataSaver.save(); } catch (CrawlerException e) { view.showError(e.getMessage()); e.printStackTrace(); } catch (IOException e) { view.showError("保存文件失败: " + e.getMessage()); } } }