import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Scanner;
import java.util.logging.Logger;

/** A data source that produces a list of {@link CrawlerItem}s. */
interface Crawler {
    /**
     * Produces the items of this source.
     *
     * @return the items of this source
     * @throws CrawlerException if the items cannot be produced
     */
    List<CrawlerItem> crawl() throws CrawlerException;
}

/** The fields shared by every crawled entry, regardless of its source. */
interface CrawlerItem {
    int getRank();

    String getName();

    String getUrl();

    String getDescription();

    String getSource();
}

/** A named action that can be executed by the controller. */
interface Command {
    /**
     * Runs the command.
     *
     * @throws CrawlerException if the command fails
     */
    void execute() throws CrawlerException;

    /** Returns the short name identifying this command. */
    String getCommandName();
}

/** Base checked exception for every failure raised by this tool. */
class CrawlerException extends Exception {
    public CrawlerException(String message) {
        super(message);
    }

    public CrawlerException(String message, Throwable cause) {
        super(message, cause);
    }
}

/** A {@link CrawlerException} subtype for network failures. */
class NetworkException extends CrawlerException {
    public NetworkException(String message) {
        super(message);
    }

    public NetworkException(String message, Throwable cause) {
        super(message, cause);
    }
}

/** A {@link CrawlerException} subtype raised by the strategies when building their item list fails. */
class ParseException extends CrawlerException {
    public ParseException(String message) {
        super(message);
    }

    public ParseException(String message, Throwable cause) {
        super(message, cause);
    }
}

/** One repository entry from the GitHub source. */
class GitHubItem implements CrawlerItem {
    private int rank;
    private String name;
    private String description;
    private String url;
    private String language;
    private String stars;
    private String todayStars;

    @Override
    public int getRank() {
        return rank;
    }

    public void setRank(int rank) {
        this.rank = rank;
    }

    @Override
    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    @Override
    public String getDescription() {
        return description;
    }

    public void setDescription(String description) {
        this.description = description;
    }

    @Override
    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public String getLanguage() {
        return language;
    }

    public void setLanguage(String language) {
        this.language = language;
    }

    public String getStars() {
        return stars;
    }

    public void setStars(String stars) {
        this.stars = stars;
    }

    public String getTodayStars() {
        return todayStars;
    }

    public void setTodayStars(String todayStars) {
        this.todayStars = todayStars;
    }

    @Override
    public String getSource() {
        return "GitHub Trending";
    }

    @Override
    public String toString() {
        return String.format("%d. %s [%s] - %s星", rank, name, language, stars);
    }
}

/** One trending-topic entry from the Weibo source. */
class WeiboItem implements CrawlerItem {
    private int rank;
    private String name;
    private String description;
    private String url;
    private String hot;

    @Override
    public int getRank() {
        return rank;
    }

    public void setRank(int rank) {
        this.rank = rank;
    }

    @Override
    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    @Override
    public String getDescription() {
        return description;
    }

    public void setDescription(String description) {
        this.description = description;
    }

    @Override
    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public String getHot() {
        return hot;
    }

    public void setHot(String hot) {
        this.hot = hot;
    }

    @Override
    public String getSource() {
        return "微博热搜";
    }

    @Override
    public String toString() {
        return String.format("%d. %s - %s", rank, name, hot);
    }
}

/** One movie entry from the Douban source. */
class DoubanItem implements CrawlerItem {
    private int rank;
    private String name;
    private String description;
    private String url;
    private String rating;
    private String director;

    @Override
    public int getRank() {
        return rank;
    }

    public void setRank(int rank) {
        this.rank = rank;
    }

    @Override
    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    @Override
    public String getDescription() {
        return description;
    }

    public void setDescription(String description) {
        this.description = description;
    }

    @Override
    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public String getRating() {
        return rating;
    }

    public void setRating(String rating) {
        this.rating = rating;
    }

    public String getDirector() {
        return director;
    }

    public void setDirector(String director) {
        this.director = director;
    }

    @Override
    public String getSource() {
        return "豆瓣电影Top250";
    }

    @Override
    public String toString() {
        return String.format("%d. %s - %s分", rank, name, rating);
    }
}

/** {@link Crawler} that returns a fixed, built-in list of GitHub repositories. */
class GitHubStrategy implements Crawler {
    /**
     * Builds the built-in GitHub item list.
     *
     * @return ten {@link GitHubItem}s ranked 1 to 10
     * @throws CrawlerException (as {@link ParseException}) if building the list fails
     */
    @Override
    public List<CrawlerItem> crawl() throws CrawlerException {
        try {
            return parseMockData();
        } catch (RuntimeException e) {
            // parseMockData declares no checked exceptions, so only runtime failures can occur.
            throw new ParseException("解析GitHub数据失败", e);
        }
    }

    /** Converts the hard-coded rows (name, description, language, stars, today's stars) to items. */
    private List<CrawlerItem> parseMockData() {
        String[][] data = {
            {"freeCodeCamp/freeCodeCamp", "开源代码库和课程", "JavaScript", "358000", "120"},
            {"microsoft/vscode", "Visual Studio Code", "TypeScript", "158000", "89"},
            {"facebook/react", "React框架", "JavaScript", "205000", "76"},
            {"tensorflow/tensorflow", "机器学习框架", "Python", "180000", "65"},
            {"torvalds/linux", "Linux内核", "C", "160000", "45"},
            {"kubernetes/kubernetes", "容器编排", "Go", "100000", "38"},
            {"spring-projects/spring-boot", "Spring Boot", "Java", "60000", "32"},
            {"vuejs/core", "Vue.js框架", "TypeScript", "45000", "58"},
            {"rust-lang/rust", "Rust语言", "Rust", "85000", "42"},
            {"numpy/numpy", "数值计算库", "Python", "25000", "28"}
        };
        List<CrawlerItem> items = new ArrayList<>(data.length);
        for (int i = 0; i < data.length; i++) {
            String[] row = data[i];
            GitHubItem item = new GitHubItem();
            item.setRank(i + 1);
            item.setName(row[0]);
            item.setDescription(row[1]);
            item.setLanguage(row[2]);
            item.setStars(row[3]);
            item.setTodayStars(row[4]);
            item.setUrl("https://github.com/" + row[0]);
            items.add(item);
        }
        return items;
    }

    public String getSourceName() {
        return "GitHub Trending";
    }
}

/** {@link Crawler} that returns a fixed, built-in list of Weibo trending topics. */
class WeiboStrategy implements Crawler {
    /**
     * Builds the built-in Weibo item list.
     *
     * @return ten {@link WeiboItem}s ranked 1 to 10
     * @throws CrawlerException (as {@link ParseException}) if building the list fails
     */
    @Override
    public List<CrawlerItem> crawl() throws CrawlerException {
        try {
            return parseMockData();
        } catch (RuntimeException e) {
            // parseMockData declares no checked exceptions, so only runtime failures can occur.
            throw new ParseException("解析微博数据失败", e);
        }
    }

    /** Converts the hard-coded rows (topic, description, heat) to items. */
    private List<CrawlerItem> parseMockData() {
        String[][] data = {
            {"北京气温创历史新高", "北京今日最高气温达到40.2度,创历史同期新高", "288万"},
            {"国乒世预赛大胜", "国乒在世界杯预选赛中以3:0战胜对手", "196万"},
            {"新型人工智能模型发布", "某科技公司发布新一代AI大模型,性能提升300%", "156万"},
            {"高考倒计时100天", "距离2024年高考还有100天,考生们积极备考", "128万"},
            {"春季旅游旺季来临", "随着气温回暖,各大景区迎来旅游高峰", "98万"},
            {"新能源汽车销量创新高", "一季度新能源汽车销量同比增长50%", "86万"},
            {"5G商用全面铺开", "全国5G基站总数突破400万", "72万"},
            {"数字人民币试点扩大", "数字人民币应用场景进一步扩展", "65万"},
            {"航天发射任务成功", "我国成功发射新一代通信卫星", "58万"},
            {"高校毕业生就业形势", "今年高校毕业生规模预计达1179万人", "45万"}
        };
        List<CrawlerItem> items = new ArrayList<>(data.length);
        for (int i = 0; i < data.length; i++) {
            String[] row = data[i];
            WeiboItem item = new WeiboItem();
            item.setRank(i + 1);
            item.setName(row[0]);
            item.setDescription(row[1]);
            item.setHot(row[2]);
            item.setUrl("https://s.weibo.com/weibo?q=" + row[0]);
            items.add(item);
        }
        return items;
    }

    public String getSourceName() {
        return "微博热搜";
    }
}

/** {@link Crawler} that returns a fixed, built-in list of Douban movies. */
class DoubanStrategy implements Crawler {
    /**
     * Builds the built-in Douban item list.
     *
     * @return ten {@link DoubanItem}s ranked 1 to 10
     * @throws CrawlerException (as {@link ParseException}) if building the list fails
     */
    @Override
    public List<CrawlerItem> crawl() throws CrawlerException {
        try {
            return parseMockData();
        } catch (RuntimeException e) {
            // parseMockData declares no checked exceptions, so only runtime failures can occur.
            throw new ParseException("解析豆瓣数据失败", e);
        }
    }

    /** Converts the hard-coded rows (title, tagline, rating, director) to items. */
    private List<CrawlerItem> parseMockData() {
        String[][] data = {
            {"肖申克的救赎", "希望让人自由", "9.7", "弗兰克·德拉邦特"},
            {"霸王别姬", "风华绝代", "9.6", "陈凯歌"},
            {"阿甘正传", "生命就像一盒巧克力", "9.5", "罗伯特·泽米吉斯"},
            {"泰坦尼克号", "You jump, I jump", "9.4", "詹姆斯·卡梅隆"},
            {"盗梦空间", "现实与梦境的交织", "9.3", "克里斯托弗·诺兰"},
            {"星际穿越", "爱是唯一能超越时间和空间的事物", "9.4", "克里斯托弗·诺兰"},
            {"千与千寻", "不要忘记自己的名字", "9.4", "宫崎骏"},
            {"辛德勒的名单", "拯救一个人就是拯救全世界", "9.5", "史蒂文·斯皮尔伯格"},
            {"疯狂动物城", "勇敢尝试,一切皆有可能", "9.2", "拜恩·霍华德"},
            {"哪吒之魔童降世", "我命由我不由天", "8.4", "饺子"}
        };
        List<CrawlerItem> items = new ArrayList<>(data.length);
        for (int i = 0; i < data.length; i++) {
            String[] row = data[i];
            DoubanItem item = new DoubanItem();
            item.setRank(i + 1);
            item.setName(row[0]);
            item.setDescription(row[1]);
            item.setRating(row[2]);
            item.setDirector(row[3]);
            item.setUrl("https://movie.douban.com/subject/search?search_text=" + row[0]);
            items.add(item);
        }
        return items;
    }

    public String getSourceName() {
        return "豆瓣电影Top250";
    }
}

/** Command that runs a {@link Crawler} and keeps its result for later retrieval. */
class CrawlCommand implements Command {
    private static final Logger logger = Logger.getLogger(CrawlCommand.class.getName());

    private final Crawler strategy;
    private List<CrawlerItem> result;

    public CrawlCommand(Crawler strategy) {
        this.strategy = strategy;
    }

    /**
     * Runs the wrapped crawler and stores what it returns.
     *
     * @throws CrawlerException if the crawler fails
     */
    @Override
    public void execute() throws CrawlerException {
        logger.info("开始爬取...");
        result = strategy.crawl();
        logger.info("爬取完成,共获取 " + result.size() + " 条数据");
    }

    @Override
    public String getCommandName() {
        return "crawl";
    }

    /** Returns the items from the last {@link #execute()} call, or {@code null} if it never ran. */
    public List<CrawlerItem> getResult() {
        return result;
    }
}

/** Command that writes a list of items to a CSV file encoded as UTF-8. */
class SaveCommand implements Command {
    private static final Logger logger = Logger.getLogger(SaveCommand.class.getName());

    private final List<? extends CrawlerItem> items;
    private final String filename;

    /**
     * @param items the items to write; the column layout is chosen from the first item's type
     * @param filename path of the file to create or overwrite
     */
    public SaveCommand(List<? extends CrawlerItem> items, String filename) {
        this.items = items;
        this.filename = filename;
    }

    /**
     * Writes a header row followed by one row per item.
     *
     * @throws CrawlerException if there are no items, or if the file cannot be written
     */
    @Override
    public void execute() throws CrawlerException {
        if (items == null || items.isEmpty()) {
            throw new CrawlerException("没有可保存的数据");
        }
        // The charset is given explicitly: the data is Chinese text, and the platform default
        // charset is not guaranteed to be able to represent it.
        try (Writer writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(filename), StandardCharsets.UTF_8))) {
            writer.write(generateHeader(items.get(0)));
            writer.write("\n");
            for (CrawlerItem item : items) {
                writer.write(generateLine(item));
                writer.write("\n");
            }
        } catch (IOException e) {
            throw new CrawlerException("保存文件失败: " + e.getMessage(), e);
        }
        // Success is reported only after the writer has been flushed and closed.
        logger.info("数据已保存到: " + filename);
        System.out.println("数据已保存到: " + filename);
    }

    /** Returns the header row matching the concrete type of {@code item}. */
    private String generateHeader(CrawlerItem item) {
        if (item instanceof GitHubItem) {
            return "排名,名称,链接,描述,语言,星数,今日星数,来源";
        } else if (item instanceof WeiboItem) {
            return "排名,名称,链接,描述,热度,来源";
        } else if (item instanceof DoubanItem) {
            return "排名,名称,链接,描述,评分,导演,来源";
        }
        return "排名,名称,链接,描述,来源";
    }

    /** Builds one CSV row: the common columns, then the type-specific columns, then the source. */
    private String generateLine(CrawlerItem item) {
        StringBuilder sb = new StringBuilder();
        sb.append(item.getRank()).append(",");
        sb.append(escapeCsv(item.getName())).append(",");
        sb.append(escapeCsv(item.getUrl())).append(",");
        sb.append(escapeCsv(item.getDescription())).append(",");
        // Every text field goes through escapeCsv so that a comma or quote in any of them
        // cannot shift the columns.
        if (item instanceof GitHubItem) {
            GitHubItem gitHub = (GitHubItem) item;
            sb.append(escapeCsv(gitHub.getLanguage())).append(",");
            sb.append(escapeCsv(gitHub.getStars())).append(",");
            sb.append(escapeCsv(gitHub.getTodayStars())).append(",");
        } else if (item instanceof WeiboItem) {
            WeiboItem weibo = (WeiboItem) item;
            sb.append(escapeCsv(weibo.getHot())).append(",");
        } else if (item instanceof DoubanItem) {
            DoubanItem douban = (DoubanItem) item;
            sb.append(escapeCsv(douban.getRating())).append(",");
            sb.append(escapeCsv(douban.getDirector())).append(",");
        }
        sb.append(escapeCsv(item.getSource()));
        return sb.toString();
    }

    /**
     * Quotes a field when it contains a comma, a double quote, or a line break, doubling any
     * embedded double quotes. A {@code null} value becomes an empty field.
     */
    private String escapeCsv(String value) {
        if (value == null) {
            return "";
        }
        boolean needsQuoting = value.contains(",")
                || value.contains("\"")
                || value.contains("\n")
                || value.contains("\r");
        if (needsQuoting) {
            return "\"" + value.replace("\"", "\"\"") + "\"";
        }
        return value;
    }

    @Override
    public String getCommandName() {
        return "save";
    }
}

/** Command that prints a list of items to standard output. */
class DisplayCommand implements Command {
    private static final Logger logger = Logger.getLogger(DisplayCommand.class.getName());

    private final List<? extends CrawlerItem> items;

    public DisplayCommand(List<? extends CrawlerItem> items) {
        this.items = items;
    }

    /**
     * Prints the common fields of every item followed by its type-specific fields.
     *
     * @throws CrawlerException if there are no items to show
     */
    @Override
    public void execute() throws CrawlerException {
        if (items == null || items.isEmpty()) {
            throw new CrawlerException("没有可显示的数据");
        }
        System.out.println("\n=== 爬取结果 ===");
        for (CrawlerItem item : items) {
            System.out.println("\n" + item.getRank() + ". " + item.getName());
            System.out.println("链接: " + item.getUrl());
            System.out.println("描述: " + item.getDescription());
            System.out.println("来源: " + item.getSource());
            if (item instanceof GitHubItem) {
                GitHubItem gitHub = (GitHubItem) item;
                System.out.println("语言: " + gitHub.getLanguage());
                System.out.println("星数: " + gitHub.getStars());
                System.out.println("今日星数: " + gitHub.getTodayStars());
            } else if (item instanceof WeiboItem) {
                WeiboItem weibo = (WeiboItem) item;
                System.out.println("热度: " + weibo.getHot());
            } else if (item instanceof DoubanItem) {
                DoubanItem douban = (DoubanItem) item;
                System.out.println("评分: " + douban.getRating());
                System.out.println("导演: " + douban.getDirector());
            }
        }
        logger.info("已显示 " + items.size() + " 条数据");
    }

    @Override
    public String getCommandName() {
        return "display";
    }
}

/** Runs the crawl, display, and save commands in sequence for one source or for all of them. */
class CrawlerController {
    private static final Logger logger = Logger.getLogger(CrawlerController.class.getName());

    /**
     * Crawls, displays, and saves a single source.
     *
     * @param source source key: {@code github}, {@code weibo}, or {@code douban} (any letter case)
     * @throws CrawlerException if the key is {@code null} or unknown, or if any step fails
     */
    public void executeCrawl(String source) throws CrawlerException {
        Crawler strategy = createStrategy(source);
        executeWithStrategy(strategy, source);
    }

    /**
     * Crawls, displays, and saves every built-in source. A failure in one source is logged and
     * does not stop the remaining sources.
     */
    public void executeCrawlAll() throws CrawlerException {
        String[] sources = {"GitHub Trending", "微博热搜", "豆瓣电影Top250"};
        Crawler[] crawlers = {new GitHubStrategy(), new WeiboStrategy(), new DoubanStrategy()};
        for (int i = 0; i < crawlers.length; i++) {
            try {
                executeWithStrategy(crawlers[i], sources[i]);
            } catch (CrawlerException e) {
                logger.severe("爬取 " + sources[i] + " 失败: " + e.getMessage());
            }
        }
    }

    /** Maps a source key to its crawler; a null or unknown key is reported as CrawlerException. */
    private Crawler createStrategy(String source) throws CrawlerException {
        if (source == null) {
            throw new CrawlerException("不支持的数据源: " + source);
        }
        // Locale.ROOT keeps the key comparison independent of the default locale's casing rules.
        switch (source.toLowerCase(Locale.ROOT)) {
            case "github":
                return new GitHubStrategy();
            case "weibo":
                return new WeiboStrategy();
            case "douban":
                return new DoubanStrategy();
            default:
                throw new CrawlerException("不支持的数据源: " + source);
        }
    }

    /** Crawls with {@code strategy}, prints the result, and saves it as a time-stamped CSV file. */
    private void executeWithStrategy(Crawler strategy, String sourceName) throws CrawlerException {
        CrawlCommand crawlCommand = new CrawlCommand(strategy);
        crawlCommand.execute();
        List<CrawlerItem> result = crawlCommand.getResult();

        new DisplayCommand(result).execute();

        String filename = sourceName.replace(" ", "_") + "_" + System.currentTimeMillis() + ".csv";
        new SaveCommand(result, filename).execute();
    }
}

/** Interactive console front end: prints a menu and dispatches each input line. */
class CLI {
    private static final Logger logger = Logger.getLogger(CLI.class.getName());

    private final CrawlerController controller;

    public CLI() {
        this.controller = new CrawlerController();
    }

    /**
     * Runs the menu loop until the user chooses to exit ({@code 6} or {@code exit}) or standard
     * input reaches its end. Command failures are reported and the loop continues.
     */
    public void start() {
        printWelcome();
        Scanner scanner = new Scanner(System.in);
        while (true) {
            printMenu();
            System.out.print("请输入选择: ");
            // End of input (for example a closed pipe) ends the session instead of throwing.
            if (!scanner.hasNextLine()) {
                break;
            }
            // Commands are matched case-insensitively.
            String input = scanner.nextLine().trim().toLowerCase(Locale.ROOT);
            try {
                if (!handleInput(input)) {
                    break;
                }
            } catch (CrawlerException e) {
                System.err.println("错误: " + e.getMessage());
                logger.severe("执行失败: " + e.getMessage());
            }
        }
        scanner.close();
        System.out.println("\n感谢使用爬虫工具,再见!");
    }

    private void printWelcome() {
        System.out.println("=========================================");
        System.out.println(" 多网站爬虫工具 v1.0");
        System.out.println("=========================================");
        System.out.println("支持爬取: GitHub Trending / 微博热搜 / 豆瓣电影");
        System.out.println("=========================================\n");
    }

    private void printMenu() {
        System.out.println("\n请选择操作:");
        System.out.println("1. 爬取 GitHub Trending");
        System.out.println("2. 爬取 微博热搜");
        System.out.println("3. 爬取 豆瓣电影Top250");
        System.out.println("4. 爬取所有网站");
        System.out.println("5. 帮助");
        System.out.println("6. 退出");
    }

    /**
     * Dispatches one already-trimmed, lower-cased input line.
     *
     * @return {@code false} when the user asked to exit, {@code true} otherwise
     * @throws CrawlerException if the selected crawl fails
     */
    private boolean handleInput(String input) throws CrawlerException {
        switch (input) {
            case "1":
            case "github":
                controller.executeCrawl("github");
                return true;
            case "2":
            case "weibo":
                controller.executeCrawl("weibo");
                return true;
            case "3":
            case "douban":
                controller.executeCrawl("douban");
                return true;
            case "4":
            case "all":
                controller.executeCrawlAll();
                return true;
            case "5":
            case "help":
                printHelp();
                return true;
            case "6":
            case "exit":
                return false;
            default:
                System.out.println("无效输入,请输入数字 1-6 或命令名称");
                return true;
        }
    }

    private void printHelp() {
        System.out.println("\n=== 帮助信息 ===");
        System.out.println("命令列表:");
        System.out.println(" 1 / github - 爬取 GitHub Trending");
        System.out.println(" 2 / weibo - 爬取 微博热搜");
        System.out.println(" 3 / douban - 爬取 豆瓣电影Top250");
        System.out.println(" 4 / all - 爬取所有网站");
        System.out.println(" 5 / help - 显示帮助信息");
        System.out.println(" 6 / exit - 退出程序");
        System.out.println("\n输出说明:");
        System.out.println(" - 控制台会显示爬取结果");
        System.out.println(" - 数据会自动保存为 CSV 文件");
        System.out.println(" - 文件名格式: [来源]_[时间戳].csv");
    }
}

/** Program entry point: starts the interactive console. */
public class CrawlerProject {
    public static void main(String[] args) {
        CLI cli = new CLI();
        cli.start();
    }
}