Browse Source

上传文件至 '202506050310-黄若妍-期末实验报告'

main
Huangruoyan 3 weeks ago
parent
commit
3f7da73e99
  1. 562
      202506050310-黄若妍-期末实验报告/CrawlerProject.java
  2. BIN
      202506050310-黄若妍-期末实验报告/项目报告v1(1).docx

562
202506050310-黄若妍-期末实验报告/CrawlerProject.java

@ -0,0 +1,562 @@
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Scanner;
import java.util.logging.Logger;
/**
 * A data source that produces a list of {@link CrawlerItem} entries.
 */
interface Crawler {

    /**
     * Collects the entries offered by this source.
     *
     * @return the collected entries
     * @throws CrawlerException if the entries cannot be produced
     */
    List<? extends CrawlerItem> crawl() throws CrawlerException;
}
/**
 * Read-only view of a single crawled entry, exposing the fields shared by all sources.
 */
interface CrawlerItem {

    /** @return the position of this entry within its list */
    int getRank();

    /** @return the entry's name */
    String getName();

    /** @return the link associated with the entry */
    String getUrl();

    /** @return the descriptive text of the entry */
    String getDescription();

    /** @return the label of the source this entry belongs to */
    String getSource();
}
/**
 * A named unit of work that can be executed.
 */
interface Command {

    /**
     * Runs the command.
     *
     * @throws CrawlerException if the command cannot complete
     */
    void execute() throws CrawlerException;

    /** @return the identifier of this command */
    String getCommandName();
}
/**
 * Checked exception used as the base failure type of the crawler.
 */
class CrawlerException extends Exception {

    /**
     * Creates an exception carrying only a message.
     *
     * @param message description of the failure
     */
    public CrawlerException(String message) {
        super(message);
    }

    /**
     * Creates an exception carrying a message and the underlying cause.
     *
     * @param message description of the failure
     * @param cause   the throwable that triggered this failure
     */
    public CrawlerException(String message, Throwable cause) {
        super(message, cause);
    }
}
/**
 * {@link CrawlerException} subtype for network-related failures.
 */
class NetworkException extends CrawlerException {

    /**
     * Creates an exception carrying only a message.
     *
     * @param message description of the failure
     */
    public NetworkException(String message) {
        super(message);
    }

    /**
     * Creates an exception carrying a message and the underlying cause.
     *
     * @param message description of the failure
     * @param cause   the throwable that triggered this failure
     */
    public NetworkException(String message, Throwable cause) {
        super(message, cause);
    }
}
/**
 * {@link CrawlerException} subtype raised when source data cannot be parsed.
 */
class ParseException extends CrawlerException {

    /**
     * Creates an exception carrying only a message.
     *
     * @param message description of the failure
     */
    public ParseException(String message) {
        super(message);
    }

    /**
     * Creates an exception carrying a message and the underlying cause.
     *
     * @param message description of the failure
     * @param cause   the throwable that triggered this failure
     */
    public ParseException(String message, Throwable cause) {
        super(message, cause);
    }
}
/**
 * Mutable {@link CrawlerItem} for one repository entry; its source label is "GitHub Trending".
 */
class GitHubItem implements CrawlerItem {

    // Fields shared with every CrawlerItem.
    private int rank;
    private String name;
    private String url;
    private String description;

    // Fields specific to repository entries.
    private String language;
    private String stars;
    private String todayStars;

    @Override
    public int getRank() {
        return rank;
    }

    public void setRank(int rank) {
        this.rank = rank;
    }

    @Override
    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    @Override
    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    @Override
    public String getDescription() {
        return description;
    }

    public void setDescription(String description) {
        this.description = description;
    }

    public String getLanguage() {
        return language;
    }

    public void setLanguage(String language) {
        this.language = language;
    }

    public String getStars() {
        return stars;
    }

    public void setStars(String stars) {
        this.stars = stars;
    }

    public String getTodayStars() {
        return todayStars;
    }

    public void setTodayStars(String todayStars) {
        this.todayStars = todayStars;
    }

    /** @return the fixed source label of this item type */
    @Override
    public String getSource() {
        return "GitHub Trending";
    }

    /** @return a one-line summary made of rank, name, language and star count */
    @Override
    public String toString() {
        final String pattern = "%d. %s [%s] - %s星";
        return String.format(pattern, rank, name, language, stars);
    }
}
/**
 * Mutable {@link CrawlerItem} for one hot-search entry; its source label is "微博热搜".
 */
class WeiboItem implements CrawlerItem {

    // Fields shared with every CrawlerItem.
    private int rank;
    private String name;
    private String url;
    private String description;

    // Field specific to hot-search entries.
    private String hot;

    @Override
    public int getRank() {
        return rank;
    }

    public void setRank(int rank) {
        this.rank = rank;
    }

    @Override
    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    @Override
    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    @Override
    public String getDescription() {
        return description;
    }

    public void setDescription(String description) {
        this.description = description;
    }

    public String getHot() {
        return hot;
    }

    public void setHot(String hot) {
        this.hot = hot;
    }

    /** @return the fixed source label of this item type */
    @Override
    public String getSource() {
        return "微博热搜";
    }

    /** @return a one-line summary made of rank, name and hot value */
    @Override
    public String toString() {
        final String pattern = "%d. %s - %s";
        return String.format(pattern, rank, name, hot);
    }
}
/**
 * Mutable {@link CrawlerItem} for one movie entry; its source label is "豆瓣电影Top250".
 */
class DoubanItem implements CrawlerItem {

    // Fields shared with every CrawlerItem.
    private int rank;
    private String name;
    private String url;
    private String description;

    // Fields specific to movie entries.
    private String rating;
    private String director;

    @Override
    public int getRank() {
        return rank;
    }

    public void setRank(int rank) {
        this.rank = rank;
    }

    @Override
    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    @Override
    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    @Override
    public String getDescription() {
        return description;
    }

    public void setDescription(String description) {
        this.description = description;
    }

    public String getRating() {
        return rating;
    }

    public void setRating(String rating) {
        this.rating = rating;
    }

    public String getDirector() {
        return director;
    }

    public void setDirector(String director) {
        this.director = director;
    }

    /** @return the fixed source label of this item type */
    @Override
    public String getSource() {
        return "豆瓣电影Top250";
    }

    /** @return a one-line summary made of rank, name and rating */
    @Override
    public String toString() {
        final String pattern = "%d. %s - %s分";
        return String.format(pattern, rank, name, rating);
    }
}
/**
 * {@link Crawler} that builds {@link GitHubItem} entries from a hard-coded table of mock rows.
 */
class GitHubStrategy implements Crawler {

    /** Mock rows; columns are: repository slug, description, language, stars, stars today. */
    private static final String[][] MOCK_ROWS = {
        {"freeCodeCamp/freeCodeCamp", "开源代码库和课程", "JavaScript", "358000", "120"},
        {"microsoft/vscode", "Visual Studio Code", "TypeScript", "158000", "89"},
        {"facebook/react", "React框架", "JavaScript", "205000", "76"},
        {"tensorflow/tensorflow", "机器学习框架", "Python", "180000", "65"},
        {"torvalds/linux", "Linux内核", "C", "160000", "45"},
        {"kubernetes/kubernetes", "容器编排", "Go", "100000", "38"},
        {"spring-projects/spring-boot", "Spring Boot", "Java", "60000", "32"},
        {"vuejs/core", "Vue.js框架", "TypeScript", "45000", "58"},
        {"rust-lang/rust", "Rust语言", "Rust", "85000", "42"},
        {"numpy/numpy", "数值计算库", "Python", "25000", "28"}
    };

    /**
     * Returns the mock entries, ranked in table order starting at 1.
     *
     * @return the list of entries
     * @throws CrawlerException a {@link ParseException} wrapping any exception raised while
     *                          building the entries
     */
    @Override
    public List<CrawlerItem> crawl() throws CrawlerException {
        try {
            return parseMockData();
        } catch (Exception e) {
            throw new ParseException("解析GitHub数据失败", e);
        }
    }

    /** Converts every mock row into a {@link GitHubItem}. */
    private List<CrawlerItem> parseMockData() {
        List<CrawlerItem> parsed = new ArrayList<>();
        int position = 0;
        for (String[] row : MOCK_ROWS) {
            position++;
            String slug = row[0];

            GitHubItem entry = new GitHubItem();
            entry.setRank(position);
            entry.setName(slug);
            entry.setDescription(row[1]);
            entry.setLanguage(row[2]);
            entry.setStars(row[3]);
            entry.setTodayStars(row[4]);
            entry.setUrl("https://github.com/" + slug);
            parsed.add(entry);
        }
        return parsed;
    }

    /** @return the display name of this source */
    public String getSourceName() {
        return "GitHub Trending";
    }
}
/**
 * {@link Crawler} that builds {@link WeiboItem} entries from a hard-coded table of mock rows.
 */
class WeiboStrategy implements Crawler {

    /** Prefix of the search link; the topic is appended as the {@code q} query value. */
    private static final String SEARCH_URL_PREFIX = "https://s.weibo.com/weibo?q=";

    /** Mock rows; columns are: topic, description, hot value. */
    private static final String[][] MOCK_ROWS = {
        {"北京气温创历史新高", "北京今日最高气温达到40.2度,创历史同期新高", "288万"},
        {"国乒世预赛大胜", "国乒在世界杯预选赛中以3:0战胜对手", "196万"},
        {"新型人工智能模型发布", "某科技公司发布新一代AI大模型,性能提升300%", "156万"},
        {"高考倒计时100天", "距离2024年高考还有100天,考生们积极备考", "128万"},
        {"春季旅游旺季来临", "随着气温回暖,各大景区迎来旅游高峰", "98万"},
        {"新能源汽车销量创新高", "一季度新能源汽车销量同比增长50%", "86万"},
        {"5G商用全面铺开", "全国5G基站总数突破400万", "72万"},
        {"数字人民币试点扩大", "数字人民币应用场景进一步扩展", "65万"},
        {"航天发射任务成功", "我国成功发射新一代通信卫星", "58万"},
        {"高校毕业生就业形势", "今年高校毕业生规模预计达1179万人", "45万"}
    };

    /**
     * Returns the mock entries, ranked in table order starting at 1.
     *
     * @return the list of entries
     * @throws CrawlerException a {@link ParseException} wrapping any exception raised while
     *                          building the entries
     */
    @Override
    public List<CrawlerItem> crawl() throws CrawlerException {
        try {
            return parseMockData();
        } catch (Exception e) {
            // Kept broad on purpose: runtime failures are also reported as ParseException.
            throw new ParseException("解析微博数据失败", e);
        }
    }

    /**
     * Converts every mock row into a {@link WeiboItem}.
     *
     * @throws UnsupportedEncodingException if UTF-8 is not available for URL encoding
     */
    private List<CrawlerItem> parseMockData() throws UnsupportedEncodingException {
        List<CrawlerItem> items = new ArrayList<>();
        for (int i = 0; i < MOCK_ROWS.length; i++) {
            String[] row = MOCK_ROWS[i];
            WeiboItem item = new WeiboItem();
            item.setRank(i + 1);
            item.setName(row[0]);
            item.setDescription(row[1]);
            item.setHot(row[2]);
            // The topic is free text (non-ASCII, may contain '&', '#', spaces), so it must be
            // percent-encoded before being placed in the query string.
            item.setUrl(SEARCH_URL_PREFIX + URLEncoder.encode(row[0], "UTF-8"));
            items.add(item);
        }
        return items;
    }

    /** @return the display name of this source */
    public String getSourceName() {
        return "微博热搜";
    }
}
/**
 * {@link Crawler} that builds {@link DoubanItem} entries from a hard-coded table of mock rows.
 */
class DoubanStrategy implements Crawler {

    /** Prefix of the search link; the title is appended as the {@code search_text} value. */
    private static final String SEARCH_URL_PREFIX =
            "https://movie.douban.com/subject/search?search_text=";

    /** Mock rows; columns are: title, description, rating, director. */
    private static final String[][] MOCK_ROWS = {
        {"肖申克的救赎", "希望让人自由", "9.7", "弗兰克·德拉邦特"},
        {"霸王别姬", "风华绝代", "9.6", "陈凯歌"},
        {"阿甘正传", "生命就像一盒巧克力", "9.5", "罗伯特·泽米吉斯"},
        {"泰坦尼克号", "You jump, I jump", "9.4", "詹姆斯·卡梅隆"},
        {"盗梦空间", "现实与梦境的交织", "9.3", "克里斯托弗·诺兰"},
        {"星际穿越", "爱是唯一能超越时间和空间的事物", "9.4", "克里斯托弗·诺兰"},
        {"千与千寻", "不要忘记自己的名字", "9.4", "宫崎骏"},
        {"辛德勒的名单", "拯救一个人就是拯救全世界", "9.5", "史蒂文·斯皮尔伯格"},
        {"疯狂动物城", "勇敢尝试,一切皆有可能", "9.2", "拜恩·霍华德"},
        {"哪吒之魔童降世", "我命由我不由天", "8.4", "饺子"}
    };

    /**
     * Returns the mock entries, ranked in table order starting at 1.
     *
     * @return the list of entries
     * @throws CrawlerException a {@link ParseException} wrapping any exception raised while
     *                          building the entries
     */
    @Override
    public List<CrawlerItem> crawl() throws CrawlerException {
        try {
            return parseMockData();
        } catch (Exception e) {
            // Kept broad on purpose: runtime failures are also reported as ParseException.
            throw new ParseException("解析豆瓣数据失败", e);
        }
    }

    /**
     * Converts every mock row into a {@link DoubanItem}.
     *
     * @throws UnsupportedEncodingException if UTF-8 is not available for URL encoding
     */
    private List<CrawlerItem> parseMockData() throws UnsupportedEncodingException {
        List<CrawlerItem> items = new ArrayList<>();
        for (int i = 0; i < MOCK_ROWS.length; i++) {
            String[] row = MOCK_ROWS[i];
            DoubanItem item = new DoubanItem();
            item.setRank(i + 1);
            item.setName(row[0]);
            item.setDescription(row[1]);
            item.setRating(row[2]);
            item.setDirector(row[3]);
            // The title is free text (non-ASCII, may contain '&', '#', spaces), so it must be
            // percent-encoded before being placed in the query string.
            item.setUrl(SEARCH_URL_PREFIX + URLEncoder.encode(row[0], "UTF-8"));
            items.add(item);
        }
        return items;
    }

    /** @return the display name of this source */
    public String getSourceName() {
        return "豆瓣电影Top250";
    }
}
/**
 * {@link Command} that runs a {@link Crawler} and keeps what it returned for later retrieval.
 */
class CrawlCommand implements Command {

    private static final Logger logger = Logger.getLogger(CrawlCommand.class.getName());

    private Crawler strategy;
    private List<? extends CrawlerItem> result;

    /**
     * @param strategy the crawler to invoke when this command executes
     */
    public CrawlCommand(Crawler strategy) {
        this.strategy = strategy;
    }

    /**
     * Invokes the crawler, stores its result and logs how many entries were obtained.
     *
     * @throws CrawlerException if the crawler fails
     */
    @Override
    public void execute() throws CrawlerException {
        logger.info("开始爬取...");
        List<? extends CrawlerItem> fetched = strategy.crawl();
        this.result = fetched;
        int count = fetched.size();
        logger.info("爬取完成,共获取 " + count + " 条数据");
    }

    @Override
    public String getCommandName() {
        return "crawl";
    }

    /**
     * @return the entries stored by the last {@link #execute()} call, or {@code null} if it has
     *         not stored any yet
     */
    public List<? extends CrawlerItem> getResult() {
        return result;
    }
}
/**
 * {@link Command} that writes a list of {@link CrawlerItem} entries to a CSV file.
 *
 * <p>The file is always written as UTF-8 so the output does not depend on the platform's
 * default charset. The header is chosen from the first item only, while each row's extra
 * columns are chosen per item; a list mixing item types would therefore produce rows that do
 * not match the header.
 */
class SaveCommand implements Command {

    private static final Logger logger = Logger.getLogger(SaveCommand.class.getName());

    private final List<? extends CrawlerItem> items;
    private final String filename;

    /**
     * @param items    the entries to write
     * @param filename path of the CSV file to create or overwrite
     */
    public SaveCommand(List<? extends CrawlerItem> items, String filename) {
        this.items = items;
        this.filename = filename;
    }

    /**
     * Writes the header line followed by one line per item.
     *
     * @throws CrawlerException if there are no items, or if the file cannot be written
     */
    @Override
    public void execute() throws CrawlerException {
        if (items == null || items.isEmpty()) {
            throw new CrawlerException("没有可保存的数据");
        }
        try (Writer writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(filename), StandardCharsets.UTF_8))) {
            writer.write(generateHeader(items.get(0)));
            writer.write("\n");
            for (CrawlerItem item : items) {
                writer.write(generateLine(item));
                writer.write("\n");
            }
        } catch (IOException e) {
            throw new CrawlerException("保存文件失败: " + e.getMessage(), e);
        }
        // Report success only after the writer has been flushed and closed without error.
        logger.info("数据已保存到: " + filename);
        System.out.println("数据已保存到: " + filename);
    }

    /** Picks the CSV header matching the concrete type of the given item. */
    private String generateHeader(CrawlerItem item) {
        if (item instanceof GitHubItem) {
            return "排名,名称,链接,描述,语言,星数,今日星数,来源";
        } else if (item instanceof WeiboItem) {
            return "排名,名称,链接,描述,热度,来源";
        } else if (item instanceof DoubanItem) {
            return "排名,名称,链接,描述,评分,导演,来源";
        }
        return "排名,名称,链接,描述,来源";
    }

    /** Builds one CSV row; every textual field goes through {@link #escapeCsv(String)}. */
    private String generateLine(CrawlerItem item) {
        StringBuilder sb = new StringBuilder();
        sb.append(item.getRank()).append(",");
        sb.append(escapeCsv(item.getName())).append(",");
        sb.append(escapeCsv(item.getUrl())).append(",");
        sb.append(escapeCsv(item.getDescription())).append(",");
        if (item instanceof GitHubItem) {
            GitHubItem gitHub = (GitHubItem) item;
            sb.append(escapeCsv(gitHub.getLanguage())).append(",");
            sb.append(escapeCsv(gitHub.getStars())).append(",");
            sb.append(escapeCsv(gitHub.getTodayStars())).append(",");
        } else if (item instanceof WeiboItem) {
            WeiboItem weibo = (WeiboItem) item;
            sb.append(escapeCsv(weibo.getHot())).append(",");
        } else if (item instanceof DoubanItem) {
            DoubanItem douban = (DoubanItem) item;
            sb.append(escapeCsv(douban.getRating())).append(",");
            sb.append(escapeCsv(douban.getDirector())).append(",");
        }
        sb.append(escapeCsv(item.getSource()));
        return sb.toString();
    }

    /**
     * Quotes a value when it contains a comma, a double quote or a line break, doubling any
     * embedded double quotes; {@code null} becomes an empty field.
     */
    private String escapeCsv(String value) {
        if (value == null) {
            return "";
        }
        boolean needsQuoting = value.contains(",")
                || value.contains("\"")
                || value.contains("\n")
                || value.contains("\r");
        if (needsQuoting) {
            return "\"" + value.replace("\"", "\"\"") + "\"";
        }
        return value;
    }

    @Override
    public String getCommandName() {
        return "save";
    }
}
/**
 * {@link Command} that prints a list of {@link CrawlerItem} entries to standard output.
 */
class DisplayCommand implements Command {

    private static final Logger logger = Logger.getLogger(DisplayCommand.class.getName());

    private List<? extends CrawlerItem> items;

    /**
     * @param items the entries to print
     */
    public DisplayCommand(List<? extends CrawlerItem> items) {
        this.items = items;
    }

    /**
     * Prints a banner and then every entry: shared fields first, type-specific fields after.
     *
     * @throws CrawlerException if there are no entries to print
     */
    @Override
    public void execute() throws CrawlerException {
        boolean nothingToShow = (items == null) || items.isEmpty();
        if (nothingToShow) {
            throw new CrawlerException("没有可显示的数据");
        }

        System.out.println("\n=== 爬取结果 ===");
        for (CrawlerItem entry : items) {
            printSharedFields(entry);
            printTypeSpecificFields(entry);
        }
        logger.info("已显示 " + items.size() + " 条数据");
    }

    /** Prints the fields available on every {@link CrawlerItem}. */
    private void printSharedFields(CrawlerItem entry) {
        System.out.println("\n" + entry.getRank() + ". " + entry.getName());
        System.out.println("链接: " + entry.getUrl());
        System.out.println("描述: " + entry.getDescription());
        System.out.println("来源: " + entry.getSource());
    }

    /** Prints the extra fields of the known item types; other types print nothing extra. */
    private void printTypeSpecificFields(CrawlerItem entry) {
        if (entry instanceof GitHubItem) {
            GitHubItem repo = (GitHubItem) entry;
            System.out.println("语言: " + repo.getLanguage());
            System.out.println("星数: " + repo.getStars());
            System.out.println("今日星数: " + repo.getTodayStars());
            return;
        }
        if (entry instanceof WeiboItem) {
            WeiboItem topic = (WeiboItem) entry;
            System.out.println("热度: " + topic.getHot());
            return;
        }
        if (entry instanceof DoubanItem) {
            DoubanItem movie = (DoubanItem) entry;
            System.out.println("评分: " + movie.getRating());
            System.out.println("导演: " + movie.getDirector());
        }
    }

    @Override
    public String getCommandName() {
        return "display";
    }
}
/**
 * Runs the crawl, display and save pipeline for one source or for all of them.
 */
class CrawlerController {

    private static final Logger logger = Logger.getLogger(CrawlerController.class.getName());

    /**
     * Crawls a single source, prints the result and saves it to a CSV file.
     *
     * @param source source key, matched case-insensitively: {@code github}, {@code weibo} or
     *               {@code douban}
     * @throws CrawlerException if the source is {@code null} or unknown, or if any step fails
     */
    public void executeCrawl(String source) throws CrawlerException {
        Crawler strategy = createStrategy(source);
        executeWithStrategy(strategy, source);
    }

    /**
     * Crawls every source in turn. A {@link CrawlerException} from one source is logged and
     * does not stop the remaining sources.
     *
     * @throws CrawlerException declared for callers; per-source failures are handled here
     */
    public void executeCrawlAll() throws CrawlerException {
        String[] sources = {"GitHub Trending", "微博热搜", "豆瓣电影Top250"};
        Crawler[] crawlers = {new GitHubStrategy(), new WeiboStrategy(), new DoubanStrategy()};
        for (int i = 0; i < crawlers.length; i++) {
            try {
                executeWithStrategy(crawlers[i], sources[i]);
            } catch (CrawlerException e) {
                logger.severe("爬取 " + sources[i] + " 失败: " + e.getMessage());
            }
        }
    }

    /** Maps a source key to its crawler, rejecting {@code null} and unknown keys. */
    private Crawler createStrategy(String source) throws CrawlerException {
        if (source == null) {
            throw new CrawlerException("不支持的数据源: " + source);
        }
        // Locale.ROOT keeps the match independent of the default locale; a Turkish locale,
        // for example, would otherwise lower-case "GITHUB" to "gıthub" (dotless i).
        switch (source.toLowerCase(Locale.ROOT)) {
            case "github":
                return new GitHubStrategy();
            case "weibo":
                return new WeiboStrategy();
            case "douban":
                return new DoubanStrategy();
            default:
                throw new CrawlerException("不支持的数据源: " + source);
        }
    }

    /**
     * Runs crawl, display and save for one crawler. The CSV file name is the source name with
     * spaces replaced by underscores, followed by the current time in milliseconds.
     */
    private void executeWithStrategy(Crawler strategy, String sourceName) throws CrawlerException {
        CrawlCommand crawlCommand = new CrawlCommand(strategy);
        crawlCommand.execute();
        List<? extends CrawlerItem> result = crawlCommand.getResult();

        DisplayCommand displayCommand = new DisplayCommand(result);
        displayCommand.execute();

        String filename = sourceName.replace(" ", "_") + "_" + System.currentTimeMillis() + ".csv";
        SaveCommand saveCommand = new SaveCommand(result, filename);
        saveCommand.execute();
    }
}
/**
 * Interactive console front end: shows a menu, reads one command per line and dispatches it
 * to a {@link CrawlerController}.
 */
class CLI {

    private static final Logger logger = Logger.getLogger(CLI.class.getName());

    private final CrawlerController controller;

    public CLI() {
        this.controller = new CrawlerController();
    }

    /**
     * Runs the menu loop until the user chooses to exit ({@code 6} or {@code exit}) or the
     * input stream ends. Command failures are reported and the loop continues.
     */
    public void start() {
        printWelcome();
        Scanner scanner = new Scanner(System.in);
        while (true) {
            printMenu();
            System.out.print("请输入选择: ");
            // End of input (closed pipe, Ctrl-D): leave the loop instead of letting
            // nextLine() throw NoSuchElementException.
            if (!scanner.hasNextLine()) {
                break;
            }
            String input = scanner.nextLine().trim();
            // Decide on exit before dispatching so that both "6" and any casing of "exit"
            // terminate the loop without an "invalid input" message.
            if (isExitCommand(input)) {
                break;
            }
            try {
                handleInput(input);
            } catch (CrawlerException e) {
                System.err.println("错误: " + e.getMessage());
                logger.severe("执行失败: " + e.getMessage());
            }
        }
        scanner.close();
        System.out.println("\n感谢使用爬虫工具,再见!");
    }

    /** Returns whether the input is one of the two exit commands shown in the menu and help. */
    private static boolean isExitCommand(String input) {
        return "6".equals(input) || "exit".equalsIgnoreCase(input);
    }

    private void printWelcome() {
        System.out.println("=========================================");
        System.out.println(" 多网站爬虫工具 v1.0");
        System.out.println("=========================================");
        System.out.println("支持爬取: GitHub Trending / 微博热搜 / 豆瓣电影");
        System.out.println("=========================================\n");
    }

    private void printMenu() {
        System.out.println("\n请选择操作:");
        System.out.println("1. 爬取 GitHub Trending");
        System.out.println("2. 爬取 微博热搜");
        System.out.println("3. 爬取 豆瓣电影Top250");
        System.out.println("4. 爬取所有网站");
        System.out.println("5. 帮助");
        System.out.println("6. 退出");
    }

    /**
     * Dispatches one command, given either as a menu number or as a command name in any
     * letter case. Unknown input prints a hint.
     *
     * @param input the trimmed line typed by the user
     * @throws CrawlerException if the selected crawl fails
     */
    private void handleInput(String input) throws CrawlerException {
        // Locale.ROOT keeps command matching independent of the default locale.
        switch (input.toLowerCase(Locale.ROOT)) {
            case "1":
            case "github":
                controller.executeCrawl("github");
                break;
            case "2":
            case "weibo":
                controller.executeCrawl("weibo");
                break;
            case "3":
            case "douban":
                controller.executeCrawl("douban");
                break;
            case "4":
            case "all":
                controller.executeCrawlAll();
                break;
            case "5":
            case "help":
                printHelp();
                break;
            case "6":
            case "exit":
                // Exit is handled by the caller's loop; nothing to do here.
                break;
            default:
                System.out.println("无效输入,请输入数字 1-6 或命令名称");
        }
    }

    private void printHelp() {
        System.out.println("\n=== 帮助信息 ===");
        System.out.println("命令列表:");
        System.out.println(" 1 / github - 爬取 GitHub Trending");
        System.out.println(" 2 / weibo - 爬取 微博热搜");
        System.out.println(" 3 / douban - 爬取 豆瓣电影Top250");
        System.out.println(" 4 / all - 爬取所有网站");
        System.out.println(" 5 / help - 显示帮助信息");
        System.out.println(" 6 / exit - 退出程序");
        System.out.println("\n输出说明:");
        System.out.println(" - 控制台会显示爬取结果");
        System.out.println(" - 数据会自动保存为 CSV 文件");
        System.out.println(" - 文件名格式: [来源]_[时间戳].csv");
    }
}
public class CrawlerProject {
public static void main(String[] args) {
CLI cli = new CLI();
cli.start();
}
}

BIN
202506050310-黄若妍-期末实验报告/项目报告v1(1).docx

Binary file not shown.
Loading…
Cancel
Save