/**
 * Generates a CSV file of randomly combined "hot job" sample records.
 *
 * <p>Each record is built by independently picking one value from each of the
 * constant tables below (job title, industry, salary range, data source,
 * region, demand level, extra info) and joining them with commas. The file
 * starts with a fixed header row and any existing file at the target path is
 * overwritten.
 *
 * <p>Usage: {@code java GenerateHotJobs [outputPath]}. When no path is given
 * the historical default location {@link #DEFAULT_OUTPUT_PATH} is used.
 */
public class GenerateHotJobs {
    // Candidate job titles.
    private static final String[] HOT_JOBS = {
        "人工智能工程师", "大数据分析师", "云计算架构师", "物联网工程师", "网络安全工程师",
        "区块链开发工程师", "前端开发工程师", "后端开发工程师", "全栈开发工程师", "DevOps工程师",
        "移动开发工程师", "数据科学家", "机器学习工程师", "算法工程师", "数据工程师",
        "产品经理", "UI设计师", "UX设计师", "测试工程师", "运维工程师",
        "网络工程师", "系统架构师", "数据库工程师", "嵌入式开发工程师", "游戏开发工程师",
        "AR/VR开发工程师", "5G工程师", "芯片设计工程师", "量子计算工程师", "信息安全专家",
        "网络安全分析师", "渗透测试工程师", "安全运维工程师", "安全架构师", "安全开发工程师",
        "金融科技工程师", "量化交易工程师", "风险控制工程师", "金融分析师", "投资顾问",
        "高级机械工程师", "电气工程师", "自动化工程师", "工业设计师", "制造工程师",
        "光伏工程师", "风电工程师", "新能源工程师", "环保工程师", "可持续发展顾问",
        "高级医生", "高级护理", "医学研究员", "制药工程师", "医疗设备工程师",
        "高级教师", "教育顾问", "培训师", "课程设计师", "教育技术专家",
        "市场营销经理", "品牌经理", "市场分析师", "营销策划师", "数字营销专家",
        "销售经理", "客户关系经理", "商务拓展经理", "渠道经理", "销售顾问",
        "物流管理师", "供应链经理", "采购经理", "仓储管理师", "物流分析师",
        "人力资源经理", "招聘专员", "培训发展经理", "薪酬福利经理", "员工关系专员",
        "财务经理", "注册会计师", "审计师", "税务师", "财务分析师",
        "法律顾问", "律师", "合规专员", "知识产权专家", "法务经理"
    };

    // Candidate industries / categories.
    private static final String[] INDUSTRIES = {
        "数字经济", "信息技术", "金融科技", "制造业", "新能源",
        "医疗健康", "教育行业", "市场营销", "销售", "物流行业",
        "人力资源", "财务会计", "法律服务", "电子商务", "互联网",
        "人工智能", "大数据", "云计算", "物联网", "网络安全"
    };

    // Candidate regions.
    private static final String[] REGIONS = {
        "北京", "上海", "广州", "深圳", "杭州", "南京", "成都", "武汉", "西安", "重庆",
        "天津", "苏州", "厦门", "青岛", "大连", "长沙", "济南", "合肥", "福州", "哈尔滨",
        "全国"
    };

    // Candidate data-source names.
    private static final String[] SOURCES = {
        "中国劳动和社会保障科学研究院", "国家统计局", "湖南省人社厅"
    };

    // Candidate demand levels.
    private static final String[] DEMAND_LEVELS = {
        "高", "中高", "中", "一般", "非常紧缺", "紧缺", "一般紧缺"
    };

    // Candidate values for the free-form "other info" column.
    private static final String[] OTHER_INFOS = {
        "重点区域数字热门岗位", "重点行业典型岗位", "国家统计局职业薪资数据", "湖南省紧缺职业数据"
    };

    // Candidate salary ranges.
    private static final String[] SALARY_RANGES = {
        "15000-30000元/月", "12000-25000元/月", "10000-20000元/月", "8000-15000元/月",
        "6000-12000元/月", "4000-8000元/月", "20000-35000元/月", "18000-40000元/月",
        "9000-16000元/月", "7000-13000元/月", "5000-9000元/月"
    };

    /** Output location used when no path is passed on the command line. */
    private static final String DEFAULT_OUTPUT_PATH = "c:\\Users\\ZRL\\Desktop\\爬虫\\原始人才市场数据.csv";

    /** Header row written as the first line of the CSV. */
    private static final String CSV_HEADER = "岗位名称,行业/类别,薪资,数据来源,地区,需求程度,其他信息";

    /** Number of data rows generated per run. */
    private static final int TOTAL_JOBS = 500;

    private static final Random RANDOM = new Random();

    /**
     * Generates the records and writes them to the target CSV file.
     *
     * @param args optional; {@code args[0]} overrides the output file path
     */
    public static void main(String[] args) {
        boolean hasCustomPath = args != null && args.length > 0 && !args[0].isEmpty();
        String outputPath = hasCustomPath ? args[0] : DEFAULT_OUTPUT_PATH;

        try {
            List<String> lines = buildLines(TOTAL_JOBS);
            writeLines(outputPath, lines);

            System.out.println("成功生成" + TOTAL_JOBS + "条热门岗位信息并更新到原始人才市场数据.csv文件");
            System.out.println("文件路径: " + outputPath);

        } catch (IOException e) {
            System.err.println("生成热门岗位信息时出现错误: " + e.getMessage());
            e.printStackTrace();
        }
    }

    /** Builds the header row followed by {@code count} random data rows. */
    private static List<String> buildLines(int count) {
        List<String> lines = new ArrayList<>(count + 1);
        lines.add(CSV_HEADER);

        for (int i = 0; i < count; i++) {
            // Draw order is kept stable: title, industry, salary, source, region, demand, other.
            String jobTitle = pick(HOT_JOBS);
            String industry = pick(INDUSTRIES);
            String salary = pick(SALARY_RANGES);
            String source = pick(SOURCES);
            String region = pick(REGIONS);
            String demandLevel = pick(DEMAND_LEVELS);
            String otherInfo = pick(OTHER_INFOS);

            lines.add(String.join(",",
                    jobTitle, industry, salary, source, region, demandLevel, otherInfo));
        }
        return lines;
    }

    /** Returns one uniformly chosen element of {@code values}. */
    private static String pick(String[] values) {
        return values[RANDOM.nextInt(values.length)];
    }

    /**
     * Writes the lines to {@code outputPath}, replacing any existing file.
     * UTF-8 is used explicitly so the Chinese text does not depend on the
     * platform default charset. Types are fully qualified so that the file's
     * import block does not need to change.
     */
    private static void writeLines(String outputPath, List<String> lines) throws IOException {
        try (BufferedWriter writer = java.nio.file.Files.newBufferedWriter(
                java.nio.file.Paths.get(outputPath),
                java.nio.charset.StandardCharsets.UTF_8)) {
            for (String line : lines) {
                writer.write(line);
                writer.newLine();
            }
        }
    }
}
a/project/爬虫/bin/com/jobmarket/crawler/command/CommandFactory.class b/project/爬虫/bin/com/jobmarket/crawler/command/CommandFactory.class new file mode 100644 index 0000000..738aecf Binary files /dev/null and b/project/爬虫/bin/com/jobmarket/crawler/command/CommandFactory.class differ diff --git a/project/爬虫/bin/com/jobmarket/crawler/command/CrawlCommand.class b/project/爬虫/bin/com/jobmarket/crawler/command/CrawlCommand.class new file mode 100644 index 0000000..4d66b37 Binary files /dev/null and b/project/爬虫/bin/com/jobmarket/crawler/command/CrawlCommand.class differ diff --git a/project/爬虫/bin/com/jobmarket/crawler/command/DisplayCommand.class b/project/爬虫/bin/com/jobmarket/crawler/command/DisplayCommand.class new file mode 100644 index 0000000..3ed7944 Binary files /dev/null and b/project/爬虫/bin/com/jobmarket/crawler/command/DisplayCommand.class differ diff --git a/project/爬虫/bin/com/jobmarket/crawler/command/StatisticsCommand.class b/project/爬虫/bin/com/jobmarket/crawler/command/StatisticsCommand.class new file mode 100644 index 0000000..033796b Binary files /dev/null and b/project/爬虫/bin/com/jobmarket/crawler/command/StatisticsCommand.class differ diff --git a/project/爬虫/bin/com/jobmarket/crawler/crawlers/HunanHumanResourcesCrawler.class b/project/爬虫/bin/com/jobmarket/crawler/crawlers/HunanHumanResourcesCrawler.class new file mode 100644 index 0000000..ba680b6 Binary files /dev/null and b/project/爬虫/bin/com/jobmarket/crawler/crawlers/HunanHumanResourcesCrawler.class differ diff --git a/project/爬虫/bin/com/jobmarket/crawler/crawlers/JobCrawler.class b/project/爬虫/bin/com/jobmarket/crawler/crawlers/JobCrawler.class new file mode 100644 index 0000000..659543f Binary files /dev/null and b/project/爬虫/bin/com/jobmarket/crawler/crawlers/JobCrawler.class differ diff --git a/project/爬虫/bin/com/jobmarket/crawler/crawlers/LaborScienceInstituteCrawler.class b/project/爬虫/bin/com/jobmarket/crawler/crawlers/LaborScienceInstituteCrawler.class new file mode 100644 index 
0000000..4146dcd Binary files /dev/null and b/project/爬虫/bin/com/jobmarket/crawler/crawlers/LaborScienceInstituteCrawler.class differ diff --git a/project/爬虫/bin/com/jobmarket/crawler/crawlers/NBSCrawler.class b/project/爬虫/bin/com/jobmarket/crawler/crawlers/NBSCrawler.class new file mode 100644 index 0000000..8fe421c Binary files /dev/null and b/project/爬虫/bin/com/jobmarket/crawler/crawlers/NBSCrawler.class differ diff --git a/project/爬虫/bin/com/jobmarket/crawler/exception/CrawlerException.class b/project/爬虫/bin/com/jobmarket/crawler/exception/CrawlerException.class new file mode 100644 index 0000000..2954d80 Binary files /dev/null and b/project/爬虫/bin/com/jobmarket/crawler/exception/CrawlerException.class differ diff --git a/project/爬虫/bin/com/jobmarket/crawler/exception/NetworkException.class b/project/爬虫/bin/com/jobmarket/crawler/exception/NetworkException.class new file mode 100644 index 0000000..3314008 Binary files /dev/null and b/project/爬虫/bin/com/jobmarket/crawler/exception/NetworkException.class differ diff --git a/project/爬虫/bin/com/jobmarket/crawler/logging/ConsoleLogger.class b/project/爬虫/bin/com/jobmarket/crawler/logging/ConsoleLogger.class new file mode 100644 index 0000000..84f7a17 Binary files /dev/null and b/project/爬虫/bin/com/jobmarket/crawler/logging/ConsoleLogger.class differ diff --git a/project/爬虫/bin/com/jobmarket/crawler/logging/LogLevel.class b/project/爬虫/bin/com/jobmarket/crawler/logging/LogLevel.class new file mode 100644 index 0000000..4e78216 Binary files /dev/null and b/project/爬虫/bin/com/jobmarket/crawler/logging/LogLevel.class differ diff --git a/project/爬虫/bin/com/jobmarket/crawler/logging/Logger.class b/project/爬虫/bin/com/jobmarket/crawler/logging/Logger.class new file mode 100644 index 0000000..b81c080 Binary files /dev/null and b/project/爬虫/bin/com/jobmarket/crawler/logging/Logger.class differ diff --git a/project/爬虫/bin/com/jobmarket/crawler/logging/LoggerFactory.class 
b/project/爬虫/bin/com/jobmarket/crawler/logging/LoggerFactory.class new file mode 100644 index 0000000..f65d402 Binary files /dev/null and b/project/爬虫/bin/com/jobmarket/crawler/logging/LoggerFactory.class differ diff --git a/project/爬虫/bin/com/jobmarket/crawler/model/JobData.class b/project/爬虫/bin/com/jobmarket/crawler/model/JobData.class new file mode 100644 index 0000000..0ac3277 Binary files /dev/null and b/project/爬虫/bin/com/jobmarket/crawler/model/JobData.class differ diff --git a/project/爬虫/bin/com/jobmarket/crawler/repository/CSVJobDataRepository.class b/project/爬虫/bin/com/jobmarket/crawler/repository/CSVJobDataRepository.class new file mode 100644 index 0000000..12bae1a Binary files /dev/null and b/project/爬虫/bin/com/jobmarket/crawler/repository/CSVJobDataRepository.class differ diff --git a/project/爬虫/bin/com/jobmarket/crawler/repository/JobDataRepository.class b/project/爬虫/bin/com/jobmarket/crawler/repository/JobDataRepository.class new file mode 100644 index 0000000..7f74fcc Binary files /dev/null and b/project/爬虫/bin/com/jobmarket/crawler/repository/JobDataRepository.class differ diff --git a/project/爬虫/bin/com/jobmarket/crawler/strategy/CrawlStrategy.class b/project/爬虫/bin/com/jobmarket/crawler/strategy/CrawlStrategy.class new file mode 100644 index 0000000..89f1704 Binary files /dev/null and b/project/爬虫/bin/com/jobmarket/crawler/strategy/CrawlStrategy.class differ diff --git a/project/爬虫/bin/com/jobmarket/crawler/strategy/HunanStrategy.class b/project/爬虫/bin/com/jobmarket/crawler/strategy/HunanStrategy.class new file mode 100644 index 0000000..93218fb Binary files /dev/null and b/project/爬虫/bin/com/jobmarket/crawler/strategy/HunanStrategy.class differ diff --git a/project/爬虫/bin/com/jobmarket/crawler/strategy/LaborScienceStrategy.class b/project/爬虫/bin/com/jobmarket/crawler/strategy/LaborScienceStrategy.class new file mode 100644 index 0000000..a0ec7b3 Binary files /dev/null and 
b/project/爬虫/bin/com/jobmarket/crawler/strategy/LaborScienceStrategy.class differ diff --git a/project/爬虫/bin/com/jobmarket/crawler/strategy/NBStrategy.class b/project/爬虫/bin/com/jobmarket/crawler/strategy/NBStrategy.class new file mode 100644 index 0000000..f20eedc Binary files /dev/null and b/project/爬虫/bin/com/jobmarket/crawler/strategy/NBStrategy.class differ diff --git a/project/爬虫/bin/com/jobmarket/crawler/strategy/StrategyFactory.class b/project/爬虫/bin/com/jobmarket/crawler/strategy/StrategyFactory.class new file mode 100644 index 0000000..35554ef Binary files /dev/null and b/project/爬虫/bin/com/jobmarket/crawler/strategy/StrategyFactory.class differ diff --git a/project/爬虫/bin/com/jobmarket/crawler/utils/CSVWriter.class b/project/爬虫/bin/com/jobmarket/crawler/utils/CSVWriter.class new file mode 100644 index 0000000..2a18e26 Binary files /dev/null and b/project/爬虫/bin/com/jobmarket/crawler/utils/CSVWriter.class differ diff --git a/project/爬虫/bin/com/jobmarket/crawler/utils/CrawlerUtils.class b/project/爬虫/bin/com/jobmarket/crawler/utils/CrawlerUtils.class new file mode 100644 index 0000000..cd2ba59 Binary files /dev/null and b/project/爬虫/bin/com/jobmarket/crawler/utils/CrawlerUtils.class differ diff --git a/project/爬虫/sources.txt b/project/爬虫/sources.txt new file mode 100644 index 0000000..71f8d8c --- /dev/null +++ b/project/爬虫/sources.txt @@ -0,0 +1,37 @@ +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\ControllerApp.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\JobMarketCrawler.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\QuickCrawler.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\command\ClearCommand.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\command\Command.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\command\CommandFactory.java 
+C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\command\CrawlCommand.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\command\DisplayCommand.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\command\StatisticsCommand.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\crawlers\HunanHumanResourcesCrawler.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\crawlers\JobCrawler.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\crawlers\LaborScienceInstituteCrawler.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\crawlers\NBSCrawler.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\exception\CrawlerException.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\exception\NetworkException.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\exception\ParseException.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\exception\StorageException.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\exception\StrategyException.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\exception\ValidationException.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\logging\ConsoleLogger.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\logging\Logger.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\logging\LoggerFactory.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\logging\LogLevel.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\model\JobData.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\repository\CSVJobDataRepository.java 
+C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\repository\JobDataRepository.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\retry\RetryCallback.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\retry\RetryConfig.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\retry\RetryContext.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\retry\RetryTemplate.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\strategy\CrawlStrategy.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\strategy\HunanStrategy.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\strategy\LaborScienceStrategy.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\strategy\NBStrategy.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\strategy\StrategyFactory.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\utils\CrawlerUtils.java +C:\Users\ZRL\Desktop\java\project\爬虫\src\main\java\com\jobmarket\crawler\utils\CSVWriter.java diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/ControllerApp.java b/project/爬虫/src/main/java/com/jobmarket/crawler/ControllerApp.java new file mode 100644 index 0000000..ba973aa --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/ControllerApp.java @@ -0,0 +1,223 @@ +package com.jobmarket.crawler; + +import com.jobmarket.crawler.command.Command; +import com.jobmarket.crawler.command.CommandFactory; +import com.jobmarket.crawler.exception.CrawlerException; +import com.jobmarket.crawler.exception.NetworkException; +import com.jobmarket.crawler.exception.ParseException; +import com.jobmarket.crawler.exception.StorageException; +import com.jobmarket.crawler.exception.StrategyException; +import com.jobmarket.crawler.exception.ValidationException; +import 
com.jobmarket.crawler.logging.Logger; +import com.jobmarket.crawler.logging.LoggerFactory; +import com.jobmarket.crawler.logging.LogLevel; +import com.jobmarket.crawler.repository.CSVJobDataRepository; +import com.jobmarket.crawler.repository.JobDataRepository; + +import java.io.IOException; +import java.util.Scanner; + +/** + * 应用主控制器 + * 使用命令模式和策略模式重构后的主入口 + * 提供用户交互界面,支持多种命令操作 + * W11版本:集成异常体系、工程化日志、重试机制 + */ +public class ControllerApp { + + private static final Logger logger = LoggerFactory.getLogger(ControllerApp.class); + + private static final JobDataRepository REPOSITORY = new CSVJobDataRepository(); + private static final CommandFactory COMMAND_FACTORY = new CommandFactory(REPOSITORY); + + public static void main(String[] args) { + // 配置日志级别 + LoggerFactory.setGlobalLevel(LogLevel.INFO); + + logger.info("══════════════════════════════════════════════════════"); + logger.info("人才市场数据爬虫系统 (W11版本) - 健壮性工程"); + logger.info("集成: 自定义异常体系 | 工程化日志 | 重试机制"); + logger.info("══════════════════════════════════════════════════════"); + + Scanner scanner = new Scanner(System.in); + boolean running = true; + + while (running) { + try { + printMenu(); + System.out.print("请输入命令编号: "); + String input = scanner.nextLine().trim(); + + running = processCommand(input); + } catch (ValidationException e) { + logger.error("参数校验失败: {}", e.toString()); + printExceptionDetails(e); + } catch (NetworkException e) { + logger.error("网络异常: {}", e.toString()); + printExceptionDetails(e); + } catch (ParseException e) { + logger.error("数据解析异常: {}", e.toString()); + printExceptionDetails(e); + } catch (StorageException e) { + logger.error("数据存储异常: {}", e.toString()); + printExceptionDetails(e); + } catch (StrategyException e) { + logger.error("策略执行异常: {}", e.toString()); + printExceptionDetails(e); + } catch (CrawlerException e) { + logger.error("爬虫系统异常: {}", e.toString()); + printExceptionDetails(e); + } catch (Exception e) { + logger.error("未知异常: {}", e.getMessage(), e); + 
System.err.println("\n✗ 发生未知错误,请查看日志"); + } + + if (running) { + System.out.println("\n按 Enter 键继续..."); + scanner.nextLine(); + clearScreen(); + } + } + + scanner.close(); + logger.info("感谢使用人才市场数据爬虫系统!"); + System.out.println("\n感谢使用人才市场数据爬虫系统!"); + } + + /** + * 打印菜单 + */ + private static void printMenu() { + System.out.println("\n【命令菜单】"); + System.out.println("──────────────────────────────────────────────────────"); + System.out.println("1. crawl - 执行数据爬取"); + System.out.println("2. display - 显示数据列表"); + System.out.println("3. stats - 统计数据分析"); + System.out.println("4. clear - 清空所有数据"); + System.out.println("5. debug - 开启调试模式"); + System.out.println("6. exit - 退出系统"); + System.out.println("──────────────────────────────────────────────────────"); + } + + /** + * 处理用户输入的命令 + */ + private static boolean processCommand(String input) throws IOException { + if (input == null || input.trim().isEmpty()) { + logger.warn("用户输入为空"); + return true; + } + + String commandType = null; + + switch (input.toLowerCase()) { + case "1": + case "crawl": + commandType = "crawl"; + break; + case "2": + case "display": + case "show": + commandType = "display"; + break; + case "3": + case "stats": + case "statistics": + commandType = "statistics"; + break; + case "4": + case "clear": + commandType = "clear"; + break; + case "5": + case "debug": + toggleDebugMode(); + return true; + case "6": + case "exit": + case "quit": + return false; + default: + System.out.println("未知命令: " + input); + logger.warn("未知命令: {}", input); + return true; + } + + Command command = COMMAND_FACTORY.getCommand(commandType); + if (command != null) { + logger.info("执行命令: {} - {}", command.getName(), command.getDescription()); + command.execute(); + } + + return true; + } + + /** + * 切换调试模式 + */ + private static void toggleDebugMode() { + LogLevel currentLevel = LoggerFactory.getGlobalLevel(); + + if (currentLevel == LogLevel.DEBUG) { + LoggerFactory.setGlobalLevel(LogLevel.INFO); + 
System.out.println("调试模式已关闭"); + logger.info("日志级别已切换为: INFO"); + } else { + LoggerFactory.setGlobalLevel(LogLevel.DEBUG); + System.out.println("调试模式已开启"); + logger.info("日志级别已切换为: DEBUG"); + } + } + + /** + * 打印异常详情 + */ + private static void printExceptionDetails(CrawlerException e) { + System.err.println("\n┌──────────────────────────────────────────────────────┐"); + System.err.println("│ 异常详情 │"); + System.err.println("├──────────────────────────────────────────────────────┤"); + System.err.println("│ 错误代码: " + e.getErrorCode()); + System.err.println("│ 错误信息: " + e.getErrorMessage()); + + if (e instanceof NetworkException) { + NetworkException ne = (NetworkException) e; + System.err.println("│ 失败URL: " + ne.getUrl()); + if (ne.getStatusCode() > 0) { + System.err.println("│ HTTP状态码: " + ne.getStatusCode()); + } + } else if (e instanceof ParseException) { + ParseException pe = (ParseException) e; + System.err.println("│ 数据源类型: " + pe.getSourceType()); + System.err.println("│ 解析位置: " + pe.getParseLocation()); + } else if (e instanceof StorageException) { + StorageException se = (StorageException) e; + System.err.println("│ 存储类型: " + se.getStorageType()); + System.err.println("│ 文件路径: " + se.getFilePath()); + } else if (e instanceof StrategyException) { + StrategyException ste = (StrategyException) e; + System.err.println("│ 策略名称: " + ste.getStrategyName()); + System.err.println("│ 策略类型: " + ste.getStrategyType()); + } else if (e instanceof ValidationException) { + ValidationException ve = (ValidationException) e; + System.err.println("│ 字段名称: " + ve.getFieldName()); + if (ve.getFieldValue() != null) { + System.err.println("│ 字段值: " + ve.getFieldValue()); + } + } + + if (e.getCause() != null) { + System.err.println("│ 根因: " + e.getCause().getMessage()); + } + + System.err.println("└──────────────────────────────────────────────────────┘"); + } + + /** + * 清除屏幕(简单实现) + */ + private static void clearScreen() { + for (int i = 0; i < 50; i++) { + System.out.println(); 
+ } + } +} diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/JobMarketCrawler.java b/project/爬虫/src/main/java/com/jobmarket/crawler/JobMarketCrawler.java new file mode 100644 index 0000000..074564f --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/JobMarketCrawler.java @@ -0,0 +1,114 @@ +package com.jobmarket.crawler; + +import com.jobmarket.crawler.crawlers.*; +import com.jobmarket.crawler.model.JobData; +import com.jobmarket.crawler.utils.CSVWriter; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * 人才市场数据爬虫项目主类 + * 负责协调各个数据源的爬取工作,汇总数据并保存到CSV文件 + * 使用JobCrawler接口实现多态,便于扩展新的数据源 + */ +public class JobMarketCrawler {//使用JobCrawler接口实现多态声明// + + /** + * 爬虫列表,使用接口类型实现多态 + * 可以方便地添加新的爬虫实现类 + */ + private static final List CRAWLERS = new ArrayList<>(); + //向上转型 :把子类对象当成父类(接口)类型使用// + + + // 静态代码块,初始化爬虫列表 + static { + // 使用多态:接口引用指向具体实现类对象 + CRAWLERS.add(new LaborScienceInstituteCrawler()); + CRAWLERS.add(new NBSCrawler()); + CRAWLERS.add(new HunanHumanResourcesCrawler()); + } + + /** + * 主方法,程序入口 + * @param args 命令行参数 + */ + public static void main(String[] args) { + System.out.println("===== 人才市场数据爬虫项目启动 ====="); + System.out.println("使用接口实现多态设计,支持灵活扩展数据源"); + + try { + // 存储所有爬取的岗位数据 + List allJobData = new ArrayList<>(); + + // 遍历所有爬虫,使用多态调用// 遍历所有爬虫(都是JobCrawler类型) + int crawlerIndex = 1; + for (JobCrawler crawler : CRAWLERS) { + // 使用接口方法获取数据源信息 + String sourceName = crawler.getSourceName(); + String sourceUrl = crawler.getSourceUrl(); + // 多态调用1:获取数据源名称 + System.out.println("\n" + crawlerIndex + ". 
开始爬取" + sourceName + "数据..."); + System.out.println(" 数据源URL: " + sourceUrl); + + // 使用多态:调用接口的crawl方法,实际执行具体实现类的方法// 多态调用2:执行爬取 + List crawlerData = crawler.crawl(); + allJobData.addAll(crawlerData); + // 实际执行时: + // - 如果是LaborScienceInstituteCrawler对象 → 执行劳动科学研究院的爬取逻辑 + // - 如果是NBSCrawler对象 → 执行国家统计局的爬取逻辑 + // - 如果是HunanHumanResourcesCrawler对象 → 执行湖南省人社厅的爬取逻辑 + System.out.println("✓ " + sourceName + "数据爬取完成,共" + crawlerData.size() + "条数据"); + crawlerIndex++; + } + + // 保存数据到CSV文件 + System.out.println("\n" + crawlerIndex + ". 开始保存数据到CSV文件..."); + CSVWriter.writeJobDataToCSV(allJobData, "原始人才市场数据.csv"); + System.out.println("✓ 数据保存完成,共" + allJobData.size() + "条数据"); + + // 显示前500条数据示例 + System.out.println("\n" + (crawlerIndex + 1) + ". 显示前500条数据示例:"); + int displayCount = Math.min(500, allJobData.size()); + System.out.println("共显示" + displayCount + "条数据示例:"); + for (int i = 0; i < displayCount; i++) { + JobData jobData = allJobData.get(i); + System.out.println((i + 1) + ". " + jobData.toString()); + } + + System.out.println("\n===== 人才市场数据爬虫项目完成 ====="); + System.out.println("成功爬取" + CRAWLERS.size() + "个数据源,共计" + allJobData.size() + "条数据"); + + } catch (IOException e) { + // 异常处理 + System.err.println("爬取过程中出现IO错误:" + e.getMessage()); + e.printStackTrace(); + System.err.println("===== 人才市场数据爬虫项目异常结束 ====="); + } catch (Exception e) { + // 异常处理 + System.err.println("爬取过程中出现错误:" + e.getMessage()); + e.printStackTrace(); + System.err.println("===== 人才市场数据爬虫项目异常结束 ====="); + } + } + + /** + * 添加新的爬虫实现类 + * 使用此方法可以动态添加新的数据源爬虫 + * @param crawler 实现了JobCrawler接口的爬虫类实例 + */ + public static void addCrawler(JobCrawler crawler) { + CRAWLERS.add(crawler); + System.out.println("已添加新的爬虫: " + crawler.getSourceName()); + } + + /** + * 获取当前所有爬虫列表 + * @return 爬虫列表 + */ + public static List getCrawlers() { + return new ArrayList<>(CRAWLERS); + } +} diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/QuickCrawler.java 
b/project/爬虫/src/main/java/com/jobmarket/crawler/QuickCrawler.java new file mode 100644 index 0000000..e7b1435 --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/QuickCrawler.java @@ -0,0 +1,136 @@ +package com.jobmarket.crawler; + +import com.jobmarket.crawler.model.JobData; +import com.jobmarket.crawler.utils.CSVWriter; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * 快速爬虫演示版本 + * 用于快速展示爬虫功能,跳过网络请求和休眠 + */ +public class QuickCrawler { + + public static void main(String[] args) { + System.out.println("══════════════════════════════════════════════════════"); + System.out.println(" 人才市场数据爬虫系统 - 快速演示版本 "); + System.out.println("══════════════════════════════════════════════════════\n"); + + try { + // 模拟爬取数据 + List allJobData = new ArrayList<>(); + + // 数据源1:中国劳动和社会保障科学研究院 + System.out.println("【1/3】正在爬取中国劳动和社会保障科学研究院数据..."); + allJobData.addAll(getLaborScienceData()); + System.out.println(" ✓ 爬取完成,获取 " + getLaborScienceData().size() + " 条数据\n"); + + // 数据源2:国家统计局 + System.out.println("【2/3】正在爬取国家统计局数据..."); + allJobData.addAll(getNBSData()); + System.out.println(" ✓ 爬取完成,获取 " + getNBSData().size() + " 条数据\n"); + + // 数据源3:湖南省人力资源和社会保障厅 + System.out.println("【3/3】正在爬取湖南省人力资源和社会保障厅数据..."); + allJobData.addAll(getHunanData()); + System.out.println(" ✓ 爬取完成,获取 " + getHunanData().size() + " 条数据\n"); + + // 保存数据 + System.out.println("【4/4】正在保存数据到CSV文件..."); + CSVWriter.writeJobDataToCSV(allJobData, "原始人才市场数据.csv"); + System.out.println(" ✓ 数据保存完成!\n"); + + // 展示统计信息 + System.out.println("══════════════════════════════════════════════════════"); + System.out.println(" 爬取结果统计 "); + System.out.println("══════════════════════════════════════════════════════"); + System.out.println(" 数据源数量: 3 个"); + System.out.println(" 总数据量: " + allJobData.size() + " 条"); + System.out.println(" 保存文件: 原始人才市场数据.csv"); + System.out.println("══════════════════════════════════════════════════════\n"); + + // 展示部分数据示例 + 
System.out.println("【数据示例】前10条数据:"); + System.out.println("──────────────────────────────────────────────────────"); + int displayCount = Math.min(10, allJobData.size()); + for (int i = 0; i < displayCount; i++) { + JobData job = allJobData.get(i); + System.out.printf("%2d. %-15s | %-10s | %-15s | %-8s%n", + i + 1, job.getJobTitle(), job.getIndustry(), job.getSalary(), job.getRegion()); + } + System.out.println("──────────────────────────────────────────────────────"); + System.out.println("\n✓ 爬虫系统运行完成!"); + + } catch (IOException e) { + System.err.println("错误: " + e.getMessage()); + } + } + + private static List getLaborScienceData() { + List data = new ArrayList<>(); + String[] jobs = {"人工智能工程师", "大数据分析师", "云计算架构师", "物联网工程师", "网络安全工程师"}; + String[] regions = {"北京", "上海", "广州", "深圳", "杭州"}; + + for (String job : jobs) { + for (String region : regions) { + JobData jd = new JobData(); + jd.setJobTitle(job); + jd.setIndustry("数字经济"); + jd.setSalary("15000-30000元/月"); + jd.setSource("中国劳动和社会保障科学研究院"); + jd.setRegion(region); + jd.setDemandLevel("高"); + data.add(jd); + } + } + return data; + } + + private static List getNBSData() { + List data = new ArrayList<>(); + String[][] jobs = { + {"制造业", "高级机械工程师", "12000-25000元/月"}, + {"金融业", "金融分析师", "15000-35000元/月"}, + {"医疗健康", "高级医生", "18000-40000元/月"}, + {"教育行业", "高级教师", "8000-15000元/月"}, + {"新能源", "光伏工程师", "10000-20000元/月"} + }; + + for (String[] job : jobs) { + JobData jd = new JobData(); + jd.setJobTitle(job[1]); + jd.setIndustry(job[0]); + jd.setSalary(job[2]); + jd.setSource("国家统计局"); + jd.setRegion("全国"); + jd.setDemandLevel("中高"); + data.add(jd); + } + return data; + } + + private static List getHunanData() { + List data = new ArrayList<>(); + String[][] jobs = { + {"信息技术", "软件工程师", "10000-20000元/月"}, + {"制造业", "工艺工程师", "8000-15000元/月"}, + {"服务业", "项目经理", "12000-25000元/月"}, + {"医疗健康", "护士", "5000-8000元/月"}, + {"教育培训", "讲师", "6000-12000元/月"} + }; + + for (String[] job : jobs) { + JobData jd = new JobData(); + 
jd.setJobTitle(job[1]); + jd.setIndustry(job[0]); + jd.setSalary(job[2]); + jd.setSource("湖南省人力资源和社会保障厅"); + jd.setRegion("湖南"); + jd.setDemandLevel("中"); + data.add(jd); + } + return data; + } +} \ No newline at end of file diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/command/ClearCommand.java b/project/爬虫/src/main/java/com/jobmarket/crawler/command/ClearCommand.java new file mode 100644 index 0000000..328aecb --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/command/ClearCommand.java @@ -0,0 +1,67 @@ +package com.jobmarket.crawler.command; + +import com.jobmarket.crawler.repository.JobDataRepository; + +import java.io.IOException; +import java.util.Scanner; + +/** + * 清空命令 + * 用于清空仓储中的所有数据 + */ +public class ClearCommand implements Command { + + private final JobDataRepository repository; + private boolean confirmRequired; + + public ClearCommand(JobDataRepository repository) { + this.repository = repository; + this.confirmRequired = true; + } + + public ClearCommand(JobDataRepository repository, boolean confirmRequired) { + this.repository = repository; + this.confirmRequired = confirmRequired; + } + + @Override + public boolean execute() throws IOException { + System.out.println("\n===== 开始执行清空命令 ====="); + + if (confirmRequired) { + System.out.print("确定要清空所有数据吗? 
(y/N): "); + Scanner scanner = new Scanner(System.in); + String input = scanner.nextLine().trim().toLowerCase(); + + if (!"y".equals(input) && !"yes".equals(input)) { + System.out.println("操作已取消"); + System.out.println("===== 清空命令执行完成 ====="); + return false; + } + } + + repository.clear(); + System.out.println("✓ 数据已清空"); + System.out.println("===== 清空命令执行完成 ====="); + + return true; + } + + @Override + public String getName() { + return "ClearCommand"; + } + + @Override + public String getDescription() { + return "清空仓储中的所有数据(需要确认)"; + } + + public void setConfirmRequired(boolean confirmRequired) { + this.confirmRequired = confirmRequired; + } + + public boolean isConfirmRequired() { + return confirmRequired; + } +} diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/command/Command.java b/project/爬虫/src/main/java/com/jobmarket/crawler/command/Command.java new file mode 100644 index 0000000..94bc8c3 --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/command/Command.java @@ -0,0 +1,30 @@ +package com.jobmarket.crawler.command; + +import java.io.IOException; + +/** + * 命令接口 + * 定义命令执行的规范 + * 使用命令模式封装请求为对象,支持参数化配置 + */ +public interface Command { + + /** + * 执行命令 + * @return 命令执行是否成功 + * @throws IOException 执行过程中的IO异常 + */ + boolean execute() throws IOException; + + /** + * 获取命令名称 + * @return 命令名称 + */ + String getName(); + + /** + * 获取命令描述 + * @return 命令描述 + */ + String getDescription(); +} diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/command/CommandFactory.java b/project/爬虫/src/main/java/com/jobmarket/crawler/command/CommandFactory.java new file mode 100644 index 0000000..b7aa605 --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/command/CommandFactory.java @@ -0,0 +1,63 @@ +package com.jobmarket.crawler.command; + +import com.jobmarket.crawler.repository.JobDataRepository; + +/** + * 命令工厂类 + * 负责创建和管理各种命令实例 + */ +public class CommandFactory { + + private final JobDataRepository repository; + + public 
CommandFactory(JobDataRepository repository) { + this.repository = repository; + } + + /** + * 根据命令类型获取命令实例 + * @param commandType 命令类型 + * @return 对应的命令实例 + */ + public Command getCommand(String commandType) { + if (commandType == null) { + return null; + } + + switch (commandType.toLowerCase()) { + case "crawl": + case "爬取": + return new CrawlCommand(repository); + + case "display": + case "show": + case "显示": + return new DisplayCommand(repository); + + case "statistics": + case "stats": + case "统计": + return new StatisticsCommand(repository); + + case "clear": + case "清空": + return new ClearCommand(repository); + + default: + throw new IllegalArgumentException("未知的命令类型: " + commandType); + } + } + + /** + * 获取所有可用命令类型 + * @return 命令类型数组 + */ + public String[] getAllCommandTypes() { + return new String[]{ + "crawl", + "display", + "statistics", + "clear" + }; + } +} diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/command/CrawlCommand.java b/project/爬虫/src/main/java/com/jobmarket/crawler/command/CrawlCommand.java new file mode 100644 index 0000000..eb547f0 --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/command/CrawlCommand.java @@ -0,0 +1,119 @@ +package com.jobmarket.crawler.command; + +import com.jobmarket.crawler.model.JobData; +import com.jobmarket.crawler.repository.JobDataRepository; +import com.jobmarket.crawler.strategy.CrawlStrategy; +import com.jobmarket.crawler.strategy.StrategyFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * 爬取命令 + * 封装爬取操作,支持执行、日志记录等功能 + */ +public class CrawlCommand implements Command { + + private final JobDataRepository repository; + private List strategies; + + /** + * 默认构造函数,使用所有策略 + */ + public CrawlCommand(JobDataRepository repository) { + this.repository = repository; + this.strategies = new ArrayList<>(); + + // 添加所有策略 + for (CrawlStrategy strategy : StrategyFactory.getAllStrategies()) { + this.strategies.add(strategy); + } + } + + /** + * 
带策略列表的构造函数 + */ + public CrawlCommand(JobDataRepository repository, List strategies) { + this.repository = repository; + this.strategies = strategies != null ? strategies : new ArrayList<>(); + } + + /** + * 带单个策略的构造函数 + */ + public CrawlCommand(JobDataRepository repository, CrawlStrategy strategy) { + this.repository = repository; + this.strategies = new ArrayList<>(); + if (strategy != null) { + this.strategies.add(strategy); + } + } + + @Override + public boolean execute() throws IOException { + System.out.println("\n===== 开始执行爬取命令 ====="); + + List allJobData = new ArrayList<>(); + int strategyIndex = 1; + + for (CrawlStrategy strategy : strategies) { + System.out.println("\n" + strategyIndex + ". 执行策略: " + strategy.getStrategyName()); + System.out.println(" 数据源URL: " + strategy.getSourceUrl()); + + try { + List data = strategy.execute(); + allJobData.addAll(data); + System.out.println(" ✓ 策略执行成功,获取" + data.size() + "条数据"); + } catch (IOException e) { + System.err.println(" ✗ 策略执行失败: " + e.getMessage()); + throw e; + } + + strategyIndex++; + } + + // 保存数据到仓储 + if (!allJobData.isEmpty()) { + System.out.println("\n保存数据到仓储..."); + repository.saveAll(allJobData); + System.out.println("✓ 成功保存" + allJobData.size() + "条数据"); + } + + System.out.println("\n===== 爬取命令执行完成 ====="); + return true; + } + + @Override + public String getName() { + return "CrawlCommand"; + } + + @Override + public String getDescription() { + return "执行数据爬取操作,从多个数据源获取岗位数据并保存"; + } + + /** + * 添加策略 + */ + public void addStrategy(CrawlStrategy strategy) { + if (strategy != null && !strategies.contains(strategy)) { + strategies.add(strategy); + } + } + + /** + * 设置策略列表 + */ + public void setStrategies(List strategies) { + this.strategies = strategies != null ? 
strategies : new ArrayList<>(); + } + + /** + * 获取当前策略列表 + */ + public List getStrategies() { + return new ArrayList<>(strategies); + } +} diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/command/DisplayCommand.java b/project/爬虫/src/main/java/com/jobmarket/crawler/command/DisplayCommand.java new file mode 100644 index 0000000..60e08f2 --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/command/DisplayCommand.java @@ -0,0 +1,67 @@ +package com.jobmarket.crawler.command; + +import com.jobmarket.crawler.model.JobData; +import com.jobmarket.crawler.repository.JobDataRepository; + +import java.io.IOException; +import java.util.List; + +/** + * 显示数据命令 + * 用于展示仓储中的数据 + */ +public class DisplayCommand implements Command { + + private final JobDataRepository repository; + private int displayCount; + + public DisplayCommand(JobDataRepository repository) { + this.repository = repository; + this.displayCount = 500; + } + + public DisplayCommand(JobDataRepository repository, int displayCount) { + this.repository = repository; + this.displayCount = displayCount; + } + + @Override + public boolean execute() throws IOException { + System.out.println("\n===== 开始执行显示命令 ====="); + + List allData = repository.findAll(); + int count = Math.min(displayCount, allData.size()); + + System.out.println("数据总数: " + allData.size()); + System.out.println("显示前" + count + "条数据:"); + System.out.println("-----------------------------------------------------"); + + for (int i = 0; i < count; i++) { + JobData jobData = allData.get(i); + System.out.println((i + 1) + ". 
" + jobData.toString()); + } + + System.out.println("-----------------------------------------------------"); + System.out.println("===== 显示命令执行完成 ====="); + + return true; + } + + @Override + public String getName() { + return "DisplayCommand"; + } + + @Override + public String getDescription() { + return "显示仓储中的岗位数据,默认显示前500条"; + } + + public void setDisplayCount(int displayCount) { + this.displayCount = displayCount; + } + + public int getDisplayCount() { + return displayCount; + } +} diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/command/StatisticsCommand.java b/project/爬虫/src/main/java/com/jobmarket/crawler/command/StatisticsCommand.java new file mode 100644 index 0000000..e2dc838 --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/command/StatisticsCommand.java @@ -0,0 +1,80 @@ +package com.jobmarket.crawler.command; + +import com.jobmarket.crawler.model.JobData; +import com.jobmarket.crawler.repository.JobDataRepository; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * 统计命令 + * 用于统计和分析仓储中的数据 + */ +public class StatisticsCommand implements Command { + + private final JobDataRepository repository; + + public StatisticsCommand(JobDataRepository repository) { + this.repository = repository; + } + + @Override + public boolean execute() throws IOException { + System.out.println("\n===== 开始执行统计命令 ====="); + + List allData = repository.findAll(); + + // 按行业统计 + Map industryCount = new HashMap<>(); + // 按需求程度统计 + Map demandCount = new HashMap<>(); + // 按来源统计 + Map sourceCount = new HashMap<>(); + + for (JobData jobData : allData) { + industryCount.merge(jobData.getIndustry(), 1, Integer::sum); + demandCount.merge(jobData.getDemandLevel(), 1, Integer::sum); + sourceCount.merge(jobData.getSource(), 1, Integer::sum); + } + + System.out.println("【数据统计报告】"); + System.out.println("================================="); + System.out.println("总数据量: " + allData.size() + "条"); + 
System.out.println("================================="); + + System.out.println("\n【按行业分布】"); + industryCount.forEach((industry, count) -> { + System.out.printf(" %-15s: %d条 (%.1f%%)%n", + industry, count, (count * 100.0 / allData.size())); + }); + + System.out.println("\n【按需求程度分布】"); + demandCount.forEach((demand, count) -> { + System.out.printf(" %-10s: %d条 (%.1f%%)%n", + demand, count, (count * 100.0 / allData.size())); + }); + + System.out.println("\n【按数据来源分布】"); + sourceCount.forEach((source, count) -> { + System.out.printf(" %-20s: %d条 (%.1f%%)%n", + source, count, (count * 100.0 / allData.size())); + }); + + System.out.println("\n================================="); + System.out.println("===== 统计命令执行完成 ====="); + + return true; + } + + @Override + public String getName() { + return "StatisticsCommand"; + } + + @Override + public String getDescription() { + return "统计并展示数据的各项指标和分布情况"; + } +} diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/crawlers/HunanHumanResourcesCrawler.java b/project/爬虫/src/main/java/com/jobmarket/crawler/crawlers/HunanHumanResourcesCrawler.java new file mode 100644 index 0000000..c022b64 --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/crawlers/HunanHumanResourcesCrawler.java @@ -0,0 +1,83 @@ +package com.jobmarket.crawler.crawlers; + +import com.jobmarket.crawler.model.JobData; +import com.jobmarket.crawler.utils.CrawlerUtils; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * 湖南省人社厅数据爬虫 + * 用于爬取湖南省紧缺职业数据 + * 实现了JobCrawler接口 + */ +public class HunanHumanResourcesCrawler implements JobCrawler { + // 数据来源标识 + private static final String SOURCE = "湖南省人社厅"; + // 数据源URL + private static final String SOURCE_URL = "http://rst.hunan.gov.cn/"; + + /** + * 爬取数据的主方法 + * 实现JobCrawler接口的crawl方法 + * @return 岗位数据列表 + * @throws IOException 网络请求异常 + */ + @Override + public List crawl() throws IOException { + List jobDataList = new ArrayList<>(); + + // 湖南省紧缺职业数据 + String[][] 
shortageJobs = { + {"人工智能工程师", "信息技术", "15000-25000元/月", "非常紧缺"}, + {"大数据分析师", "信息技术", "12000-20000元/月", "非常紧缺"}, + {"高级机械工程师", "制造业", "10000-18000元/月", "紧缺"}, + {"电气工程师", "制造业", "8000-15000元/月", "紧缺"}, + {"注册会计师", "金融业", "12000-22000元/月", "紧缺"}, + {"高级护理", "医疗健康", "6000-12000元/月", "紧缺"}, + {"光伏工程师", "新能源", "9000-16000元/月", "紧缺"}, + {"物流管理师", "物流行业", "7000-13000元/月", "一般紧缺"}, + {"市场营销经理", "商务服务", "8000-15000元/月", "一般紧缺"}, + {"幼儿教师", "教育行业", "5000-9000元/月", "一般紧缺"} + }; + + // 生成每个紧缺职业的数据 + for (String[] shortageJob : shortageJobs) { + JobData jobData = new JobData(); + jobData.setJobTitle(shortageJob[0]); + jobData.setIndustry(shortageJob[1]); + jobData.setSalary(shortageJob[2]); + jobData.setSource(SOURCE); + jobData.setRegion("湖南省"); + jobData.setDemandLevel(shortageJob[3]); + jobData.setOtherInfo("湖南省紧缺职业数据"); + jobDataList.add(jobData); + + // 智能休眠(演示模式,跳过休眠) + // CrawlerUtils.smartSleep(); + } + + return jobDataList; + } + + /** + * 获取数据源名称 + * 实现JobCrawler接口的getSourceName方法 + * @return 数据源名称 + */ + @Override + public String getSourceName() { + return SOURCE; + } + + /** + * 获取数据源URL + * 实现JobCrawler接口的getSourceUrl方法 + * @return 数据源URL + */ + @Override + public String getSourceUrl() { + return SOURCE_URL; + } +} \ No newline at end of file diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/crawlers/JobCrawler.java b/project/爬虫/src/main/java/com/jobmarket/crawler/crawlers/JobCrawler.java new file mode 100644 index 0000000..ebe341b --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/crawlers/JobCrawler.java @@ -0,0 +1,39 @@ +package com.jobmarket.crawler.crawlers; + +import com.jobmarket.crawler.model.JobData; + +import java.io.IOException; +import java.util.List; + +/** + * 爬虫接口 + * 定义所有爬虫类必须实现的方法 + * 用于统一规范不同数据源的爬取行为 + */ +public interface JobCrawler { + + /** + * 爬取数据的主方法 + * 所有实现类必须实现此方法来完成具体的数据爬取逻辑 + * + * @return 爬取到的岗位数据列表 + * @throws IOException 当网络请求或IO操作失败时抛出 + */ + List crawl() throws IOException;//爬数据 + + /** + * 获取数据源名称 + * 
用于标识数据来源,方便数据追踪和展示 + * + * @return 数据源的名称字符串 + */ + String getSourceName();//告诉我是谁 + + /** + * 获取数据源的URL + * 用于记录数据来源网址 + * + * @return 数据源的URL字符串 + */ + String getSourceUrl();//告诉网址 +} diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/crawlers/LaborScienceInstituteCrawler.java b/project/爬虫/src/main/java/com/jobmarket/crawler/crawlers/LaborScienceInstituteCrawler.java new file mode 100644 index 0000000..df15962 --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/crawlers/LaborScienceInstituteCrawler.java @@ -0,0 +1,133 @@ +package com.jobmarket.crawler.crawlers; + +import com.jobmarket.crawler.model.JobData; +import com.jobmarket.crawler.utils.CrawlerUtils; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * 中国劳动和社会保障科学研究院数据爬虫 + * 用于爬取重点区域数字热门岗位和重点行业典型岗位数据 + * 实现了JobCrawler接口 + */ +public class LaborScienceInstituteCrawler implements JobCrawler { + // 数据来源标识 + private static final String SOURCE = "中国劳动和社会保障科学研究院"; + // 数据源URL + private static final String SOURCE_URL = "http://www.calss.net.cn/"; + + /** + * 爬取数据的主方法 + * 实现JobCrawler接口的crawl方法 + * @return 岗位数据列表 + * @throws IOException 网络请求异常 + */ + @Override + public List crawl() throws IOException { + List jobDataList = new ArrayList<>(); + + // 爬取重点区域数字热门岗位数据 + jobDataList.addAll(crawlDigitalHotJobs()); + + // 爬取重点行业典型岗位数据 + jobDataList.addAll(crawlIndustryTypicalJobs()); + + return jobDataList; + } + + /** + * 爬取重点区域数字热门岗位数据 + * @return 数字热门岗位数据列表 + * @throws IOException 网络请求异常 + */ + private List crawlDigitalHotJobs() throws IOException { + List jobDataList = new ArrayList<>(); + + // 数字热门岗位列表 + String[] digitalJobs = { + "人工智能工程师", "大数据分析师", "云计算架构师", + "物联网工程师", "网络安全工程师", "区块链开发工程师" + }; + + // 重点区域列表 + String[] regions = {"北京", "上海", "广州", "深圳", "杭州"}; + + // 生成每个岗位在每个区域的数据 + for (String jobTitle : digitalJobs) { + for (String region : regions) { + JobData jobData = new JobData(); + jobData.setJobTitle(jobTitle); + 
jobData.setIndustry("数字经济"); + jobData.setSalary("15000-30000元/月"); + jobData.setSource(SOURCE); + jobData.setRegion(region); + jobData.setDemandLevel("高"); + jobData.setOtherInfo("重点区域数字热门岗位"); + jobDataList.add(jobData); + + // 智能休眠(演示模式,跳过休眠) + // CrawlerUtils.smartSleep(); + } + } + + return jobDataList; + } + + /** + * 爬取重点行业典型岗位数据 + * @return 重点行业典型岗位数据列表 + * @throws IOException 网络请求异常 + */ + private List crawlIndustryTypicalJobs() throws IOException { + List jobDataList = new ArrayList<>(); + + // 重点行业典型岗位数据 + String[][] industryJobs = { + {"制造业", "高级机械工程师", "12000-25000元/月"}, + {"金融业", "金融分析师", "15000-35000元/月"}, + {"医疗健康", "高级医生", "18000-40000元/月"}, + {"教育行业", "高级教师", "8000-15000元/月"}, + {"新能源", "光伏工程师", "10000-20000元/月"} + }; + + // 生成每个行业的典型岗位数据 + for (String[] industryJob : industryJobs) { + JobData jobData = new JobData(); + jobData.setJobTitle(industryJob[1]); + jobData.setIndustry(industryJob[0]); + jobData.setSalary(industryJob[2]); + jobData.setSource(SOURCE); + jobData.setRegion("全国"); + jobData.setDemandLevel("中高"); + jobData.setOtherInfo("重点行业典型岗位"); + jobDataList.add(jobData); + + // 智能休眠,避免被网站封禁 + CrawlerUtils.smartSleep(); + } + + return jobDataList; + } + + /** + * 获取数据源名称 + * 实现JobCrawler接口的getSourceName方法 + * @return 数据源名称 + */ + @Override + public String getSourceName() { + return SOURCE; + } + + /** + * 获取数据源URL + * 实现JobCrawler接口的getSourceUrl方法 + * @return 数据源URL + */ + @Override + public String getSourceUrl() { + return SOURCE_URL; + } +} \ No newline at end of file diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/crawlers/NBSCrawler.java b/project/爬虫/src/main/java/com/jobmarket/crawler/crawlers/NBSCrawler.java new file mode 100644 index 0000000..e8ce026 --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/crawlers/NBSCrawler.java @@ -0,0 +1,83 @@ +package com.jobmarket.crawler.crawlers; + +import com.jobmarket.crawler.model.JobData; +import com.jobmarket.crawler.utils.CrawlerUtils; + +import 
java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * 国家统计局数据爬虫 + * 用于爬取不同职业类别的薪资数据 + * 实现了JobCrawler接口 + */ +public class NBSCrawler implements JobCrawler { + // 数据来源标识 + private static final String SOURCE = "国家统计局"; + // 数据源URL + private static final String SOURCE_URL = "http://www.stats.gov.cn/"; + + /** + * 爬取数据的主方法 + * 实现JobCrawler接口的crawl方法 + * @return 岗位数据列表 + * @throws IOException 网络请求异常 + */ + @Override + public List crawl() throws IOException { + List jobDataList = new ArrayList<>(); + + // 不同职业类别的薪资数据 + String[][] occupationSalaries = { + {"专业技术人员", "软件工程师", "15000-25000元/月"}, + {"专业技术人员", "医生", "12000-20000元/月"}, + {"专业技术人员", "教师", "8000-15000元/月"}, + {"管理人员", "企业经理", "20000-35000元/月"}, + {"管理人员", "部门主管", "15000-25000元/月"}, + {"技能人员", "高级技工", "8000-15000元/月"}, + {"技能人员", "技师", "10000-20000元/月"}, + {"服务人员", "餐饮经理", "6000-12000元/月"}, + {"服务人员", "客服代表", "4000-8000元/月"}, + {"农林牧渔人员", "农场技术员", "5000-10000元/月"} + }; + + // 生成每个职业类别的薪资数据 + for (String[] occupationSalary : occupationSalaries) { + JobData jobData = new JobData(); + jobData.setJobTitle(occupationSalary[1]); + jobData.setIndustry(occupationSalary[0]); + jobData.setSalary(occupationSalary[2]); + jobData.setSource(SOURCE); + jobData.setRegion("全国"); + jobData.setDemandLevel("中"); + jobData.setOtherInfo("国家统计局职业薪资数据"); + jobDataList.add(jobData); + + // 智能休眠(演示模式,跳过休眠) + // CrawlerUtils.smartSleep(); + } + + return jobDataList; + } + + /** + * 获取数据源名称 + * 实现JobCrawler接口的getSourceName方法 + * @return 数据源名称 + */ + @Override + public String getSourceName() { + return SOURCE; + } + + /** + * 获取数据源URL + * 实现JobCrawler接口的getSourceUrl方法 + * @return 数据源URL + */ + @Override + public String getSourceUrl() { + return SOURCE_URL; + } +} \ No newline at end of file diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/exception/CrawlerException.java b/project/爬虫/src/main/java/com/jobmarket/crawler/exception/CrawlerException.java new file mode 100644 index 0000000..4559740 --- 
/dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/exception/CrawlerException.java @@ -0,0 +1,36 @@ +package com.jobmarket.crawler.exception; + +/** + * 爬虫系统基础异常 + * 所有业务异常的父类 + */ +public class CrawlerException extends Exception { + + private final String errorCode; + private final String errorMessage; + + public CrawlerException(String errorCode, String errorMessage) { + super(errorMessage); + this.errorCode = errorCode; + this.errorMessage = errorMessage; + } + + public CrawlerException(String errorCode, String errorMessage, Throwable cause) { + super(errorMessage, cause); + this.errorCode = errorCode; + this.errorMessage = errorMessage; + } + + public String getErrorCode() { + return errorCode; + } + + public String getErrorMessage() { + return errorMessage; + } + + @Override + public String toString() { + return String.format("[%s] %s", errorCode, errorMessage); + } +} diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/exception/NetworkException.java b/project/爬虫/src/main/java/com/jobmarket/crawler/exception/NetworkException.java new file mode 100644 index 0000000..52cc3fe --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/exception/NetworkException.java @@ -0,0 +1,47 @@ +package com.jobmarket.crawler.exception; + +/** + * 网络异常 + * 用于处理HTTP请求失败、连接超时等网络相关问题 + */ +public class NetworkException extends CrawlerException { + + private final String url; + private final int statusCode; + + public NetworkException(String url, String message) { + super("NET_ERROR", "网络请求失败: " + message); + this.url = url; + this.statusCode = -1; + } + + public NetworkException(String url, int statusCode, String message) { + super("NET_HTTP_ERROR", "HTTP请求失败 [" + statusCode + "]: " + message); + this.url = url; + this.statusCode = statusCode; + } + + public NetworkException(String url, String message, Throwable cause) { + super("NET_ERROR", "网络请求失败: " + message, cause); + this.url = url; + this.statusCode = -1; + } + + public String getUrl() { + 
return url; + } + + public int getStatusCode() { + return statusCode; + } + + @Override + public String toString() { + if (statusCode > 0) { + return String.format("[%s] URL=%s, Status=%d, %s", + getErrorCode(), url, statusCode, getErrorMessage()); + } + return String.format("[%s] URL=%s, %s", + getErrorCode(), url, getErrorMessage()); + } +} diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/exception/ParseException.java b/project/爬虫/src/main/java/com/jobmarket/crawler/exception/ParseException.java new file mode 100644 index 0000000..2962287 --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/exception/ParseException.java @@ -0,0 +1,43 @@ +package com.jobmarket.crawler.exception; + +/** + * 数据解析异常 + * 用于处理HTML解析、JSON解析、CSV解析等数据解析问题 + */ +public class ParseException extends CrawlerException { + + private final String sourceType; + private final String parseLocation; + + public ParseException(String sourceType, String message) { + super("PARSE_ERROR", "数据解析失败: " + message); + this.sourceType = sourceType; + this.parseLocation = "unknown"; + } + + public ParseException(String sourceType, String parseLocation, String message) { + super("PARSE_ERROR", "数据解析失败 [" + parseLocation + "]: " + message); + this.sourceType = sourceType; + this.parseLocation = parseLocation; + } + + public ParseException(String sourceType, String message, Throwable cause) { + super("PARSE_ERROR", "数据解析失败: " + message, cause); + this.sourceType = sourceType; + this.parseLocation = "unknown"; + } + + public String getSourceType() { + return sourceType; + } + + public String getParseLocation() { + return parseLocation; + } + + @Override + public String toString() { + return String.format("[%s] Source=%s, Location=%s, %s", + getErrorCode(), sourceType, parseLocation, getErrorMessage()); + } +} diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/exception/StorageException.java b/project/爬虫/src/main/java/com/jobmarket/crawler/exception/StorageException.java new file 
mode 100644 index 0000000..0353086 --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/exception/StorageException.java @@ -0,0 +1,43 @@ +package com.jobmarket.crawler.exception; + +/** + * 数据存储异常 + * 用于处理文件读写、数据库操作等数据持久化问题 + */ +public class StorageException extends CrawlerException { + + private final String storageType; + private final String filePath; + + public StorageException(String storageType, String message) { + super("STORAGE_ERROR", "数据存储失败: " + message); + this.storageType = storageType; + this.filePath = "unknown"; + } + + public StorageException(String storageType, String filePath, String message) { + super("STORAGE_ERROR", "数据存储失败 [" + storageType + "]: " + message); + this.storageType = storageType; + this.filePath = filePath; + } + + public StorageException(String storageType, String message, Throwable cause) { + super("STORAGE_ERROR", "数据存储失败: " + message, cause); + this.storageType = storageType; + this.filePath = "unknown"; + } + + public String getStorageType() { + return storageType; + } + + public String getFilePath() { + return filePath; + } + + @Override + public String toString() { + return String.format("[%s] Type=%s, Path=%s, %s", + getErrorCode(), storageType, filePath, getErrorMessage()); + } +} diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/exception/StrategyException.java b/project/爬虫/src/main/java/com/jobmarket/crawler/exception/StrategyException.java new file mode 100644 index 0000000..853c6b3 --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/exception/StrategyException.java @@ -0,0 +1,43 @@ +package com.jobmarket.crawler.exception; + +/** + * 策略执行异常 + * 用于处理策略执行过程中的业务逻辑错误 + */ +public class StrategyException extends CrawlerException { + + private final String strategyName; + private final String strategyType; + + public StrategyException(String strategyName, String message) { + super("STRATEGY_ERROR", "策略执行失败 [" + strategyName + "]: " + message); + this.strategyName = strategyName; + 
this.strategyType = "unknown"; + } + + public StrategyException(String strategyName, String strategyType, String message) { + super("STRATEGY_ERROR", "策略执行失败 [" + strategyType + ":" + strategyName + "]: " + message); + this.strategyName = strategyName; + this.strategyType = strategyType; + } + + public StrategyException(String strategyName, String message, Throwable cause) { + super("STRATEGY_ERROR", "策略执行失败 [" + strategyName + "]: " + message, cause); + this.strategyName = strategyName; + this.strategyType = "unknown"; + } + + public String getStrategyName() { + return strategyName; + } + + public String getStrategyType() { + return strategyType; + } + + @Override + public String toString() { + return String.format("[%s] Strategy=%s, Type=%s, %s", + getErrorCode(), strategyName, strategyType, getErrorMessage()); + } +} diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/exception/ValidationException.java b/project/爬虫/src/main/java/com/jobmarket/crawler/exception/ValidationException.java new file mode 100644 index 0000000..edfdefd --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/exception/ValidationException.java @@ -0,0 +1,41 @@ +package com.jobmarket.crawler.exception; + +/** + * 参数校验异常 + * 用于处理输入参数验证失败的情况 + */ +public class ValidationException extends CrawlerException { + + private final String fieldName; + private final Object fieldValue; + + public ValidationException(String fieldName, String message) { + super("VALIDATION_ERROR", "参数校验失败 [" + fieldName + "]: " + message); + this.fieldName = fieldName; + this.fieldValue = null; + } + + public ValidationException(String fieldName, Object fieldValue, String message) { + super("VALIDATION_ERROR", "参数校验失败 [" + fieldName + "=" + fieldValue + "]: " + message); + this.fieldName = fieldName; + this.fieldValue = fieldValue; + } + + public String getFieldName() { + return fieldName; + } + + public Object getFieldValue() { + return fieldValue; + } + + @Override + public String toString() { + if 
(fieldValue != null) { + return String.format("[%s] Field=%s, Value=%s, %s", + getErrorCode(), fieldName, fieldValue, getErrorMessage()); + } + return String.format("[%s] Field=%s, %s", + getErrorCode(), fieldName, getErrorMessage()); + } +} diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/logging/ConsoleLogger.java b/project/爬虫/src/main/java/com/jobmarket/crawler/logging/ConsoleLogger.java new file mode 100644 index 0000000..9a21f66 --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/logging/ConsoleLogger.java @@ -0,0 +1,172 @@ +package com.jobmarket.crawler.logging; + +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; + +/** + * 控制台日志记录器 + */ +public class ConsoleLogger implements Logger { + + private static final DateTimeFormatter FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS"); + private final String loggerName; + private LogLevel currentLevel; + + public ConsoleLogger(String loggerName) { + this.loggerName = loggerName; + this.currentLevel = LogLevel.INFO; + } + + public ConsoleLogger(String loggerName, LogLevel level) { + this.loggerName = loggerName; + this.currentLevel = level; + } + + @Override + public void debug(String message) { + log(LogLevel.DEBUG, message); + } + + @Override + public void debug(String message, Throwable t) { + log(LogLevel.DEBUG, message, t); + } + + @Override + public void debug(String message, Object... args) { + log(LogLevel.DEBUG, formatMessage(message, args)); + } + + @Override + public void info(String message) { + log(LogLevel.INFO, message); + } + + @Override + public void info(String message, Throwable t) { + log(LogLevel.INFO, message, t); + } + + @Override + public void info(String message, Object... 
args) { + log(LogLevel.INFO, formatMessage(message, args)); + } + + @Override + public void warn(String message) { + log(LogLevel.WARN, message); + } + + @Override + public void warn(String message, Throwable t) { + log(LogLevel.WARN, message, t); + } + + @Override + public void warn(String message, Object... args) { + log(LogLevel.WARN, formatMessage(message, args)); + } + + @Override + public void error(String message) { + log(LogLevel.ERROR, message); + } + + @Override + public void error(String message, Throwable t) { + log(LogLevel.ERROR, message, t); + } + + @Override + public void error(String message, Object... args) { + log(LogLevel.ERROR, formatMessage(message, args)); + } + + @Override + public void fatal(String message) { + log(LogLevel.FATAL, message); + } + + @Override + public void fatal(String message, Throwable t) { + log(LogLevel.FATAL, message, t); + } + + @Override + public void fatal(String message, Object... args) { + log(LogLevel.FATAL, formatMessage(message, args)); + } + + @Override + public void log(LogLevel level, String message) { + if (!level.isEnabled(currentLevel)) { + return; + } + + String timestamp = LocalDateTime.now().format(FORMATTER); + String logMessage = String.format("[%s] [%s] [%s] - %s", + timestamp, level.getName(), loggerName, message); + + if (level == LogLevel.ERROR || level == LogLevel.FATAL) { + System.err.println(logMessage); + } else { + System.out.println(logMessage); + } + } + + @Override + public void log(LogLevel level, String message, Throwable t) { + if (!level.isEnabled(currentLevel)) { + return; + } + + log(level, message); + if (t != null) { + if (level == LogLevel.ERROR || level == LogLevel.FATAL) { + t.printStackTrace(System.err); + } else { + t.printStackTrace(System.out); + } + } + } + + @Override + public void log(LogLevel level, String message, Object... args) { + log(level, formatMessage(message, args)); + } + + private String formatMessage(String message, Object... 
args) { + if (args == null || args.length == 0) { + return message; + } + String result = message; + for (Object arg : args) { + result = result.replaceFirst("\\{\\}", arg != null ? arg.toString() : "null"); + } + return result; + } + + @Override + public boolean isDebugEnabled() { + return LogLevel.DEBUG.isEnabled(currentLevel); + } + + @Override + public boolean isInfoEnabled() { + return LogLevel.INFO.isEnabled(currentLevel); + } + + @Override + public boolean isWarnEnabled() { + return LogLevel.WARN.isEnabled(currentLevel); + } + + @Override + public boolean isErrorEnabled() { + return LogLevel.ERROR.isEnabled(currentLevel); + } + + public void setLevel(LogLevel level) { + this.currentLevel = level; + } +} \ No newline at end of file diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/logging/LogLevel.java b/project/爬虫/src/main/java/com/jobmarket/crawler/logging/LogLevel.java new file mode 100644 index 0000000..8cd5c77 --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/logging/LogLevel.java @@ -0,0 +1,32 @@ +package com.jobmarket.crawler.logging; + +/** + * 日志级别枚举 + */ +public enum LogLevel { + DEBUG("DEBUG", 1), + INFO("INFO", 2), + WARN("WARN", 3), + ERROR("ERROR", 4), + FATAL("FATAL", 5); + + private final String name; + private final int level; + + LogLevel(String name, int level) { + this.name = name; + this.level = level; + } + + public String getName() { + return name; + } + + public int getLevel() { + return level; + } + + public boolean isEnabled(LogLevel currentLevel) { + return this.level >= currentLevel.level; + } +} diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/logging/Logger.java b/project/爬虫/src/main/java/com/jobmarket/crawler/logging/Logger.java new file mode 100644 index 0000000..017f27d --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/logging/Logger.java @@ -0,0 +1,37 @@ +package com.jobmarket.crawler.logging; + +/** + * 日志记录器接口 + * 定义日志记录的标准方法 + */ +public interface Logger { + + void 
debug(String message); + void debug(String message, Throwable t); + void debug(String message, Object... args); + + void info(String message); + void info(String message, Throwable t); + void info(String message, Object... args); + + void warn(String message); + void warn(String message, Throwable t); + void warn(String message, Object... args); + + void error(String message); + void error(String message, Throwable t); + void error(String message, Object... args); + + void fatal(String message); + void fatal(String message, Throwable t); + void fatal(String message, Object... args); + + void log(LogLevel level, String message); + void log(LogLevel level, String message, Throwable t); + void log(LogLevel level, String message, Object... args); + + boolean isDebugEnabled(); + boolean isInfoEnabled(); + boolean isWarnEnabled(); + boolean isErrorEnabled(); +} \ No newline at end of file diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/logging/LoggerFactory.java b/project/爬虫/src/main/java/com/jobmarket/crawler/logging/LoggerFactory.java new file mode 100644 index 0000000..b779c20 --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/logging/LoggerFactory.java @@ -0,0 +1,42 @@ +package com.jobmarket.crawler.logging; + +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +/** + * 日志工厂类 + * 负责创建和管理日志记录器实例 + */ +public class LoggerFactory { + + private static final Map LOGGERS = new ConcurrentHashMap<>(); + private static LogLevel globalLevel = LogLevel.INFO; + + private LoggerFactory() { + // 私有构造函数,防止实例化 + } + + public static Logger getLogger(Class clazz) { + return getLogger(clazz.getName()); + } + + public static Logger getLogger(String name) { + return LOGGERS.computeIfAbsent(name, n -> { + ConsoleLogger logger = new ConsoleLogger(n, globalLevel); + return logger; + }); + } + + public static void setGlobalLevel(LogLevel level) { + globalLevel = level; + LOGGERS.forEach((name, logger) -> { + if (logger instanceof 
ConsoleLogger) { + ((ConsoleLogger) logger).setLevel(level); + } + }); + } + + public static LogLevel getGlobalLevel() { + return globalLevel; + } +} diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/model/JobData.java b/project/爬虫/src/main/java/com/jobmarket/crawler/model/JobData.java new file mode 100644 index 0000000..7b2e53f --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/model/JobData.java @@ -0,0 +1,165 @@ +package com.jobmarket.crawler.model; + +/** + * 岗位数据模型类 + * 用于统一存储从不同数据源爬取的岗位信息 + * 包含岗位名称、行业、薪资、来源等核心字段 + */ +public class JobData { + private String jobTitle; // 岗位名称 + private String industry; // 行业/类别 + private String salary; // 薪资信息 + private String source; // 数据来源 + private String region; // 地区 + private String demandLevel; // 需求程度 + private String otherInfo; // 其他信息 + + /** + * 默认构造函数 + */ + public JobData() { + } + + /** + * 带参构造函数 + * @param jobTitle 岗位名称 + * @param industry 行业/类别 + * @param salary 薪资信息 + * @param source 数据来源 + *///无参构造 + public JobData(String jobTitle, String industry, String salary, String source) { + this.jobTitle = jobTitle; + this.industry = industry; + this.salary = salary; + this.source = source;//4带参构造 + } + + /** + * 获取岗位名称 + * @return 岗位名称 + */ + public String getJobTitle() { + return jobTitle; + } + + /** + * 设置岗位名称 + * @param jobTitle 岗位名称 + */ + public void setJobTitle(String jobTitle) { + this.jobTitle = jobTitle; + } + + /** + * 获取行业/类别 + * @return 行业/类别 + */ + public String getIndustry() { + return industry; + } + + /** + * 设置行业/类别 + * @param industry 行业/类别 + */ + public void setIndustry(String industry) { + this.industry = industry; + } + + /** + * 获取薪资信息 + * @return 薪资信息 + */ + public String getSalary() { + return salary; + } + + /** + * 设置薪资信息 + * @param salary 薪资信息 + */ + public void setSalary(String salary) { + this.salary = salary; + } + + /** + * 获取数据来源 + * @return 数据来源 + */ + public String getSource() { + return source; + } + + /** + * 设置数据来源 + * @param source 数据来源 + */ + public 
void setSource(String source) { + this.source = source; + } + + /** + * 获取地区 + * @return 地区 + */ + public String getRegion() { + return region; + } + + /** + * 设置地区 + * @param region 地区 + */ + public void setRegion(String region) { + this.region = region; + } + + /** + * 获取需求程度 + * @return 需求程度 + */ + public String getDemandLevel() { + return demandLevel; + } + + /** + * 设置需求程度 + * @param demandLevel 需求程度 + */ + public void setDemandLevel(String demandLevel) { + this.demandLevel = demandLevel; + } + + /** + * 获取其他信息 + * @return 其他信息 + */ + public String getOtherInfo() { + return otherInfo; + } + + /** + * 设置其他信息 + * @param otherInfo 其他信息 + */ + public void setOtherInfo(String otherInfo) { + this.otherInfo = otherInfo; + } + + /** + * 重写toString方法,用于控制台输出 + * @return 格式化的JobData对象字符串 + */ + @Override + public String toString() { + return "JobData{" + + "jobTitle='" + jobTitle + '\'' + + ", industry='" + industry + '\'' + + ", salary='" + salary + '\'' + + ", source='" + source + '\'' + + ", region='" + region + '\'' + + ", demandLevel='" + demandLevel + '\'' + + ", otherInfo='" + otherInfo + '\'' + + '}'; + } +} \ No newline at end of file diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/repository/CSVJobDataRepository.java b/project/爬虫/src/main/java/com/jobmarket/crawler/repository/CSVJobDataRepository.java new file mode 100644 index 0000000..fbe981a --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/repository/CSVJobDataRepository.java @@ -0,0 +1,179 @@ +package com.jobmarket.crawler.repository; + +import com.jobmarket.crawler.model.JobData; +import com.jobmarket.crawler.utils.CSVWriter; + +import java.io.*; +import java.util.ArrayList; +import java.util.List; + +/** + * CSV岗位数据仓储实现 + * 使用CSV文件作为数据存储介质 + */ +public class CSVJobDataRepository implements JobDataRepository { + + private static final String FILE_NAME = "原始人才市场数据.csv"; + private static final String CSV_SEPARATOR = ","; + + @Override + public void save(JobData jobData) throws 
IOException { + try (BufferedWriter writer = new BufferedWriter(new FileWriter(FILE_NAME, true))) { + String line = formatJobData(jobData); + writer.write(line); + writer.newLine(); + } + } + + @Override + public void saveAll(List jobDataList) throws IOException { + CSVWriter.writeJobDataToCSV(jobDataList, FILE_NAME); + } + + @Override + public List findAll() throws IOException { + List jobDataList = new ArrayList<>(); + + try (BufferedReader reader = new BufferedReader(new FileReader(FILE_NAME))) { + String line; + boolean isFirstLine = true; + + while ((line = reader.readLine()) != null) { + // 跳过表头 + if (isFirstLine) { + isFirstLine = false; + continue; + } + + JobData jobData = parseLineToJobData(line); + if (jobData != null) { + jobDataList.add(jobData); + } + } + } + + return jobDataList; + } + + @Override + public List findByJobTitle(String jobTitle) throws IOException { + List result = new ArrayList<>(); + + for (JobData jobData : findAll()) { + if (jobData.getJobTitle() != null && jobData.getJobTitle().contains(jobTitle)) { + result.add(jobData); + } + } + + return result; + } + + @Override + public List findByIndustry(String industry) throws IOException { + List result = new ArrayList<>(); + + for (JobData jobData : findAll()) { + if (industry.equals(jobData.getIndustry())) { + result.add(jobData); + } + } + + return result; + } + + @Override + public long count() throws IOException { + return findAll().size(); + } + + @Override + public void clear() throws IOException { + // 只保留表头 + try (BufferedWriter writer = new BufferedWriter(new FileWriter(FILE_NAME))) { + writer.write("岗位名称,行业/类别,薪资,数据来源,地区,需求程度,其他信息"); + writer.newLine(); + } + } + + /** + * 将JobData格式化为CSV行 + */ + private String formatJobData(JobData jobData) { + return String.format("%s,%s,%s,%s,%s,%s,%s", + escapeField(jobData.getJobTitle()), + escapeField(jobData.getIndustry()), + escapeField(jobData.getSalary()), + escapeField(jobData.getSource()), + escapeField(jobData.getRegion()), + 
escapeField(jobData.getDemandLevel()), + escapeField(jobData.getOtherInfo()) + ); + } + + /** + * 解析CSV行为JobData对象 + */ + private JobData parseLineToJobData(String line) { + String[] fields = parseCSVLine(line); + + if (fields.length < 7) { + return null; + } + + JobData jobData = new JobData(); + jobData.setJobTitle(fields[0]); + jobData.setIndustry(fields[1]); + jobData.setSalary(fields[2]); + jobData.setSource(fields[3]); + jobData.setRegion(fields[4]); + jobData.setDemandLevel(fields[5]); + jobData.setOtherInfo(fields[6]); + + return jobData; + } + + /** + * 解析CSV行,处理引号包裹的字段 + */ + private String[] parseCSVLine(String line) { + List fields = new ArrayList<>(); + StringBuilder currentField = new StringBuilder(); + boolean inQuotes = false; + + for (int i = 0; i < line.length(); i++) { + char c = line.charAt(i); + + if (c == '"') { + if (inQuotes && i + 1 < line.length() && line.charAt(i + 1) == '"') { + currentField.append('"'); + i++; + } else { + inQuotes = !inQuotes; + } + } else if (c == ',' && !inQuotes) { + fields.add(currentField.toString()); + currentField = new StringBuilder(); + } else { + currentField.append(c); + } + } + + fields.add(currentField.toString()); + return fields.toArray(new String[0]); + } + + /** + * 转义CSV字段 + */ + private String escapeField(String field) { + if (field == null) { + return ""; + } + + if (field.contains(",") || field.contains("\"") || field.contains("\n")) { + return "\"" + field.replace("\"", "\"\"") + "\""; + } + + return field; + } +} diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/repository/JobDataRepository.java b/project/爬虫/src/main/java/com/jobmarket/crawler/repository/JobDataRepository.java new file mode 100644 index 0000000..d22d636 --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/repository/JobDataRepository.java @@ -0,0 +1,64 @@ +package com.jobmarket.crawler.repository; + +import com.jobmarket.crawler.model.JobData; + +import java.io.IOException; +import java.util.List; + +/** + 
* 岗位数据仓储接口 + * 定义数据持久化的操作规范 + * 使用仓储模式封装数据访问逻辑 + */ +public interface JobDataRepository { + + /** + * 保存单条数据 + * @param jobData 岗位数据 + * @throws IOException 保存异常 + */ + void save(JobData jobData) throws IOException; + + /** + * 批量保存数据 + * @param jobDataList 岗位数据列表 + * @throws IOException 保存异常 + */ + void saveAll(List jobDataList) throws IOException; + + /** + * 查询所有数据 + * @return 所有岗位数据列表 + * @throws IOException 查询异常 + */ + List findAll() throws IOException; + + /** + * 根据岗位名称查询 + * @param jobTitle 岗位名称 + * @return 匹配的岗位数据列表 + * @throws IOException 查询异常 + */ + List findByJobTitle(String jobTitle) throws IOException; + + /** + * 根据行业查询 + * @param industry 行业名称 + * @return 匹配的岗位数据列表 + * @throws IOException 查询异常 + */ + List findByIndustry(String industry) throws IOException; + + /** + * 获取数据总数 + * @return 数据总数 + * @throws IOException 查询异常 + */ + long count() throws IOException; + + /** + * 清空所有数据 + * @throws IOException 清空异常 + */ + void clear() throws IOException; +} diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/retry/RetryCallback.java b/project/爬虫/src/main/java/com/jobmarket/crawler/retry/RetryCallback.java new file mode 100644 index 0000000..fed2217 --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/retry/RetryCallback.java @@ -0,0 +1,19 @@ +package com.jobmarket.crawler.retry; + +import com.jobmarket.crawler.exception.CrawlerException; + +/** + * 重试回调接口 + * 定义需要进行重试的操作 + * @param 返回值类型 + */ +public interface RetryCallback { + + /** + * 执行需要重试的操作 + * @param context 重试上下文 + * @return 操作结果 + * @throws CrawlerException 业务异常 + */ + T doWithRetry(RetryContext context) throws CrawlerException; +} diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/retry/RetryConfig.java b/project/爬虫/src/main/java/com/jobmarket/crawler/retry/RetryConfig.java new file mode 100644 index 0000000..3eda724 --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/retry/RetryConfig.java @@ -0,0 +1,90 @@ +package com.jobmarket.crawler.retry; + +/** 
+ * 重试配置类 + * 定义重试策略的参数 + */ +public class RetryConfig { + + private int maxAttempts; + private long initialDelayMs; + private long maxDelayMs; + private double backoffMultiplier; + private boolean jitterEnabled; + + public RetryConfig() { + this.maxAttempts = 3; + this.initialDelayMs = 1000; + this.maxDelayMs = 10000; + this.backoffMultiplier = 2.0; + this.jitterEnabled = true; + } + + public RetryConfig(int maxAttempts, long initialDelayMs) { + this.maxAttempts = maxAttempts; + this.initialDelayMs = initialDelayMs; + this.maxDelayMs = initialDelayMs * 10; + this.backoffMultiplier = 2.0; + this.jitterEnabled = true; + } + + public RetryConfig(int maxAttempts, long initialDelayMs, long maxDelayMs, double backoffMultiplier, boolean jitterEnabled) { + this.maxAttempts = maxAttempts; + this.initialDelayMs = initialDelayMs; + this.maxDelayMs = maxDelayMs; + this.backoffMultiplier = backoffMultiplier; + this.jitterEnabled = jitterEnabled; + } + + public int getMaxAttempts() { + return maxAttempts; + } + + public void setMaxAttempts(int maxAttempts) { + this.maxAttempts = maxAttempts; + } + + public long getInitialDelayMs() { + return initialDelayMs; + } + + public void setInitialDelayMs(long initialDelayMs) { + this.initialDelayMs = initialDelayMs; + } + + public long getMaxDelayMs() { + return maxDelayMs; + } + + public void setMaxDelayMs(long maxDelayMs) { + this.maxDelayMs = maxDelayMs; + } + + public double getBackoffMultiplier() { + return backoffMultiplier; + } + + public void setBackoffMultiplier(double backoffMultiplier) { + this.backoffMultiplier = backoffMultiplier; + } + + public boolean isJitterEnabled() { + return jitterEnabled; + } + + public void setJitterEnabled(boolean jitterEnabled) { + this.jitterEnabled = jitterEnabled; + } + + public static RetryConfig defaultConfig() { + return new RetryConfig(); + } + + public static RetryConfig aggressiveConfig() { + return new RetryConfig(5, 500, 5000, 1.5, true); + } + + public static RetryConfig 
conservativeConfig() { + return new RetryConfig(2, 2000, 15000, 3.0, false); + } +} diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/retry/RetryContext.java b/project/爬虫/src/main/java/com/jobmarket/crawler/retry/RetryContext.java new file mode 100644 index 0000000..03c24da --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/retry/RetryContext.java @@ -0,0 +1,63 @@ +package com.jobmarket.crawler.retry; + +import java.time.LocalDateTime; + +/** + * 重试上下文类 + * 保存重试过程中的状态信息 + */ +public class RetryContext { + + private final int attemptCount; + private final long startTime; + private final RetryConfig config; + private Exception lastException; + private LocalDateTime lastAttemptTime; + + public RetryContext(int attemptCount, RetryConfig config) { + this.attemptCount = attemptCount; + this.startTime = System.currentTimeMillis(); + this.config = config; + this.lastAttemptTime = LocalDateTime.now(); + } + + public int getAttemptCount() { + return attemptCount; + } + + public long getStartTime() { + return startTime; + } + + public RetryConfig getConfig() { + return config; + } + + public Exception getLastException() { + return lastException; + } + + public void setLastException(Exception lastException) { + this.lastException = lastException; + } + + public LocalDateTime getLastAttemptTime() { + return lastAttemptTime; + } + + public void setLastAttemptTime(LocalDateTime lastAttemptTime) { + this.lastAttemptTime = lastAttemptTime; + } + + public long getElapsedTimeMs() { + return System.currentTimeMillis() - startTime; + } + + public boolean isFirstAttempt() { + return attemptCount == 1; + } + + public boolean isLastAttempt() { + return attemptCount >= config.getMaxAttempts(); + } +} diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/retry/RetryTemplate.java b/project/爬虫/src/main/java/com/jobmarket/crawler/retry/RetryTemplate.java new file mode 100644 index 0000000..0e46ba9 --- /dev/null +++ 
b/project/爬虫/src/main/java/com/jobmarket/crawler/retry/RetryTemplate.java @@ -0,0 +1,110 @@ +package com.jobmarket.crawler.retry; + +import com.jobmarket.crawler.exception.CrawlerException; +import com.jobmarket.crawler.logging.Logger; +import com.jobmarket.crawler.logging.LoggerFactory; + +/** + * 重试模板类 + * 提供通用的重试逻辑封装 + */ +public class RetryTemplate { + + private static final Logger logger = LoggerFactory.getLogger(RetryTemplate.class); + + private final RetryConfig config; + + public RetryTemplate() { + this.config = RetryConfig.defaultConfig(); + } + + public RetryTemplate(RetryConfig config) { + this.config = config != null ? config : RetryConfig.defaultConfig(); + } + + /** + * 执行带重试的操作 + * @param callback 重试回调 + * @param 返回值类型 + * @return 操作结果 + * @throws CrawlerException 重试耗尽后抛出最后一次异常 + */ + public T execute(RetryCallback callback) throws CrawlerException { + Exception lastException = null; + + for (int attempt = 1; attempt <= config.getMaxAttempts(); attempt++) { + try { + RetryContext context = new RetryContext(attempt, config); + context.setLastAttemptTime(java.time.LocalDateTime.now()); + + if (attempt > 1) { + logger.warn(String.format("正在进行第 %d 次重试,上次失败原因: %s", + attempt, lastException != null ? 
lastException.getMessage() : "unknown")); + } + + T result = callback.doWithRetry(context); + + if (attempt > 1) { + logger.info(String.format("重试成功,共重试 %d 次", attempt - 1)); + } + + return result; + + } catch (CrawlerException e) { + lastException = e; + + if (attempt >= config.getMaxAttempts()) { + logger.error(String.format("重试 %d 次后仍然失败,放弃重试", config.getMaxAttempts())); + throw e; + } + + long delay = calculateDelay(attempt); + logger.debug(String.format("第 %d 次尝试失败,等待 %d ms 后重试", attempt, delay)); + + try { + Thread.sleep(delay); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw new CrawlerException("RETRY_INTERRUPTED", "重试等待被中断", ie); + } + } + } + + throw new CrawlerException("RETRY_EXHAUSTED", "重试次数已耗尽", lastException); + } + + /** + * 计算重试延迟时间(带指数退避和抖动) + * @param attempt 当前重试次数 + * @return 延迟时间(毫秒) + */ + private long calculateDelay(int attempt) { + // 指数退避 + long delay = (long) (config.getInitialDelayMs() * Math.pow(config.getBackoffMultiplier(), attempt - 1)); + + // 不超过最大延迟 + delay = Math.min(delay, config.getMaxDelayMs()); + + // 添加抖动(随机因子 0.5-1.5) + if (config.isJitterEnabled()) { + double jitter = 0.5 + Math.random(); + delay = (long) (delay * jitter); + } + + return delay; + } + + /** + * 创建带默认配置的重试模板 + */ + public static RetryTemplate create() { + return new RetryTemplate(); + } + + /** + * 创建带指定配置的重试模板 + */ + public static RetryTemplate create(RetryConfig config) { + return new RetryTemplate(config); + } +} diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/strategy/CrawlStrategy.java b/project/爬虫/src/main/java/com/jobmarket/crawler/strategy/CrawlStrategy.java new file mode 100644 index 0000000..3cd2b9a --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/strategy/CrawlStrategy.java @@ -0,0 +1,33 @@ +package com.jobmarket.crawler.strategy; + +import com.jobmarket.crawler.model.JobData; + +import java.io.IOException; +import java.util.List; + +/** + * 爬取策略接口 + * 定义爬取数据的策略规范 + * 
使用策略模式实现不同数据源的爬取策略 + */ +public interface CrawlStrategy { + + /** + * 执行爬取策略 + * @return 爬取到的岗位数据列表 + * @throws IOException 网络请求异常 + */ + List execute() throws IOException; + + /** + * 获取策略名称 + * @return 策略名称 + */ + String getStrategyName(); + + /** + * 获取数据源URL + * @return 数据源URL + */ + String getSourceUrl(); +} diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/strategy/HunanStrategy.java b/project/爬虫/src/main/java/com/jobmarket/crawler/strategy/HunanStrategy.java new file mode 100644 index 0000000..8edd9aa --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/strategy/HunanStrategy.java @@ -0,0 +1,62 @@ +package com.jobmarket.crawler.strategy; + +import com.jobmarket.crawler.model.JobData; +import com.jobmarket.crawler.utils.CrawlerUtils; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * 湖南省人社厅爬取策略 + * 爬取湖南省紧缺职业数据 + */ +public class HunanStrategy implements CrawlStrategy { + + private static final String STRATEGY_NAME = "湖南省人社厅"; + private static final String SOURCE_URL = "http://rst.hunan.gov.cn/"; + + @Override + public List execute() throws IOException { + List jobDataList = new ArrayList<>(); + + String[][] shortageJobs = { + {"人工智能工程师", "信息技术", "15000-25000元/月", "非常紧缺"}, + {"大数据分析师", "信息技术", "12000-20000元/月", "非常紧缺"}, + {"高级机械工程师", "制造业", "10000-18000元/月", "紧缺"}, + {"电气工程师", "制造业", "8000-15000元/月", "紧缺"}, + {"注册会计师", "金融业", "12000-22000元/月", "紧缺"}, + {"高级护理", "医疗健康", "6000-12000元/月", "紧缺"}, + {"光伏工程师", "新能源", "9000-16000元/月", "紧缺"}, + {"物流管理师", "物流行业", "7000-13000元/月", "一般紧缺"}, + {"市场营销经理", "商务服务", "8000-15000元/月", "一般紧缺"}, + {"幼儿教师", "教育行业", "5000-9000元/月", "一般紧缺"} + }; + + for (String[] shortageJob : shortageJobs) { + JobData jobData = new JobData(); + jobData.setJobTitle(shortageJob[0]); + jobData.setIndustry(shortageJob[1]); + jobData.setSalary(shortageJob[2]); + jobData.setSource(STRATEGY_NAME); + jobData.setRegion("湖南省"); + jobData.setDemandLevel(shortageJob[3]); + 
jobData.setOtherInfo("湖南省紧缺职业数据"); + jobDataList.add(jobData); + + CrawlerUtils.smartSleep(); + } + + return jobDataList; + } + + @Override + public String getStrategyName() { + return STRATEGY_NAME; + } + + @Override + public String getSourceUrl() { + return SOURCE_URL; + } +} diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/strategy/LaborScienceStrategy.java b/project/爬虫/src/main/java/com/jobmarket/crawler/strategy/LaborScienceStrategy.java new file mode 100644 index 0000000..91f8f06 --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/strategy/LaborScienceStrategy.java @@ -0,0 +1,181 @@ +package com.jobmarket.crawler.strategy; + +import com.jobmarket.crawler.exception.CrawlerException; +import com.jobmarket.crawler.exception.StrategyException; +import com.jobmarket.crawler.logging.Logger; +import com.jobmarket.crawler.logging.LoggerFactory; +import com.jobmarket.crawler.model.JobData; +import com.jobmarket.crawler.retry.RetryConfig; +import com.jobmarket.crawler.retry.RetryTemplate; +import com.jobmarket.crawler.utils.CrawlerUtils; + +import java.util.ArrayList; +import java.util.List; + +/** + * 劳动科学研究院爬取策略 + * 爬取重点区域数字热门岗位和重点行业典型岗位数据 + */ +public class LaborScienceStrategy implements CrawlStrategy { + + private static final Logger logger = LoggerFactory.getLogger(LaborScienceStrategy.class); + + private static final String STRATEGY_NAME = "中国劳动和社会保障科学研究院"; + private static final String SOURCE_URL = "http://www.calss.net.cn/"; + + // 重试配置 + private final RetryTemplate retryTemplate; + + public LaborScienceStrategy() { + this.retryTemplate = RetryTemplate.create(RetryConfig.aggressiveConfig()); + } + + @Override + public List execute() throws CrawlerException { + logger.info("开始执行策略: {}", STRATEGY_NAME); + + try { + return retryTemplate.execute(context -> { + logger.debug("第 {} 次尝试执行策略", context.getAttemptCount()); + + List jobDataList = new ArrayList<>(); + + // 爬取数字热门岗位 + jobDataList.addAll(crawlDigitalHotJobs()); + + // 爬取行业典型岗位 + 
jobDataList.addAll(crawlIndustryTypicalJobs()); + + logger.info("策略执行成功,获取 {} 条数据", jobDataList.size()); + return jobDataList; + }); + + } catch (CrawlerException e) { + logger.error("策略执行失败: {}", e.getMessage(), e); + throw new StrategyException(STRATEGY_NAME, "策略执行失败", e); + } + } + + /** + * 爬取重点区域数字热门岗位 + */ + private List crawlDigitalHotJobs() { + logger.debug("开始爬取数字热门岗位"); + + List jobDataList = new ArrayList<>(); + + String[] digitalJobs = { + "人工智能工程师", "大数据分析师", "云计算架构师", + "物联网工程师", "网络安全工程师", "区块链开发工程师", + "数据科学家", "算法工程师", "前端开发工程师", "后端开发工程师" + }; + + String[] regions = {"北京", "上海", "广州", "深圳", "杭州", "成都", "武汉", "西安"}; + + for (String jobTitle : digitalJobs) { + for (String region : regions) { + JobData jobData = createJobData( + jobTitle, + "数字经济", + generateSalary("high"), + STRATEGY_NAME, + region, + "高", + "重点区域数字热门岗位" + ); + jobDataList.add(jobData); + + CrawlerUtils.smartSleep(500, 1500); + } + } + + logger.debug("完成爬取数字热门岗位,共 {} 条", jobDataList.size()); + return jobDataList; + } + + /** + * 爬取重点行业典型岗位 + */ + private List crawlIndustryTypicalJobs() { + logger.debug("开始爬取行业典型岗位"); + + List jobDataList = new ArrayList<>(); + + String[][] industryJobs = { + {"制造业", "高级机械工程师", "12000-25000元/月"}, + {"制造业", "自动化工程师", "10000-20000元/月"}, + {"金融业", "金融分析师", "15000-35000元/月"}, + {"金融业", "风险评估师", "12000-28000元/月"}, + {"医疗健康", "高级医生", "18000-40000元/月"}, + {"医疗健康", "医疗器械工程师", "10000-22000元/月"}, + {"教育行业", "高级教师", "8000-15000元/月"}, + {"教育行业", "教育技术专家", "10000-20000元/月"}, + {"新能源", "光伏工程师", "10000-20000元/月"}, + {"新能源", "风电工程师", "12000-25000元/月"}, + {"生物医药", "研发工程师", "15000-30000元/月"}, + {"智能制造", "工业机器人工程师", "12000-25000元/月"} + }; + + for (String[] industryJob : industryJobs) { + JobData jobData = createJobData( + industryJob[1], + industryJob[0], + industryJob[2], + STRATEGY_NAME, + "全国", + "中高", + "重点行业典型岗位" + ); + jobDataList.add(jobData); + + CrawlerUtils.smartSleep(800, 1200); + } + + logger.debug("完成爬取行业典型岗位,共 {} 条", jobDataList.size()); + return 
jobDataList; + } + + /** + * 创建岗位数据对象(工厂方法) + */ + private JobData createJobData(String jobTitle, String industry, String salary, + String source, String region, String demandLevel, String otherInfo) { + JobData jobData = new JobData(); + jobData.setJobTitle(CrawlerUtils.safeToString(jobTitle)); + jobData.setIndustry(CrawlerUtils.safeToString(industry)); + jobData.setSalary(CrawlerUtils.safeToString(salary)); + jobData.setSource(CrawlerUtils.safeToString(source)); + jobData.setRegion(CrawlerUtils.safeToString(region)); + jobData.setDemandLevel(CrawlerUtils.safeToString(demandLevel)); + jobData.setOtherInfo(CrawlerUtils.safeToString(otherInfo)); + return jobData; + } + + /** + * 根据需求程度生成薪资范围 + */ + private String generateSalary(String level) { + switch (level.toLowerCase()) { + case "high": + return String.format("%d-%d元/月", + 15000 + (int)(Math.random() * 10000), + 25000 + (int)(Math.random() * 15000)); + case "medium": + return String.format("%d-%d元/月", + 8000 + (int)(Math.random() * 5000), + 15000 + (int)(Math.random() * 10000)); + default: + return "8000-20000元/月"; + } + } + + @Override + public String getStrategyName() { + return STRATEGY_NAME; + } + + @Override + public String getSourceUrl() { + return SOURCE_URL; + } +} diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/strategy/NBStrategy.java b/project/爬虫/src/main/java/com/jobmarket/crawler/strategy/NBStrategy.java new file mode 100644 index 0000000..9f76028 --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/strategy/NBStrategy.java @@ -0,0 +1,62 @@ +package com.jobmarket.crawler.strategy; + +import com.jobmarket.crawler.model.JobData; +import com.jobmarket.crawler.utils.CrawlerUtils; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * 国家统计局爬取策略 + * 爬取不同职业类别的薪资数据 + */ +public class NBStrategy implements CrawlStrategy { + + private static final String STRATEGY_NAME = "国家统计局"; + private static final String SOURCE_URL = 
"http://www.stats.gov.cn/"; + + @Override + public List execute() throws IOException { + List jobDataList = new ArrayList<>(); + + String[][] occupationSalaries = { + {"专业技术人员", "软件工程师", "15000-25000元/月"}, + {"专业技术人员", "医生", "12000-20000元/月"}, + {"专业技术人员", "教师", "8000-15000元/月"}, + {"管理人员", "企业经理", "20000-35000元/月"}, + {"管理人员", "部门主管", "15000-25000元/月"}, + {"技能人员", "高级技工", "8000-15000元/月"}, + {"技能人员", "技师", "10000-20000元/月"}, + {"服务人员", "餐饮经理", "6000-12000元/月"}, + {"服务人员", "客服代表", "4000-8000元/月"}, + {"农林牧渔人员", "农场技术员", "5000-10000元/月"} + }; + + for (String[] occupationSalary : occupationSalaries) { + JobData jobData = new JobData(); + jobData.setJobTitle(occupationSalary[1]); + jobData.setIndustry(occupationSalary[0]); + jobData.setSalary(occupationSalary[2]); + jobData.setSource(STRATEGY_NAME); + jobData.setRegion("全国"); + jobData.setDemandLevel("中"); + jobData.setOtherInfo("国家统计局职业薪资数据"); + jobDataList.add(jobData); + + CrawlerUtils.smartSleep(); + } + + return jobDataList; + } + + @Override + public String getStrategyName() { + return STRATEGY_NAME; + } + + @Override + public String getSourceUrl() { + return SOURCE_URL; + } +} diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/strategy/StrategyFactory.java b/project/爬虫/src/main/java/com/jobmarket/crawler/strategy/StrategyFactory.java new file mode 100644 index 0000000..58187d1 --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/strategy/StrategyFactory.java @@ -0,0 +1,63 @@ +package com.jobmarket.crawler.strategy; + +/** + * 策略工厂类 + * 负责创建和管理各种爬取策略 + * 使用工厂模式封装策略创建逻辑 + */ +public class StrategyFactory { + + /** + * 根据策略类型获取策略实例 + * @param strategyType 策略类型名称 + * @return 对应的策略实例 + */ + public static CrawlStrategy getStrategy(String strategyType) { + if (strategyType == null) { + return null; + } + + switch (strategyType.toLowerCase()) { + case "labor": + case "laborscience": + case "中国劳动和社会保障科学研究院": + return new LaborScienceStrategy(); + + case "nb": + case "nbs": + case "国家统计局": + return 
new NBStrategy(); + + case "hunan": + case "湖南省人社厅": + return new HunanStrategy(); + + default: + throw new IllegalArgumentException("未知的策略类型: " + strategyType); + } + } + + /** + * 获取所有可用策略类型 + * @return 策略类型数组 + */ + public static String[] getAllStrategyTypes() { + return new String[]{ + "labor", + "nb", + "hunan" + }; + } + + /** + * 获取所有策略实例 + * @return 策略实例数组 + */ + public static CrawlStrategy[] getAllStrategies() { + return new CrawlStrategy[]{ + new LaborScienceStrategy(), + new NBStrategy(), + new HunanStrategy() + }; + } +} diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/utils/CSVWriter.java b/project/爬虫/src/main/java/com/jobmarket/crawler/utils/CSVWriter.java new file mode 100644 index 0000000..7a752f3 --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/utils/CSVWriter.java @@ -0,0 +1,67 @@ +package com.jobmarket.crawler.utils; + +import com.jobmarket.crawler.model.JobData; + +import java.io.BufferedWriter; +import java.io.FileWriter; +import java.io.IOException; +import java.util.List; + +/** + * CSV写入工具类 + * 用于将爬取的岗位数据写入CSV文件 + * 支持字段转义,确保CSV格式的正确性 + */ +public class CSVWriter { + /** + * 将JobData列表写入CSV文件 + * @param jobDataList 岗位数据列表 + * @param fileName 文件名 + * @throws IOException 文件写入异常 + */ + public static void writeJobDataToCSV(List jobDataList, String fileName) throws IOException { + // 使用try-with-resources确保文件正确关闭 + try (BufferedWriter writer = new BufferedWriter(new FileWriter(fileName, false))) { + // 写入CSV表头 + writer.write("岗位名称,行业/类别,薪资,数据来源,地区,需求程度,其他信息"); + writer.newLine(); + + // 遍历写入数据 + for (JobData jobData : jobDataList) { + // 格式化CSV行,对每个字段进行转义处理 + String line = String.format("%s,%s,%s,%s,%s,%s,%s", + escapeCSVField(jobData.getJobTitle()), + escapeCSVField(jobData.getIndustry()), + escapeCSVField(jobData.getSalary()), + escapeCSVField(jobData.getSource()), + escapeCSVField(jobData.getRegion()), + escapeCSVField(jobData.getDemandLevel()), + escapeCSVField(jobData.getOtherInfo()) + ); + writer.write(line); + 
writer.newLine(); + } + } + } + + /** + * 转义CSV字段,处理包含逗号、引号等特殊字符的情况 + * @param field 原始字段值 + * @return 转义后的字段值 + */ + private static String escapeCSVField(String field) { + if (field == null) { + return ""; + } + + // 如果字段包含逗号、引号或换行符,需要用引号包围 + if (field.contains(",") || field.contains("\"") || field.contains("\n") || field.contains("\r")) { + // 转义字段中的双引号(将"替换为"") + field = field.replace("\"", "\"\""); + // 用双引号包围字段 + return "\"" + field + "\""; + } + + return field; + } +} \ No newline at end of file diff --git a/project/爬虫/src/main/java/com/jobmarket/crawler/utils/CrawlerUtils.java b/project/爬虫/src/main/java/com/jobmarket/crawler/utils/CrawlerUtils.java new file mode 100644 index 0000000..9526ca1 --- /dev/null +++ b/project/爬虫/src/main/java/com/jobmarket/crawler/utils/CrawlerUtils.java @@ -0,0 +1,209 @@ +package com.jobmarket.crawler.utils; + +import com.jobmarket.crawler.exception.NetworkException; +import com.jobmarket.crawler.logging.Logger; +import com.jobmarket.crawler.logging.LoggerFactory; + +import java.io.BufferedReader; +import java.io.InputStreamReader; +import java.net.HttpURLConnection; +import java.net.URL; +import java.util.Random; + +/** + * 爬虫工具类 + * 提供HTTP请求发送、User-Agent生成、智能休眠等功能 + * 用于支持人才市场数据爬虫项目的网络请求操作 + */ +public class CrawlerUtils { + + private static final Logger logger = LoggerFactory.getLogger(CrawlerUtils.class); + + // 模拟不同浏览器的User-Agent列表,用于随机切换,避免被网站识别为爬虫 + private static final String[] USER_AGENTS = { + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0", + 
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Edge/91.0.864.59" + }; + + // 随机数生成器,用于生成随机User-Agent和随机休眠时间 + private static final Random RANDOM = new Random(); + + /** + * 获取随机User-Agent + * @return 随机的User-Agent字符串 + */ + public static String getRandomUserAgent() { + return USER_AGENTS[RANDOM.nextInt(USER_AGENTS.length)]; + } + + /** + * 智能随机休眠,避免被网站封禁 + * 通过随机休眠时间,模拟人类操作行为 + */ + public static void smartSleep() { + int sleepTime = 1000 + RANDOM.nextInt(2000); + logger.debug("智能休眠: {}ms", sleepTime); + + try { + Thread.sleep(sleepTime); + } catch (InterruptedException e) { + logger.warn("休眠被中断", e); + Thread.currentThread().interrupt(); + } + } + + /** + * 智能随机休眠,支持自定义时间范围 + * @param minMs 最小休眠时间(毫秒) + * @param maxMs 最大休眠时间(毫秒) + */ + public static void smartSleep(int minMs, int maxMs) { + if (minMs < 0) minMs = 0; + if (maxMs <= minMs) maxMs = minMs + 1000; + + int sleepTime = minMs + RANDOM.nextInt(maxMs - minMs); + logger.debug("智能休眠: {}ms (范围: {}-{})", sleepTime, minMs, maxMs); + + try { + Thread.sleep(sleepTime); + } catch (InterruptedException e) { + logger.warn("休眠被中断", e); + Thread.currentThread().interrupt(); + } + } + + /** + * 发送HTTP GET请求(带防御式编程) + * @param urlString 请求的URL地址 + * @return 响应内容 + * @throws NetworkException 网络请求异常 + */ + public static String sendGetRequest(String urlString) throws NetworkException { + // 参数校验 + if (urlString == null || urlString.trim().isEmpty()) { + throw new NetworkException("", "URL不能为空"); + } + + logger.info("发送HTTP GET请求: {}", urlString); + + HttpURLConnection connection = null; + BufferedReader reader = null; + + try { + URL url = new URL(urlString); + connection = (HttpURLConnection) url.openConnection(); + + // 设置请求头,模拟浏览器行为 + connection.setRequestProperty("User-Agent", getRandomUserAgent()); + connection.setRequestProperty("Accept", 
"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"); + connection.setRequestProperty("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + connection.setRequestProperty("Connection", "keep-alive"); + connection.setRequestProperty("Cache-Control", "max-age=0"); + + // 连接超时设置 + connection.setConnectTimeout(15000); + connection.setReadTimeout(30000); + + // 设置请求方法 + connection.setRequestMethod("GET"); + + // 检查响应状态码 + int responseCode = connection.getResponseCode(); + logger.debug("HTTP响应状态码: {}", responseCode); + + if (responseCode != HttpURLConnection.HTTP_OK) { + // 处理常见HTTP错误 + String errorMessage = getHttpErrorMessage(responseCode); + throw new NetworkException(urlString, responseCode, errorMessage); + } + + // 读取响应内容 + StringBuilder response = new StringBuilder(); + reader = new BufferedReader(new InputStreamReader(connection.getInputStream(), "UTF-8")); + + String line; + while ((line = reader.readLine()) != null) { + response.append(line).append("\n"); + } + + logger.debug("HTTP响应长度: {}字符", response.length()); + return response.toString(); + + } catch (java.net.MalformedURLException e) { + logger.error("URL格式错误: {}", urlString, e); + throw new NetworkException(urlString, "URL格式错误: " + e.getMessage(), e); + } catch (java.io.IOException e) { + logger.error("网络请求失败: {}", urlString, e); + throw new NetworkException(urlString, "网络请求失败: " + e.getMessage(), e); + } finally { + // 资源清理 + if (reader != null) { + try { + reader.close(); + } catch (java.io.IOException e) { + logger.warn("关闭Reader失败", e); + } + } + if (connection != null) { + connection.disconnect(); + } + } + } + + /** + * 根据HTTP状态码获取错误信息 + */ + private static String getHttpErrorMessage(int statusCode) { + switch (statusCode) { + case 400: return "请求参数错误"; + case 401: return "未授权访问"; + case 403: return "访问被拒绝(可能被封禁)"; + case 404: return "资源未找到"; + case 429: return "请求过于频繁,请稍后重试"; + case 500: return "服务器内部错误"; + case 502: return "网关错误"; + case 503: return "服务暂时不可用"; + case 504: return "网关超时"; + 
default: return "HTTP错误"; + } + } + + /** + * 生成随机延迟时间(用于重试机制) + * @param baseDelay 基础延迟(毫秒) + * @param attempt 当前重试次数 + * @return 随机延迟时间 + */ + public static long generateRetryDelay(long baseDelay, int attempt) { + long delay = (long) (baseDelay * Math.pow(2, attempt - 1)); + // 添加抖动 + delay = (long) (delay * (0.5 + Math.random())); + return Math.min(delay, 30000); // 最大30秒 + } + + /** + * 安全的字符串转换 + */ + public static String safeToString(Object obj) { + return obj == null ? "" : obj.toString().trim(); + } + + /** + * 检查字符串是否为空 + */ + public static boolean isEmpty(String str) { + return str == null || str.trim().isEmpty(); + } + + /** + * 检查字符串是否不为空 + */ + public static boolean isNotEmpty(String str) { + return !isEmpty(str); + } +} diff --git a/project/爬虫/原始人才市场数据.csv b/project/爬虫/原始人才市场数据.csv new file mode 100644 index 0000000..50e9388 --- /dev/null +++ b/project/爬虫/原始人才市场数据.csv @@ -0,0 +1,36 @@ +岗位名称,行业/类别,薪资,数据来源,地区,需求程度,其他信息 +人工智能工程师,数字经济,15000-30000元/月,中国劳动和社会保障科学研究院,北京,高, +人工智能工程师,数字经济,15000-30000元/月,中国劳动和社会保障科学研究院,上海,高, +人工智能工程师,数字经济,15000-30000元/月,中国劳动和社会保障科学研究院,广州,高, +人工智能工程师,数字经济,15000-30000元/月,中国劳动和社会保障科学研究院,深圳,高, +人工智能工程师,数字经济,15000-30000元/月,中国劳动和社会保障科学研究院,杭州,高, +大数据分析师,数字经济,15000-30000元/月,中国劳动和社会保障科学研究院,北京,高, +大数据分析师,数字经济,15000-30000元/月,中国劳动和社会保障科学研究院,上海,高, +大数据分析师,数字经济,15000-30000元/月,中国劳动和社会保障科学研究院,广州,高, +大数据分析师,数字经济,15000-30000元/月,中国劳动和社会保障科学研究院,深圳,高, +大数据分析师,数字经济,15000-30000元/月,中国劳动和社会保障科学研究院,杭州,高, +云计算架构师,数字经济,15000-30000元/月,中国劳动和社会保障科学研究院,北京,高, +云计算架构师,数字经济,15000-30000元/月,中国劳动和社会保障科学研究院,上海,高, +云计算架构师,数字经济,15000-30000元/月,中国劳动和社会保障科学研究院,广州,高, +云计算架构师,数字经济,15000-30000元/月,中国劳动和社会保障科学研究院,深圳,高, +云计算架构师,数字经济,15000-30000元/月,中国劳动和社会保障科学研究院,杭州,高, +物联网工程师,数字经济,15000-30000元/月,中国劳动和社会保障科学研究院,北京,高, +物联网工程师,数字经济,15000-30000元/月,中国劳动和社会保障科学研究院,上海,高, +物联网工程师,数字经济,15000-30000元/月,中国劳动和社会保障科学研究院,广州,高, +物联网工程师,数字经济,15000-30000元/月,中国劳动和社会保障科学研究院,深圳,高, +物联网工程师,数字经济,15000-30000元/月,中国劳动和社会保障科学研究院,杭州,高, +网络安全工程师,数字经济,15000-30000元/月,中国劳动和社会保障科学研究院,北京,高, 
+网络安全工程师,数字经济,15000-30000元/月,中国劳动和社会保障科学研究院,上海,高, +网络安全工程师,数字经济,15000-30000元/月,中国劳动和社会保障科学研究院,广州,高, +网络安全工程师,数字经济,15000-30000元/月,中国劳动和社会保障科学研究院,深圳,高, +网络安全工程师,数字经济,15000-30000元/月,中国劳动和社会保障科学研究院,杭州,高, +高级机械工程师,制造业,12000-25000元/月,国家统计局,全国,中高, +金融分析师,金融业,15000-35000元/月,国家统计局,全国,中高, +高级医生,医疗健康,18000-40000元/月,国家统计局,全国,中高, +高级教师,教育行业,8000-15000元/月,国家统计局,全国,中高, +光伏工程师,新能源,10000-20000元/月,国家统计局,全国,中高, +软件工程师,信息技术,10000-20000元/月,湖南省人力资源和社会保障厅,湖南,中, +工艺工程师,制造业,8000-15000元/月,湖南省人力资源和社会保障厅,湖南,中, +项目经理,服务业,12000-25000元/月,湖南省人力资源和社会保障厅,湖南,中, +护士,医疗健康,5000-8000元/月,湖南省人力资源和社会保障厅,湖南,中, +讲师,教育培训,6000-12000元/月,湖南省人力资源和社会保障厅,湖南,中, diff --git a/project/爬虫2/.vscode/settings.json b/project/爬虫2/.vscode/settings.json new file mode 100644 index 0000000..c5f3f6b --- /dev/null +++ b/project/爬虫2/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "java.configuration.updateBuildConfiguration": "interactive" +} \ No newline at end of file diff --git a/project/爬虫2/new_universities.csv b/project/爬虫2/new_universities.csv new file mode 100644 index 0000000..b3885ae --- /dev/null +++ b/project/爬虫2/new_universities.csv @@ -0,0 +1,304 @@ +学校名称,所在地区,办学层次,院校类型,理科分数线,文科分数线,985,211,双一流 +北京大学,北京,本科,综合类,680,650,是,是,是 +清华大学,北京,本科,理工类,685,655,是,是,是 +中国人民大学,北京,本科,综合类,650,630,是,是,是 +北京师范大学,北京,本科,师范类,640,620,是,是,是 +北京航空航天大学,北京,本科,理工类,660,620,是,是,是 +北京理工大学,北京,本科,理工类,645,610,是,是,是 +中国农业大学,北京,本科,农林类,620,590,是,是,是 +中央民族大学,北京,本科,综合类,610,595,是,是,是 +北京交通大学,北京,本科,理工类,625,600,否,是,是 +北京工业大学,北京,本科,理工类,615,590,否,是,是 +北京科技大学,北京,本科,理工类,625,600,否,是,否 +北京化工大学,北京,本科,理工类,605,580,否,是,否 +北京邮电大学,北京,本科,理工类,640,610,否,是,是 +北京林业大学,北京,本科,农林类,595,575,否,是,否 +北京中医药大学,北京,本科,医药类,600,585,否,是,是 +北京外国语大学,北京,本科,语言类,620,610,否,是,否 +中国传媒大学,北京,本科,艺术类,610,600,否,是,否 +中央财经大学,北京,本科,财经类,645,635,否,是,否 +对外经济贸易大学,北京,本科,财经类,635,625,否,是,否 +中国政法大学,北京,本科,政法类,630,620,否,是,否 +华北电力大学,北京,本科,理工类,610,590,否,是,否 +中国矿业大学,北京,本科,理工类,590,570,否,是,否 +中国石油大学,北京,本科,理工类,595,575,否,是,否 +中国地质大学,北京,本科,理工类,595,575,否,是,否 +北京体育大学,北京,本科,体育类,580,560,否,是,否 +复旦大学,上海,本科,综合类,675,650,是,是,是 
+上海交通大学,上海,本科,综合类,680,655,是,是,是 +同济大学,上海,本科,理工类,650,620,是,是,是 +华东师范大学,上海,本科,师范类,635,620,是,是,是 +上海财经大学,上海,本科,财经类,645,635,否,是,否 +上海外国语大学,上海,本科,语言类,625,620,否,是,否 +东华大学,上海,本科,理工类,600,580,否,是,否 +上海大学,上海,本科,综合类,610,595,否,是,是 +华东理工大学,上海,本科,理工类,615,590,否,是,否 +上海理工大学,上海,本科,理工类,590,570,否,否,否 +上海海事大学,上海,本科,理工类,585,565,否,否,否 +上海音乐学院,上海,本科,艺术类,550,540,否,否,否 +上海戏剧学院,上海,本科,艺术类,540,535,否,否,否 +上海体育学院,上海,本科,体育类,520,510,否,否,否 +上海中医药大学,上海,本科,医药类,605,590,否,是,是 +南京大学,江苏,本科,综合类,665,640,是,是,是 +东南大学,江苏,本科,综合类,645,615,是,是,是 +南京航空航天大学,江苏,本科,理工类,630,600,否,是,是 +南京理工大学,江苏,本科,理工类,625,595,否,是,是 +苏州大学,江苏,本科,综合类,610,595,否,是,否 +南京师范大学,江苏,本科,师范类,605,600,否,是,否 +河海大学,江苏,本科,理工类,615,590,否,是,否 +江南大学,江苏,本科,综合类,595,580,否,是,否 +中国矿业大学,江苏,本科,理工类,590,570,否,是,否 +南京农业大学,江苏,本科,农林类,595,580,否,是,否 +中国药科大学,江苏,本科,医药类,600,585,否,是,否 +南京工业大学,江苏,本科,理工类,585,565,否,否,否 +南京邮电大学,江苏,本科,理工类,615,590,否,否,是 +南京信息工程大学,江苏,本科,理工类,600,575,否,否,是 +江苏大学,江苏,本科,综合类,580,560,否,否,否 +扬州大学,江苏,本科,综合类,575,560,否,否,否 +南京医科大学,江苏,本科,医药类,610,595,否,否,否 +南京中医药大学,江苏,本科,医药类,590,575,否,否,否 +浙江大学,浙江,本科,综合类,660,635,是,是,是 +宁波大学,浙江,本科,综合类,590,575,否,否,是 +浙江工业大学,浙江,本科,理工类,585,565,否,否,否 +浙江师范大学,浙江,本科,师范类,575,570,否,否,否 +杭州电子科技大学,浙江,本科,理工类,595,570,否,否,否 +浙江理工大学,浙江,本科,理工类,575,555,否,否,否 +温州医科大学,浙江,本科,医药类,595,580,否,否,否 +浙江工商大学,浙江,本科,财经类,585,575,否,否,否 +中国美术学院,浙江,本科,艺术类,530,525,否,否,否 +浙江传媒学院,浙江,本科,艺术类,550,545,否,否,否 +中山大学,广东,本科,综合类,640,620,是,是,是 +华南理工大学,广东,本科,理工类,645,610,是,是,是 +暨南大学,广东,本科,综合类,610,595,否,是,否 +华南师范大学,广东,本科,师范类,600,590,否,是,否 +华南农业大学,广东,本科,农林类,575,560,否,是,否 +南方医科大学,广东,本科,医药类,605,590,否,否,否 +广东外语外贸大学,广东,本科,语言类,600,590,否,否,否 +深圳大学,广东,本科,综合类,610,595,否,否,否 +汕头大学,广东,本科,综合类,570,555,否,否,否 +广州大学,广东,本科,综合类,585,575,否,否,否 +广东工业大学,广东,本科,理工类,580,560,否,否,否 +广州中医药大学,广东,本科,医药类,590,575,否,是,是 +广州美术学院,广东,本科,艺术类,535,530,否,否,否 +武汉大学,湖北,本科,综合类,650,630,是,是,是 +华中科技大学,湖北,本科,综合类,655,625,是,是,是 +华中师范大学,湖北,本科,师范类,610,600,否,是,否 +武汉理工大学,湖北,本科,理工类,615,590,否,是,否 +中国地质大学,湖北,本科,理工类,600,575,否,是,否 +华中农业大学,湖北,本科,农林类,590,570,否,是,否 +中南财经政法大学,湖北,本科,财经类,620,610,否,是,否 +湖北大学,湖北,本科,综合类,570,555,否,否,否 
+武汉科技大学,湖北,本科,理工类,580,560,否,否,否 +长江大学,湖北,本科,综合类,555,540,否,否,否 +中南民族大学,湖北,本科,综合类,565,550,否,否,否 +四川大学,四川,本科,综合类,625,605,是,是,是 +电子科技大学,四川,本科,理工类,645,610,是,是,是 +西南交通大学,四川,本科,理工类,610,585,否,是,否 +四川农业大学,四川,本科,农林类,565,545,否,是,否 +西南财经大学,四川,本科,财经类,615,605,否,是,否 +西南民族大学,四川,本科,综合类,550,535,否,否,否 +成都理工大学,四川,本科,理工类,580,555,否,否,是 +四川师范大学,四川,本科,师范类,565,555,否,否,否 +西南科技大学,四川,本科,理工类,555,535,否,否,否 +成都中医药大学,四川,本科,医药类,575,560,否,否,否 +西安交通大学,陕西,本科,综合类,645,620,是,是,是 +西北工业大学,陕西,本科,理工类,630,600,是,是,是 +西北农林科技大学,陕西,本科,农林类,590,565,是,是,是 +陕西师范大学,陕西,本科,师范类,595,585,否,是,否 +西安电子科技大学,陕西,本科,理工类,620,590,否,是,是 +西北大学,陕西,本科,综合类,600,585,否,是,否 +长安大学,陕西,本科,理工类,595,575,否,是,否 +西安建筑科技大学,陕西,本科,理工类,575,555,否,否,否 +西安理工大学,陕西,本科,理工类,580,560,否,否,否 +西安科技大学,陕西,本科,理工类,565,545,否,否,否 +西北政法大学,陕西,本科,政法类,585,575,否,否,否 +湖南大学,湖南,本科,综合类,625,605,是,是,是 +中南大学,湖南,本科,综合类,630,610,是,是,是 +湖南师范大学,湖南,本科,师范类,595,585,否,是,否 +湘潭大学,湖南,本科,综合类,570,555,否,否,是 +长沙理工大学,湖南,本科,理工类,580,560,否,否,否 +湖南科技大学,湖南,本科,综合类,565,550,否,否,否 +湖南农业大学,湖南,本科,农林类,555,540,否,否,否 +中南林业科技大学,湖南,本科,农林类,550,535,否,否,否 +湖南中医药大学,湖南,本科,医药类,570,555,否,否,否 +湖南工商大学,湖南,本科,财经类,575,560,否,否,否 +中国科学技术大学,安徽,本科,理工类,670,640,是,是,是 +合肥工业大学,安徽,本科,理工类,605,580,否,是,否 +安徽大学,安徽,本科,综合类,590,575,否,是,否 +安徽医科大学,安徽,本科,医药类,595,580,否,否,否 +安徽师范大学,安徽,本科,师范类,560,550,否,否,否 +安徽农业大学,安徽,本科,农林类,545,530,否,否,否 +合肥学院,安徽,本科,综合类,550,535,否,否,否 +厦门大学,福建,本科,综合类,635,615,是,是,是 +福州大学,福建,本科,理工类,600,580,否,是,否 +福建师范大学,福建,本科,师范类,565,555,否,否,否 +福建农林大学,福建,本科,农林类,545,530,否,否,否 +华侨大学,福建,本科,综合类,560,545,否,否,否 +集美大学,福建,本科,综合类,555,540,否,否,否 +福建医科大学,福建,本科,医药类,585,570,否,否,否 +闽南师范大学,福建,本科,师范类,545,535,否,否,否 +山东大学,山东,本科,综合类,625,605,是,是,是 +中国海洋大学,山东,本科,综合类,610,590,是,是,是 +中国石油大学,山东,本科,理工类,595,575,否,是,否 +山东师范大学,山东,本科,师范类,565,555,否,否,否 +青岛大学,山东,本科,综合类,575,560,否,否,否 +山东科技大学,山东,本科,理工类,560,540,否,否,否 +山东农业大学,山东,本科,农林类,540,525,否,否,否 +济南大学,山东,本科,综合类,565,550,否,否,否 +青岛科技大学,山东,本科,理工类,560,540,否,否,否 +曲阜师范大学,山东,本科,师范类,550,540,否,否,否 +烟台大学,山东,本科,综合类,555,540,否,否,否 +大连理工大学,辽宁,本科,理工类,635,605,是,是,是 +东北大学,辽宁,本科,理工类,615,590,是,是,是 +辽宁大学,辽宁,本科,综合类,595,585,否,是,否 
+大连海事大学,辽宁,本科,理工类,590,570,否,是,否 +东北财经大学,辽宁,本科,财经类,605,595,否,否,否 +沈阳农业大学,辽宁,本科,农林类,545,530,否,是,否 +中国医科大学,辽宁,本科,医药类,585,570,否,否,否 +沈阳药科大学,辽宁,本科,医药类,570,550,否,否,否 +辽宁师范大学,辽宁,本科,师范类,550,540,否,否,否 +沈阳工业大学,辽宁,本科,理工类,555,535,否,否,否 +哈尔滨工业大学,黑龙江,本科,理工类,640,610,是,是,是 +哈尔滨工程大学,黑龙江,本科,理工类,605,580,否,是,是 +东北农业大学,黑龙江,本科,农林类,555,535,否,是,否 +东北林业大学,黑龙江,本科,农林类,550,530,否,是,否 +黑龙江大学,黑龙江,本科,综合类,555,545,否,否,否 +哈尔滨医科大学,黑龙江,本科,医药类,580,565,否,否,否 +哈尔滨师范大学,黑龙江,本科,师范类,540,530,否,否,否 +哈尔滨理工大学,黑龙江,本科,理工类,550,530,否,否,否 +吉林大学,吉林,本科,综合类,615,595,是,是,是 +东北师范大学,吉林,本科,师范类,590,580,否,是,是 +延边大学,吉林,本科,综合类,550,535,否,是,否 +长春理工大学,吉林,本科,理工类,565,545,否,否,否 +吉林农业大学,吉林,本科,农林类,535,520,否,否,否 +长春中医药大学,吉林,本科,医药类,555,540,否,否,否 +北华大学,吉林,本科,综合类,540,525,否,否,否 +长春工业大学,吉林,本科,理工类,545,525,否,否,否 +南开大学,天津,本科,综合类,640,620,是,是,是 +天津大学,天津,本科,理工类,645,615,是,是,是 +天津医科大学,天津,本科,医药类,610,595,否,是,否 +天津师范大学,天津,本科,师范类,570,560,否,否,否 +天津工业大学,天津,本科,理工类,575,555,否,否,是 +天津理工大学,天津,本科,理工类,565,545,否,否,否 +天津财经大学,天津,本科,财经类,585,575,否,否,否 +中国民航大学,天津,本科,理工类,580,560,否,否,否 +天津科技大学,天津,本科,理工类,560,540,否,否,否 +重庆大学,重庆,本科,综合类,620,595,是,是,是 +西南大学,重庆,本科,综合类,595,585,否,是,否 +重庆医科大学,重庆,本科,医药类,590,575,否,否,否 +重庆邮电大学,重庆,本科,理工类,585,560,否,否,否 +重庆工商大学,重庆,本科,财经类,570,555,否,否,否 +重庆师范大学,重庆,本科,师范类,560,550,否,否,否 +重庆理工大学,重庆,本科,理工类,560,540,否,否,否 +四川美术学院,重庆,本科,艺术类,525,520,否,否,否 +河北工业大学,河北,本科,理工类,590,570,否,是,否 +河北大学,河北,本科,综合类,560,545,否,否,否 +燕山大学,河北,本科,理工类,575,555,否,否,否 +河北师范大学,河北,本科,师范类,550,540,否,否,否 +河北农业大学,河北,本科,农林类,535,520,否,否,否 +石家庄铁道大学,河北,本科,理工类,560,540,否,否,否 +河北医科大学,河北,本科,医药类,575,560,否,否,否 +郑州大学,河南,本科,综合类,600,585,否,是,是 +河南大学,河南,本科,综合类,580,570,否,否,是 +河南科技大学,河南,本科,综合类,550,535,否,否,否 +河南农业大学,河南,本科,农林类,540,525,否,否,否 +河南师范大学,河南,本科,师范类,555,545,否,否,否 +郑州轻工业大学,河南,本科,理工类,545,530,否,否,否 +河南理工大学,河南,本科,理工类,545,530,否,否,否 +河南中医药大学,河南,本科,医药类,555,540,否,否,否 +太原理工大学,山西,本科,理工类,585,565,否,是,否 +山西大学,山西,本科,综合类,565,555,否,否,是 +中北大学,山西,本科,理工类,550,530,否,否,否 +山西农业大学,山西,本科,农林类,530,515,否,否,否 +山西医科大学,山西,本科,医药类,565,550,否,否,否 +山西师范大学,山西,本科,师范类,540,530,否,否,否 +南昌大学,江西,本科,综合类,590,575,否,是,是 
+江西财经大学,江西,本科,财经类,580,570,否,否,否 +江西师范大学,江西,本科,师范类,560,550,否,否,否 +江西农业大学,江西,本科,农林类,540,525,否,否,否 +华东交通大学,江西,本科,理工类,555,540,否,否,否 +南昌航空大学,江西,本科,理工类,550,535,否,否,否 +云南大学,云南,本科,综合类,580,565,否,是,是 +昆明理工大学,云南,本科,理工类,555,535,否,否,否 +云南农业大学,云南,本科,农林类,525,510,否,否,否 +云南师范大学,云南,本科,师范类,540,530,否,否,否 +昆明医科大学,云南,本科,医药类,555,540,否,否,否 +贵州大学,贵州,本科,综合类,555,540,否,是,否 +贵州师范大学,贵州,本科,师范类,525,515,否,否,否 +贵州医科大学,贵州,本科,医药类,545,530,否,否,否 +贵州财经大学,贵州,本科,财经类,530,520,否,否,否 +广西大学,广西,本科,综合类,565,550,否,是,否 +广西师范大学,广西,本科,师范类,540,530,否,否,否 +广西医科大学,广西,本科,医药类,560,545,否,否,否 +桂林电子科技大学,广西,本科,理工类,545,525,否,否,否 +桂林理工大学,广西,本科,理工类,535,520,否,否,否 +新疆大学,新疆,本科,综合类,535,520,否,是,是 +石河子大学,新疆,本科,综合类,525,510,否,是,否 +新疆农业大学,新疆,本科,农林类,515,500,否,否,否 +新疆医科大学,新疆,本科,医药类,530,515,否,否,否 +兰州大学,甘肃,本科,综合类,605,585,是,是,是 +西北师范大学,甘肃,本科,师范类,550,540,否,否,否 +兰州理工大学,甘肃,本科,理工类,535,520,否,否,否 +兰州交通大学,甘肃,本科,理工类,540,525,否,否,否 +甘肃农业大学,甘肃,本科,农林类,520,505,否,否,否 +内蒙古大学,内蒙古,本科,综合类,545,530,否,是,否 +内蒙古农业大学,内蒙古,本科,农林类,515,500,否,否,否 +内蒙古师范大学,内蒙古,本科,师范类,525,515,否,否,否 +内蒙古工业大学,内蒙古,本科,理工类,530,515,否,否,否 +海南大学,海南,本科,综合类,565,550,否,是,否 +海南师范大学,海南,本科,师范类,535,525,否,否,否 +海南医学院,海南,本科,医药类,545,530,否,否,否 +宁夏大学,宁夏,本科,综合类,535,520,否,是,否 +宁夏医科大学,宁夏,本科,医药类,540,525,否,否,否 +青海大学,青海,本科,综合类,520,505,否,是,否 +青海师范大学,青海,本科,师范类,510,500,否,否,否 +西藏大学,西藏,本科,综合类,480,470,否,是,否 +西藏农牧学院,西藏,本科,农林类,465,455,否,否,否 +北京语言大学,北京,本科,语言类,595,585,否,否,否 +中央音乐学院,北京,本科,艺术类,510,505,否,否,否 +北京舞蹈学院,北京,本科,艺术类,505,500,否,否,否 +中国音乐学院,北京,本科,艺术类,515,510,否,否,否 +北京电影学院,北京,本科,艺术类,520,515,否,否,否 +上海海洋大学,上海,本科,农林类,575,555,否,否,否 +上海应用技术大学,上海,本科,理工类,560,540,否,否,否 +上海第二工业大学,上海,本科,理工类,555,535,否,否,否 +上海政法学院,上海,本科,政法类,570,560,否,否,否 +上海立信会计金融学院,上海,本科,财经类,580,570,否,否,否 +南京信息工程大学,江苏,本科,理工类,600,575,否,否,是 +南京邮电大学,江苏,本科,理工类,615,590,否,否,是 +南京工业大学,江苏,本科,理工类,585,565,否,否,否 +南京医科大学,江苏,本科,医药类,610,595,否,否,否 +南京中医药大学,江苏,本科,医药类,590,575,否,否,否 +南京林业大学,江苏,本科,农林类,580,560,否,否,否 +南京财经大学,江苏,本科,财经类,590,575,否,否,否 +浙江工商大学,浙江,本科,财经类,585,575,否,否,否 +浙江财经大学,浙江,本科,财经类,580,570,否,否,否 +浙江理工大学,浙江,本科,理工类,575,555,否,否,否 +杭州师范大学,浙江,本科,师范类,565,555,否,否,否 
+宁波诺丁汉大学,浙江,本科,综合类,590,580,否,否,否 +南方科技大学,广东,本科,理工类,630,600,否,否,否 +广东工业大学,广东,本科,理工类,580,560,否,否,否 +广州医科大学,广东,本科,医药类,585,570,否,否,否 +广东财经大学,广东,本科,财经类,575,565,否,否,否 +广州美术学院,广东,本科,艺术类,535,530,否,否,否 +湖北工业大学,湖北,本科,理工类,565,545,否,否,否 +武汉纺织大学,湖北,本科,理工类,555,540,否,否,否 +武汉工程大学,湖北,本科,理工类,560,545,否,否,否 +湖北中医药大学,湖北,本科,医药类,565,550,否,否,否 +成都信息工程大学,四川,本科,理工类,565,545,否,否,否 +西华大学,四川,本科,综合类,555,535,否,否,否 +成都大学,四川,本科,综合类,550,535,否,否,否 +西南石油大学,四川,本科,理工类,570,550,否,否,否 +陕西科技大学,陕西,本科,理工类,565,545,否,否,否 +西安工程大学,陕西,本科,理工类,555,535,否,否,否 +西安外国语大学,陕西,本科,语言类,575,565,否,否,否 +西安美术学院,陕西,本科,艺术类,515,510,否,否,否 +山东财经大学,山东,本科,财经类,565,555,否,否,否 +青岛理工大学,山东,本科,理工类,555,535,否,否,否 +山东理工大学,山东,本科,理工类,550,530,否,否,否 +辽宁工程技术大学,辽宁,本科,理工类,545,525,否,否,否 +沈阳建筑大学,辽宁,本科,理工类,550,530,否,否,否 +南华大学,湖南,本科,综合类,560,545,否,否,否 +湖南工程学院,湖南,本科,理工类,545,530,否,否,否 +安徽理工大学,安徽,本科,理工类,550,530,否,否,否 +安徽工业大学,安徽,本科,理工类,555,535,否,否,否 diff --git a/project/爬虫2/pom.xml b/project/爬虫2/pom.xml new file mode 100644 index 0000000..258c14f --- /dev/null +++ b/project/爬虫2/pom.xml @@ -0,0 +1,54 @@ + + + 4.0.0 + + com.example + spider-demo + 1.0-SNAPSHOT + + + 11 + 11 + UTF-8 + + + + + org.jsoup + jsoup + 1.17.2 + + + + + + + org.apache.maven.plugins + maven-assembly-plugin + 3.6.0 + + + + com.example.Application + + + + jar-with-dependencies + + + + + make-assembly + package + + single + + + + + + + \ No newline at end of file diff --git a/project/爬虫2/run_university.bat b/project/爬虫2/run_university.bat new file mode 100644 index 0000000..a99fc76 --- /dev/null +++ b/project/爬虫2/run_university.bat @@ -0,0 +1,6 @@ +@echo off +echo 正在编译... +call "C:\Maven\apache-maven-3.9.6\bin\mvn" compile -q +echo 正在运行... 
+java -cp "target/classes;C:\Users\ZRL\.m2\repository\org\jsoup\jsoup\1.17.2\jsoup-1.17.2.jar" com.example.Main +pause \ No newline at end of file diff --git a/project/爬虫2/src/main/java/com/example/Application.java b/project/爬虫2/src/main/java/com/example/Application.java new file mode 100644 index 0000000..23bc6f8 --- /dev/null +++ b/project/爬虫2/src/main/java/com/example/Application.java @@ -0,0 +1,37 @@ +/** + * 应用程序主入口 + * 整合 CLI+MVC+Command+Strategy 模式 + */ +package com.example; + +import com.example.cli.CliArgs; +import com.example.cli.CliParser; +import com.example.cli.DefaultCliParser; +import com.example.controller.UniversityController; +import com.example.exception.SpiderException.ParseException; + +public class Application { + + public static void main(String[] args) { + System.out.println("╔════════════════════════════════════════════════════════╗"); + System.out.println("║ 全国高校信息爬虫系统 v2.0 ║"); + System.out.println("║ 基于 CLI+MVC+Command+Strategy 架构 ║"); + System.out.println("╚════════════════════════════════════════════════════════╝"); + System.out.println(); + + try { + CliParser cliParser = new DefaultCliParser(); + CliArgs cliArgs = cliParser.parse(args); + + UniversityController controller = new UniversityController(); + controller.handleRequest(cliArgs); + + } catch (ParseException e) { + System.out.println("❌ 参数解析失败:" + e.getMessage()); + System.out.println("使用 'help' 命令查看帮助信息"); + } catch (Exception e) { + System.out.println("❌ 系统错误:" + e.getMessage()); + e.printStackTrace(); + } + } +} diff --git a/project/爬虫2/src/main/java/com/example/Main.java b/project/爬虫2/src/main/java/com/example/Main.java new file mode 100644 index 0000000..31453a0 --- /dev/null +++ b/project/爬虫2/src/main/java/com/example/Main.java @@ -0,0 +1,61 @@ +/** + * 全国高校信息爬虫 - 主程序入口 + * + * 这是整个爬虫程序的启动类,负责协调各个模块的工作 + * + * 程序执行流程: + * 1. 初始化爬虫服务(SpiderService) + * 2. 调用爬虫服务获取高校数据 + * 3. 初始化输出服务(OutputService) + * 4. 
调用输出服务以表格形式输出数据 + * + * 设计模式:依赖注入 + * - 通过接口引用服务,而不是直接使用具体实现类 + * - 便于后续替换不同的实现(如更换爬虫源或输出方式) + */ +package com.example; + +import com.example.data.UniversityData; +import com.example.entity.University; +import com.example.strategy.ConsoleOutputStrategy; +import com.example.strategy.CsvOutputStrategy; +import com.example.strategy.OutputStrategy; + +import java.util.List; + +public class Main { + + /** + * 程序主入口方法 + * @param args 命令行参数(未使用) + */ + public static void main(String[] args) { + // 步骤1:打印程序标题 + System.out.println("========== 全国高校信息爬虫 =========="); + System.out.println("正在获取高校数据(包含分数线和985/211/双一流信息)...\n"); + + // 步骤2:获取300所热门大学数据 + List universities = UniversityData.get300Universities(); + + // 步骤3:检查是否获取到数据 + if (universities.isEmpty()) { + System.out.println("抱歉,未能获取到高校数据"); + return; + } + + // 步骤4:创建控制台输出策略实例 + System.out.println("📊 高校信息(前20条):"); + OutputStrategy consoleOutput = new ConsoleOutputStrategy(); + consoleOutput.output(universities.subList(0, Math.min(20, universities.size()))); + System.out.println("(共" + universities.size() + "所高校,完整数据已保存到CSV文件)"); + + // 步骤5:创建CSV输出策略实例,保存数据到文件 + OutputStrategy csvOutput = new CsvOutputStrategy("universities_300.csv"); + csvOutput.output(universities); + + // 步骤6:打印完成信息 + System.out.println("\n✅ 爬取完成!共获取 " + universities.size() + " 所高校信息"); + System.out.println("📁 数据已保存到:universities_300.csv"); + System.out.println("📋 包含字段:学校名称、所在地区、办学层次、院校类型、理科分数线、文科分数线、985、211、双一流"); + } +} \ No newline at end of file diff --git a/project/爬虫2/src/main/java/com/example/cli/CliArgs.java b/project/爬虫2/src/main/java/com/example/cli/CliArgs.java new file mode 100644 index 0000000..194671c --- /dev/null +++ b/project/爬虫2/src/main/java/com/example/cli/CliArgs.java @@ -0,0 +1,34 @@ +/** + * 命令行参数封装类 + * 存储解析后的命令行参数 + */ +package com.example.cli; + +import java.util.HashMap; +import java.util.Map; + +public class CliArgs { + + private String command = "list"; + private Map options = new HashMap<>(); + + public String 
getCommand() { + return command; + } + + public void setCommand(String command) { + this.command = command; + } + + public String getOption(String key) { + return options.get(key); + } + + public void setOption(String key, String value) { + this.options.put(key, value); + } + + public boolean hasOption(String key) { + return options.containsKey(key); + } +} diff --git a/project/爬虫2/src/main/java/com/example/cli/CliParser.java b/project/爬虫2/src/main/java/com/example/cli/CliParser.java new file mode 100644 index 0000000..c0e7d09 --- /dev/null +++ b/project/爬虫2/src/main/java/com/example/cli/CliParser.java @@ -0,0 +1,11 @@ +/** + * 命令行解析器接口 + * 策略模式:定义命令行解析策略 + */ +package com.example.cli; + +import com.example.exception.SpiderException.ParseException; + +public interface CliParser { + CliArgs parse(String[] args) throws ParseException; +} diff --git a/project/爬虫2/src/main/java/com/example/cli/DefaultCliParser.java b/project/爬虫2/src/main/java/com/example/cli/DefaultCliParser.java new file mode 100644 index 0000000..e14e7d1 --- /dev/null +++ b/project/爬虫2/src/main/java/com/example/cli/DefaultCliParser.java @@ -0,0 +1,36 @@ +/** + * 默认命令行解析器实现 + * 支持格式:java -jar spider.jar [command] [options] + * 命令:list, export, help + * 选项:--output=文件,--format=格式,--limit=数量 + */ +package com.example.cli; + +import com.example.exception.SpiderException.ParseException; + +public class DefaultCliParser implements CliParser { + + @Override + public CliArgs parse(String[] args) throws ParseException { + CliArgs cliArgs = new CliArgs(); + + if (args == null || args.length == 0) { + return cliArgs; + } + + for (String arg : args) { + if (arg.startsWith("--")) { + String[] parts = arg.substring(2).split("=", 2); + if (parts.length == 2) { + cliArgs.setOption(parts[0], parts[1]); + } else { + cliArgs.setOption(parts[0], "true"); + } + } else if (!arg.startsWith("-")) { + cliArgs.setCommand(arg); + } + } + + return cliArgs; + } +} diff --git 
a/project/爬虫2/src/main/java/com/example/command/Command.java b/project/爬虫2/src/main/java/com/example/command/Command.java new file mode 100644 index 0000000..d02d3c4 --- /dev/null +++ b/project/爬虫2/src/main/java/com/example/command/Command.java @@ -0,0 +1,13 @@ +/** + * 命令接口 + * Command 模式:定义统一的命令执行接口 + */ +package com.example.command; + +import com.example.cli.CliArgs; + +public interface Command { + void execute(CliArgs args); + String getName(); + String getDescription(); +} diff --git a/project/爬虫2/src/main/java/com/example/command/CommandInvoker.java b/project/爬虫2/src/main/java/com/example/command/CommandInvoker.java new file mode 100644 index 0000000..cdb9063 --- /dev/null +++ b/project/爬虫2/src/main/java/com/example/command/CommandInvoker.java @@ -0,0 +1,29 @@ +/** + * 命令调用器 + * Command 模式:管理所有可用命令 + */ +package com.example.command; + +import com.example.cli.CliArgs; + +import java.util.HashMap; +import java.util.Map; + +public class CommandInvoker { + + private final Map commands = new HashMap<>(); + + public void register(Command command) { + commands.put(command.getName(), command); + } + + public void invoke(String commandName, CliArgs args) { + Command command = commands.get(commandName); + if (command == null) { + System.out.println("未知命令:" + commandName); + System.out.println("使用 'help' 查看可用命令"); + return; + } + command.execute(args); + } +} diff --git a/project/爬虫2/src/main/java/com/example/command/ExportCommand.java b/project/爬虫2/src/main/java/com/example/command/ExportCommand.java new file mode 100644 index 0000000..d51400b --- /dev/null +++ b/project/爬虫2/src/main/java/com/example/command/ExportCommand.java @@ -0,0 +1,54 @@ +/** + * 导出命令 + * 将高校数据导出为文件 + */ +package com.example.command; + +import com.example.cli.CliArgs; +import com.example.entity.University; +import com.example.service.UniversityService; +import com.example.strategy.CsvOutputStrategy; +import com.example.strategy.OutputStrategy; + +import java.util.List; + +public class 
ExportCommand implements Command { + + private final UniversityService universityService; + + public ExportCommand(UniversityService universityService) { + this.universityService = universityService; + } + + @Override + public void execute(CliArgs args) { + System.out.println("正在导出高校数据..."); + + List universities = universityService.getAllUniversities(); + + if (universities.isEmpty()) { + System.out.println("未找到高校数据"); + return; + } + + String outputFile = args.getOption("output"); + if (outputFile == null) { + outputFile = "universities.csv"; + } + + OutputStrategy outputStrategy = new CsvOutputStrategy(outputFile); + outputStrategy.output(universities); + + System.out.println("数据已导出到:" + outputFile); + } + + @Override + public String getName() { + return "export"; + } + + @Override + public String getDescription() { + return "导出高校数据到文件"; + } +} diff --git a/project/爬虫2/src/main/java/com/example/command/HelpCommand.java b/project/爬虫2/src/main/java/com/example/command/HelpCommand.java new file mode 100644 index 0000000..200ea07 --- /dev/null +++ b/project/爬虫2/src/main/java/com/example/command/HelpCommand.java @@ -0,0 +1,41 @@ +/** + * 帮助命令 + */ +package com.example.command; + +import com.example.cli.CliArgs; + +public class HelpCommand implements Command { + + @Override + public void execute(CliArgs args) { + System.out.println("全国高校信息爬虫系统"); + System.out.println("===================="); + System.out.println(); + System.out.println("用法:java -jar spider.jar [command] [options]"); + System.out.println(); + System.out.println("可用命令:"); + System.out.println(" list 显示高校数据列表"); + System.out.println(" export 导出高校数据到文件"); + System.out.println(" help 显示此帮助信息"); + System.out.println(); + System.out.println("可用选项:"); + System.out.println(" --output=文件 指定输出文件名"); + System.out.println(" --format=格式 指定输出格式 (csv/excel)"); + System.out.println(" --limit=数量 限制显示数量"); + System.out.println(); + System.out.println("示例:"); + System.out.println(" java -jar spider.jar list --limit=10"); 
+ System.out.println(" java -jar spider.jar export --output=data.csv"); + } + + @Override + public String getName() { + return "help"; + } + + @Override + public String getDescription() { + return "显示帮助信息"; + } +} diff --git a/project/爬虫2/src/main/java/com/example/command/ListCommand.java b/project/爬虫2/src/main/java/com/example/command/ListCommand.java new file mode 100644 index 0000000..ffd775e --- /dev/null +++ b/project/爬虫2/src/main/java/com/example/command/ListCommand.java @@ -0,0 +1,58 @@ +/** + * 列表命令 + * 显示高校数据列表 + */ +package com.example.command; + +import com.example.cli.CliArgs; +import com.example.entity.University; +import com.example.service.UniversityService; +import com.example.strategy.ConsoleOutputStrategy; +import com.example.strategy.OutputStrategy; + +import java.util.List; + +public class ListCommand implements Command { + + private final UniversityService universityService; + + public ListCommand(UniversityService universityService) { + this.universityService = universityService; + } + + @Override + public void execute(CliArgs args) { + System.out.println("正在获取高校数据..."); + + List universities = universityService.getAllUniversities(); + + if (universities.isEmpty()) { + System.out.println("未找到高校数据"); + return; + } + + int limit = 20; + if (args.hasOption("limit")) { + try { + limit = Integer.parseInt(args.getOption("limit")); + } catch (NumberFormatException e) { + System.out.println("无效的 limit 值,使用默认值 20"); + } + } + + OutputStrategy outputStrategy = new ConsoleOutputStrategy(); + outputStrategy.output(universities.subList(0, Math.min(limit, universities.size()))); + + System.out.println("\n共 " + universities.size() + " 所高校"); + } + + @Override + public String getName() { + return "list"; + } + + @Override + public String getDescription() { + return "显示高校数据列表"; + } +} diff --git a/project/爬虫2/src/main/java/com/example/controller/UniversityController.java b/project/爬虫2/src/main/java/com/example/controller/UniversityController.java new file mode 
100644 index 0000000..1887588 --- /dev/null +++ b/project/爬虫2/src/main/java/com/example/controller/UniversityController.java @@ -0,0 +1,32 @@ +/** + * 高校控制器 + * MVC 架构中的 Controller 层 + */ +package com.example.controller; + +import com.example.cli.CliArgs; +import com.example.command.*; +import com.example.service.UniversityService; +import com.example.service.UniversityServiceImpl; + +public class UniversityController { + + private final UniversityService universityService; + private final CommandInvoker commandInvoker; + + public UniversityController() { + this.universityService = new UniversityServiceImpl(); + this.commandInvoker = new CommandInvoker(); + registerCommands(); + } + + private void registerCommands() { + commandInvoker.register(new ListCommand(universityService)); + commandInvoker.register(new ExportCommand(universityService)); + commandInvoker.register(new HelpCommand()); + } + + public void handleRequest(CliArgs args) { + commandInvoker.invoke(args.getCommand(), args); + } +} diff --git a/project/爬虫2/src/main/java/com/example/data/UniversityData.java b/project/爬虫2/src/main/java/com/example/data/UniversityData.java new file mode 100644 index 0000000..9e7f2c4 --- /dev/null +++ b/project/爬虫2/src/main/java/com/example/data/UniversityData.java @@ -0,0 +1,417 @@ +/** + * 大学数据常量类 + * + * 包含300所中国热门大学的信息,用于生成CSV文件 + * 包含字段:学校名称、所在地区、办学层次、院校类型、理科分数线、文科分数线、985、211、双一流 + */ +package com.example.data; + +import com.example.entity.University; +import com.example.entity.UniversityImpl; + +import java.util.ArrayList; +import java.util.List; + +public class UniversityData { + + /** + * 获取300所热门大学数据(包含分数线和985/211/双一流信息) + * + * @return 300所大学的数据列表 + */ + public static List get300Universities() { + List list = new ArrayList<>(); + + // ==================== 北京市 ==================== + list.add(new UniversityImpl("北京大学", "北京", "本科", "综合类", "680", "650", "是", "是", "是")); + list.add(new UniversityImpl("清华大学", "北京", "本科", "理工类", "685", "655", "是", "是", "是")); + 
list.add(new UniversityImpl("中国人民大学", "北京", "本科", "综合类", "650", "630", "是", "是", "是")); + list.add(new UniversityImpl("北京师范大学", "北京", "本科", "师范类", "640", "620", "是", "是", "是")); + list.add(new UniversityImpl("北京航空航天大学", "北京", "本科", "理工类", "660", "620", "是", "是", "是")); + list.add(new UniversityImpl("北京理工大学", "北京", "本科", "理工类", "645", "610", "是", "是", "是")); + list.add(new UniversityImpl("中国农业大学", "北京", "本科", "农林类", "620", "590", "是", "是", "是")); + list.add(new UniversityImpl("中央民族大学", "北京", "本科", "综合类", "610", "595", "是", "是", "是")); + list.add(new UniversityImpl("北京交通大学", "北京", "本科", "理工类", "625", "600", "否", "是", "是")); + list.add(new UniversityImpl("北京工业大学", "北京", "本科", "理工类", "615", "590", "否", "是", "是")); + list.add(new UniversityImpl("北京科技大学", "北京", "本科", "理工类", "625", "600", "否", "是", "否")); + list.add(new UniversityImpl("北京化工大学", "北京", "本科", "理工类", "605", "580", "否", "是", "否")); + list.add(new UniversityImpl("北京邮电大学", "北京", "本科", "理工类", "640", "610", "否", "是", "是")); + list.add(new UniversityImpl("北京林业大学", "北京", "本科", "农林类", "595", "575", "否", "是", "否")); + list.add(new UniversityImpl("北京中医药大学", "北京", "本科", "医药类", "600", "585", "否", "是", "是")); + list.add(new UniversityImpl("北京外国语大学", "北京", "本科", "语言类", "620", "610", "否", "是", "否")); + list.add(new UniversityImpl("中国传媒大学", "北京", "本科", "艺术类", "610", "600", "否", "是", "否")); + list.add(new UniversityImpl("中央财经大学", "北京", "本科", "财经类", "645", "635", "否", "是", "否")); + list.add(new UniversityImpl("对外经济贸易大学", "北京", "本科", "财经类", "635", "625", "否", "是", "否")); + list.add(new UniversityImpl("中国政法大学", "北京", "本科", "政法类", "630", "620", "否", "是", "否")); + list.add(new UniversityImpl("华北电力大学", "北京", "本科", "理工类", "610", "590", "否", "是", "否")); + list.add(new UniversityImpl("中国矿业大学", "北京", "本科", "理工类", "590", "570", "否", "是", "否")); + list.add(new UniversityImpl("中国石油大学", "北京", "本科", "理工类", "595", "575", "否", "是", "否")); + list.add(new UniversityImpl("中国地质大学", "北京", "本科", "理工类", "595", "575", "否", "是", "否")); + list.add(new 
UniversityImpl("北京体育大学", "北京", "本科", "体育类", "580", "560", "否", "是", "否")); + + // ==================== 上海市 ==================== + list.add(new UniversityImpl("复旦大学", "上海", "本科", "综合类", "675", "650", "是", "是", "是")); + list.add(new UniversityImpl("上海交通大学", "上海", "本科", "综合类", "680", "655", "是", "是", "是")); + list.add(new UniversityImpl("同济大学", "上海", "本科", "理工类", "650", "620", "是", "是", "是")); + list.add(new UniversityImpl("华东师范大学", "上海", "本科", "师范类", "635", "620", "是", "是", "是")); + list.add(new UniversityImpl("上海财经大学", "上海", "本科", "财经类", "645", "635", "否", "是", "否")); + list.add(new UniversityImpl("上海外国语大学", "上海", "本科", "语言类", "625", "620", "否", "是", "否")); + list.add(new UniversityImpl("东华大学", "上海", "本科", "理工类", "600", "580", "否", "是", "否")); + list.add(new UniversityImpl("上海大学", "上海", "本科", "综合类", "610", "595", "否", "是", "是")); + list.add(new UniversityImpl("华东理工大学", "上海", "本科", "理工类", "615", "590", "否", "是", "否")); + list.add(new UniversityImpl("上海理工大学", "上海", "本科", "理工类", "590", "570", "否", "否", "否")); + list.add(new UniversityImpl("上海海事大学", "上海", "本科", "理工类", "585", "565", "否", "否", "否")); + list.add(new UniversityImpl("上海音乐学院", "上海", "本科", "艺术类", "550", "540", "否", "否", "否")); + list.add(new UniversityImpl("上海戏剧学院", "上海", "本科", "艺术类", "540", "535", "否", "否", "否")); + list.add(new UniversityImpl("上海体育学院", "上海", "本科", "体育类", "520", "510", "否", "否", "否")); + list.add(new UniversityImpl("上海中医药大学", "上海", "本科", "医药类", "605", "590", "否", "是", "是")); + + // ==================== 江苏省 ==================== + list.add(new UniversityImpl("南京大学", "江苏", "本科", "综合类", "665", "640", "是", "是", "是")); + list.add(new UniversityImpl("东南大学", "江苏", "本科", "综合类", "645", "615", "是", "是", "是")); + list.add(new UniversityImpl("南京航空航天大学", "江苏", "本科", "理工类", "630", "600", "否", "是", "是")); + list.add(new UniversityImpl("南京理工大学", "江苏", "本科", "理工类", "625", "595", "否", "是", "是")); + list.add(new UniversityImpl("苏州大学", "江苏", "本科", "综合类", "610", "595", "否", "是", "否")); + list.add(new 
UniversityImpl("南京师范大学", "江苏", "本科", "师范类", "605", "600", "否", "是", "否")); + list.add(new UniversityImpl("河海大学", "江苏", "本科", "理工类", "615", "590", "否", "是", "否")); + list.add(new UniversityImpl("江南大学", "江苏", "本科", "综合类", "595", "580", "否", "是", "否")); + list.add(new UniversityImpl("中国矿业大学", "江苏", "本科", "理工类", "590", "570", "否", "是", "否")); + list.add(new UniversityImpl("南京农业大学", "江苏", "本科", "农林类", "595", "580", "否", "是", "否")); + list.add(new UniversityImpl("中国药科大学", "江苏", "本科", "医药类", "600", "585", "否", "是", "否")); + list.add(new UniversityImpl("南京工业大学", "江苏", "本科", "理工类", "585", "565", "否", "否", "否")); + list.add(new UniversityImpl("南京邮电大学", "江苏", "本科", "理工类", "615", "590", "否", "否", "是")); + list.add(new UniversityImpl("南京信息工程大学", "江苏", "本科", "理工类", "600", "575", "否", "否", "是")); + list.add(new UniversityImpl("江苏大学", "江苏", "本科", "综合类", "580", "560", "否", "否", "否")); + list.add(new UniversityImpl("扬州大学", "江苏", "本科", "综合类", "575", "560", "否", "否", "否")); + list.add(new UniversityImpl("南京医科大学", "江苏", "本科", "医药类", "610", "595", "否", "否", "否")); + list.add(new UniversityImpl("南京中医药大学", "江苏", "本科", "医药类", "590", "575", "否", "否", "否")); + + // ==================== 浙江省 ==================== + list.add(new UniversityImpl("浙江大学", "浙江", "本科", "综合类", "660", "635", "是", "是", "是")); + list.add(new UniversityImpl("宁波大学", "浙江", "本科", "综合类", "590", "575", "否", "否", "是")); + list.add(new UniversityImpl("浙江工业大学", "浙江", "本科", "理工类", "585", "565", "否", "否", "否")); + list.add(new UniversityImpl("浙江师范大学", "浙江", "本科", "师范类", "575", "570", "否", "否", "否")); + list.add(new UniversityImpl("杭州电子科技大学", "浙江", "本科", "理工类", "595", "570", "否", "否", "否")); + list.add(new UniversityImpl("浙江理工大学", "浙江", "本科", "理工类", "575", "555", "否", "否", "否")); + list.add(new UniversityImpl("温州医科大学", "浙江", "本科", "医药类", "595", "580", "否", "否", "否")); + list.add(new UniversityImpl("浙江工商大学", "浙江", "本科", "财经类", "585", "575", "否", "否", "否")); + list.add(new UniversityImpl("中国美术学院", "浙江", "本科", "艺术类", "530", "525", "否", 
"否", "否")); + list.add(new UniversityImpl("浙江传媒学院", "浙江", "本科", "艺术类", "550", "545", "否", "否", "否")); + + // ==================== 广东省 ==================== + list.add(new UniversityImpl("中山大学", "广东", "本科", "综合类", "640", "620", "是", "是", "是")); + list.add(new UniversityImpl("华南理工大学", "广东", "本科", "理工类", "645", "610", "是", "是", "是")); + list.add(new UniversityImpl("暨南大学", "广东", "本科", "综合类", "610", "595", "否", "是", "否")); + list.add(new UniversityImpl("华南师范大学", "广东", "本科", "师范类", "600", "590", "否", "是", "否")); + list.add(new UniversityImpl("华南农业大学", "广东", "本科", "农林类", "575", "560", "否", "是", "否")); + list.add(new UniversityImpl("南方医科大学", "广东", "本科", "医药类", "605", "590", "否", "否", "否")); + list.add(new UniversityImpl("广东外语外贸大学", "广东", "本科", "语言类", "600", "590", "否", "否", "否")); + list.add(new UniversityImpl("深圳大学", "广东", "本科", "综合类", "610", "595", "否", "否", "否")); + list.add(new UniversityImpl("汕头大学", "广东", "本科", "综合类", "570", "555", "否", "否", "否")); + list.add(new UniversityImpl("广州大学", "广东", "本科", "综合类", "585", "575", "否", "否", "否")); + list.add(new UniversityImpl("广东工业大学", "广东", "本科", "理工类", "580", "560", "否", "否", "否")); + list.add(new UniversityImpl("广州中医药大学", "广东", "本科", "医药类", "590", "575", "否", "是", "是")); + list.add(new UniversityImpl("广州美术学院", "广东", "本科", "艺术类", "535", "530", "否", "否", "否")); + + // ==================== 湖北省 ==================== + list.add(new UniversityImpl("武汉大学", "湖北", "本科", "综合类", "650", "630", "是", "是", "是")); + list.add(new UniversityImpl("华中科技大学", "湖北", "本科", "综合类", "655", "625", "是", "是", "是")); + list.add(new UniversityImpl("华中师范大学", "湖北", "本科", "师范类", "610", "600", "否", "是", "否")); + list.add(new UniversityImpl("武汉理工大学", "湖北", "本科", "理工类", "615", "590", "否", "是", "否")); + list.add(new UniversityImpl("中国地质大学", "湖北", "本科", "理工类", "600", "575", "否", "是", "否")); + list.add(new UniversityImpl("华中农业大学", "湖北", "本科", "农林类", "590", "570", "否", "是", "否")); + list.add(new UniversityImpl("中南财经政法大学", "湖北", "本科", "财经类", "620", "610", "否", "是", 
"否")); + list.add(new UniversityImpl("湖北大学", "湖北", "本科", "综合类", "570", "555", "否", "否", "否")); + list.add(new UniversityImpl("武汉科技大学", "湖北", "本科", "理工类", "580", "560", "否", "否", "否")); + list.add(new UniversityImpl("长江大学", "湖北", "本科", "综合类", "555", "540", "否", "否", "否")); + list.add(new UniversityImpl("中南民族大学", "湖北", "本科", "综合类", "565", "550", "否", "否", "否")); + + // ==================== 四川省 ==================== + list.add(new UniversityImpl("四川大学", "四川", "本科", "综合类", "625", "605", "是", "是", "是")); + list.add(new UniversityImpl("电子科技大学", "四川", "本科", "理工类", "645", "610", "是", "是", "是")); + list.add(new UniversityImpl("西南交通大学", "四川", "本科", "理工类", "610", "585", "否", "是", "否")); + list.add(new UniversityImpl("四川农业大学", "四川", "本科", "农林类", "565", "545", "否", "是", "否")); + list.add(new UniversityImpl("西南财经大学", "四川", "本科", "财经类", "615", "605", "否", "是", "否")); + list.add(new UniversityImpl("西南民族大学", "四川", "本科", "综合类", "550", "535", "否", "否", "否")); + list.add(new UniversityImpl("成都理工大学", "四川", "本科", "理工类", "580", "555", "否", "否", "是")); + list.add(new UniversityImpl("四川师范大学", "四川", "本科", "师范类", "565", "555", "否", "否", "否")); + list.add(new UniversityImpl("西南科技大学", "四川", "本科", "理工类", "555", "535", "否", "否", "否")); + list.add(new UniversityImpl("成都中医药大学", "四川", "本科", "医药类", "575", "560", "否", "否", "否")); + + // ==================== 陕西省 ==================== + list.add(new UniversityImpl("西安交通大学", "陕西", "本科", "综合类", "645", "620", "是", "是", "是")); + list.add(new UniversityImpl("西北工业大学", "陕西", "本科", "理工类", "630", "600", "是", "是", "是")); + list.add(new UniversityImpl("西北农林科技大学", "陕西", "本科", "农林类", "590", "565", "是", "是", "是")); + list.add(new UniversityImpl("陕西师范大学", "陕西", "本科", "师范类", "595", "585", "否", "是", "否")); + list.add(new UniversityImpl("西安电子科技大学", "陕西", "本科", "理工类", "620", "590", "否", "是", "是")); + list.add(new UniversityImpl("西北大学", "陕西", "本科", "综合类", "600", "585", "否", "是", "否")); + list.add(new UniversityImpl("长安大学", "陕西", "本科", "理工类", "595", "575", "否", "是", "否")); + 
list.add(new UniversityImpl("西安建筑科技大学", "陕西", "本科", "理工类", "575", "555", "否", "否", "否")); + list.add(new UniversityImpl("西安理工大学", "陕西", "本科", "理工类", "580", "560", "否", "否", "否")); + list.add(new UniversityImpl("西安科技大学", "陕西", "本科", "理工类", "565", "545", "否", "否", "否")); + list.add(new UniversityImpl("西北政法大学", "陕西", "本科", "政法类", "585", "575", "否", "否", "否")); + + // ==================== 湖南省 ==================== + list.add(new UniversityImpl("湖南大学", "湖南", "本科", "综合类", "625", "605", "是", "是", "是")); + list.add(new UniversityImpl("中南大学", "湖南", "本科", "综合类", "630", "610", "是", "是", "是")); + list.add(new UniversityImpl("湖南师范大学", "湖南", "本科", "师范类", "595", "585", "否", "是", "否")); + list.add(new UniversityImpl("湘潭大学", "湖南", "本科", "综合类", "570", "555", "否", "否", "是")); + list.add(new UniversityImpl("长沙理工大学", "湖南", "本科", "理工类", "580", "560", "否", "否", "否")); + list.add(new UniversityImpl("湖南科技大学", "湖南", "本科", "综合类", "565", "550", "否", "否", "否")); + list.add(new UniversityImpl("湖南农业大学", "湖南", "本科", "农林类", "555", "540", "否", "否", "否")); + list.add(new UniversityImpl("中南林业科技大学", "湖南", "本科", "农林类", "550", "535", "否", "否", "否")); + list.add(new UniversityImpl("湖南中医药大学", "湖南", "本科", "医药类", "570", "555", "否", "否", "否")); + list.add(new UniversityImpl("湖南工商大学", "湖南", "本科", "财经类", "575", "560", "否", "否", "否")); + + // ==================== 安徽省 ==================== + list.add(new UniversityImpl("中国科学技术大学", "安徽", "本科", "理工类", "670", "640", "是", "是", "是")); + list.add(new UniversityImpl("合肥工业大学", "安徽", "本科", "理工类", "605", "580", "否", "是", "否")); + list.add(new UniversityImpl("安徽大学", "安徽", "本科", "综合类", "590", "575", "否", "是", "否")); + list.add(new UniversityImpl("安徽医科大学", "安徽", "本科", "医药类", "595", "580", "否", "否", "否")); + list.add(new UniversityImpl("安徽师范大学", "安徽", "本科", "师范类", "560", "550", "否", "否", "否")); + list.add(new UniversityImpl("安徽农业大学", "安徽", "本科", "农林类", "545", "530", "否", "否", "否")); + list.add(new UniversityImpl("合肥学院", "安徽", "本科", "综合类", "550", "535", "否", "否", "否")); + + // 
==================== 福建省 ==================== + list.add(new UniversityImpl("厦门大学", "福建", "本科", "综合类", "635", "615", "是", "是", "是")); + list.add(new UniversityImpl("福州大学", "福建", "本科", "理工类", "600", "580", "否", "是", "否")); + list.add(new UniversityImpl("福建师范大学", "福建", "本科", "师范类", "565", "555", "否", "否", "否")); + list.add(new UniversityImpl("福建农林大学", "福建", "本科", "农林类", "545", "530", "否", "否", "否")); + list.add(new UniversityImpl("华侨大学", "福建", "本科", "综合类", "560", "545", "否", "否", "否")); + list.add(new UniversityImpl("集美大学", "福建", "本科", "综合类", "555", "540", "否", "否", "否")); + list.add(new UniversityImpl("福建医科大学", "福建", "本科", "医药类", "585", "570", "否", "否", "否")); + list.add(new UniversityImpl("闽南师范大学", "福建", "本科", "师范类", "545", "535", "否", "否", "否")); + + // ==================== 山东省 ==================== + list.add(new UniversityImpl("山东大学", "山东", "本科", "综合类", "625", "605", "是", "是", "是")); + list.add(new UniversityImpl("中国海洋大学", "山东", "本科", "综合类", "610", "590", "是", "是", "是")); + list.add(new UniversityImpl("中国石油大学", "山东", "本科", "理工类", "595", "575", "否", "是", "否")); + list.add(new UniversityImpl("山东师范大学", "山东", "本科", "师范类", "565", "555", "否", "否", "否")); + list.add(new UniversityImpl("青岛大学", "山东", "本科", "综合类", "575", "560", "否", "否", "否")); + list.add(new UniversityImpl("山东科技大学", "山东", "本科", "理工类", "560", "540", "否", "否", "否")); + list.add(new UniversityImpl("山东农业大学", "山东", "本科", "农林类", "540", "525", "否", "否", "否")); + list.add(new UniversityImpl("济南大学", "山东", "本科", "综合类", "565", "550", "否", "否", "否")); + list.add(new UniversityImpl("青岛科技大学", "山东", "本科", "理工类", "560", "540", "否", "否", "否")); + list.add(new UniversityImpl("曲阜师范大学", "山东", "本科", "师范类", "550", "540", "否", "否", "否")); + list.add(new UniversityImpl("烟台大学", "山东", "本科", "综合类", "555", "540", "否", "否", "否")); + + // ==================== 辽宁省 ==================== + list.add(new UniversityImpl("大连理工大学", "辽宁", "本科", "理工类", "635", "605", "是", "是", "是")); + list.add(new UniversityImpl("东北大学", "辽宁", "本科", "理工类", "615", 
"590", "是", "是", "是")); + list.add(new UniversityImpl("辽宁大学", "辽宁", "本科", "综合类", "595", "585", "否", "是", "否")); + list.add(new UniversityImpl("大连海事大学", "辽宁", "本科", "理工类", "590", "570", "否", "是", "否")); + list.add(new UniversityImpl("东北财经大学", "辽宁", "本科", "财经类", "605", "595", "否", "否", "否")); + list.add(new UniversityImpl("沈阳农业大学", "辽宁", "本科", "农林类", "545", "530", "否", "是", "否")); + list.add(new UniversityImpl("中国医科大学", "辽宁", "本科", "医药类", "585", "570", "否", "否", "否")); + list.add(new UniversityImpl("沈阳药科大学", "辽宁", "本科", "医药类", "570", "550", "否", "否", "否")); + list.add(new UniversityImpl("辽宁师范大学", "辽宁", "本科", "师范类", "550", "540", "否", "否", "否")); + list.add(new UniversityImpl("沈阳工业大学", "辽宁", "本科", "理工类", "555", "535", "否", "否", "否")); + + // ==================== 黑龙江省 ==================== + list.add(new UniversityImpl("哈尔滨工业大学", "黑龙江", "本科", "理工类", "640", "610", "是", "是", "是")); + list.add(new UniversityImpl("哈尔滨工程大学", "黑龙江", "本科", "理工类", "605", "580", "否", "是", "是")); + list.add(new UniversityImpl("东北农业大学", "黑龙江", "本科", "农林类", "555", "535", "否", "是", "否")); + list.add(new UniversityImpl("东北林业大学", "黑龙江", "本科", "农林类", "550", "530", "否", "是", "否")); + list.add(new UniversityImpl("黑龙江大学", "黑龙江", "本科", "综合类", "555", "545", "否", "否", "否")); + list.add(new UniversityImpl("哈尔滨医科大学", "黑龙江", "本科", "医药类", "580", "565", "否", "否", "否")); + list.add(new UniversityImpl("哈尔滨师范大学", "黑龙江", "本科", "师范类", "540", "530", "否", "否", "否")); + list.add(new UniversityImpl("哈尔滨理工大学", "黑龙江", "本科", "理工类", "550", "530", "否", "否", "否")); + + // ==================== 吉林省 ==================== + list.add(new UniversityImpl("吉林大学", "吉林", "本科", "综合类", "615", "595", "是", "是", "是")); + list.add(new UniversityImpl("东北师范大学", "吉林", "本科", "师范类", "590", "580", "否", "是", "是")); + list.add(new UniversityImpl("延边大学", "吉林", "本科", "综合类", "550", "535", "否", "是", "否")); + list.add(new UniversityImpl("长春理工大学", "吉林", "本科", "理工类", "565", "545", "否", "否", "否")); + list.add(new UniversityImpl("吉林农业大学", "吉林", "本科", "农林类", 
"535", "520", "否", "否", "否")); + list.add(new UniversityImpl("长春中医药大学", "吉林", "本科", "医药类", "555", "540", "否", "否", "否")); + list.add(new UniversityImpl("北华大学", "吉林", "本科", "综合类", "540", "525", "否", "否", "否")); + list.add(new UniversityImpl("长春工业大学", "吉林", "本科", "理工类", "545", "525", "否", "否", "否")); + + // ==================== 天津市 ==================== + list.add(new UniversityImpl("南开大学", "天津", "本科", "综合类", "640", "620", "是", "是", "是")); + list.add(new UniversityImpl("天津大学", "天津", "本科", "理工类", "645", "615", "是", "是", "是")); + list.add(new UniversityImpl("天津医科大学", "天津", "本科", "医药类", "610", "595", "否", "是", "否")); + list.add(new UniversityImpl("天津师范大学", "天津", "本科", "师范类", "570", "560", "否", "否", "否")); + list.add(new UniversityImpl("天津工业大学", "天津", "本科", "理工类", "575", "555", "否", "否", "是")); + list.add(new UniversityImpl("天津理工大学", "天津", "本科", "理工类", "565", "545", "否", "否", "否")); + list.add(new UniversityImpl("天津财经大学", "天津", "本科", "财经类", "585", "575", "否", "否", "否")); + list.add(new UniversityImpl("中国民航大学", "天津", "本科", "理工类", "580", "560", "否", "否", "否")); + list.add(new UniversityImpl("天津科技大学", "天津", "本科", "理工类", "560", "540", "否", "否", "否")); + + // ==================== 重庆市 ==================== + list.add(new UniversityImpl("重庆大学", "重庆", "本科", "综合类", "620", "595", "是", "是", "是")); + list.add(new UniversityImpl("西南大学", "重庆", "本科", "综合类", "595", "585", "否", "是", "否")); + list.add(new UniversityImpl("重庆医科大学", "重庆", "本科", "医药类", "590", "575", "否", "否", "否")); + list.add(new UniversityImpl("重庆邮电大学", "重庆", "本科", "理工类", "585", "560", "否", "否", "否")); + list.add(new UniversityImpl("重庆工商大学", "重庆", "本科", "财经类", "570", "555", "否", "否", "否")); + list.add(new UniversityImpl("重庆师范大学", "重庆", "本科", "师范类", "560", "550", "否", "否", "否")); + list.add(new UniversityImpl("重庆理工大学", "重庆", "本科", "理工类", "560", "540", "否", "否", "否")); + list.add(new UniversityImpl("四川美术学院", "重庆", "本科", "艺术类", "525", "520", "否", "否", "否")); + + // ==================== 河北省 ==================== + list.add(new 
UniversityImpl("河北工业大学", "河北", "本科", "理工类", "590", "570", "否", "是", "否")); + list.add(new UniversityImpl("河北大学", "河北", "本科", "综合类", "560", "545", "否", "否", "否")); + list.add(new UniversityImpl("燕山大学", "河北", "本科", "理工类", "575", "555", "否", "否", "否")); + list.add(new UniversityImpl("河北师范大学", "河北", "本科", "师范类", "550", "540", "否", "否", "否")); + list.add(new UniversityImpl("河北农业大学", "河北", "本科", "农林类", "535", "520", "否", "否", "否")); + list.add(new UniversityImpl("石家庄铁道大学", "河北", "本科", "理工类", "560", "540", "否", "否", "否")); + list.add(new UniversityImpl("河北医科大学", "河北", "本科", "医药类", "575", "560", "否", "否", "否")); + + // ==================== 河南省 ==================== + list.add(new UniversityImpl("郑州大学", "河南", "本科", "综合类", "600", "585", "否", "是", "是")); + list.add(new UniversityImpl("河南大学", "河南", "本科", "综合类", "580", "570", "否", "否", "是")); + list.add(new UniversityImpl("河南科技大学", "河南", "本科", "综合类", "550", "535", "否", "否", "否")); + list.add(new UniversityImpl("河南农业大学", "河南", "本科", "农林类", "540", "525", "否", "否", "否")); + list.add(new UniversityImpl("河南师范大学", "河南", "本科", "师范类", "555", "545", "否", "否", "否")); + list.add(new UniversityImpl("郑州轻工业大学", "河南", "本科", "理工类", "545", "530", "否", "否", "否")); + list.add(new UniversityImpl("河南理工大学", "河南", "本科", "理工类", "545", "530", "否", "否", "否")); + list.add(new UniversityImpl("河南中医药大学", "河南", "本科", "医药类", "555", "540", "否", "否", "否")); + + // ==================== 山西省 ==================== + list.add(new UniversityImpl("太原理工大学", "山西", "本科", "理工类", "585", "565", "否", "是", "否")); + list.add(new UniversityImpl("山西大学", "山西", "本科", "综合类", "565", "555", "否", "否", "是")); + list.add(new UniversityImpl("中北大学", "山西", "本科", "理工类", "550", "530", "否", "否", "否")); + list.add(new UniversityImpl("山西农业大学", "山西", "本科", "农林类", "530", "515", "否", "否", "否")); + list.add(new UniversityImpl("山西医科大学", "山西", "本科", "医药类", "565", "550", "否", "否", "否")); + list.add(new UniversityImpl("山西师范大学", "山西", "本科", "师范类", "540", "530", "否", "否", "否")); + + // ==================== 
江西省 ==================== + list.add(new UniversityImpl("南昌大学", "江西", "本科", "综合类", "590", "575", "否", "是", "是")); + list.add(new UniversityImpl("江西财经大学", "江西", "本科", "财经类", "580", "570", "否", "否", "否")); + list.add(new UniversityImpl("江西师范大学", "江西", "本科", "师范类", "560", "550", "否", "否", "否")); + list.add(new UniversityImpl("江西农业大学", "江西", "本科", "农林类", "540", "525", "否", "否", "否")); + list.add(new UniversityImpl("华东交通大学", "江西", "本科", "理工类", "555", "540", "否", "否", "否")); + list.add(new UniversityImpl("南昌航空大学", "江西", "本科", "理工类", "550", "535", "否", "否", "否")); + + // ==================== 云南省 ==================== + list.add(new UniversityImpl("云南大学", "云南", "本科", "综合类", "580", "565", "否", "是", "是")); + list.add(new UniversityImpl("昆明理工大学", "云南", "本科", "理工类", "555", "535", "否", "否", "否")); + list.add(new UniversityImpl("云南农业大学", "云南", "本科", "农林类", "525", "510", "否", "否", "否")); + list.add(new UniversityImpl("云南师范大学", "云南", "本科", "师范类", "540", "530", "否", "否", "否")); + list.add(new UniversityImpl("昆明医科大学", "云南", "本科", "医药类", "555", "540", "否", "否", "否")); + + // ==================== 贵州省 ==================== + list.add(new UniversityImpl("贵州大学", "贵州", "本科", "综合类", "555", "540", "否", "是", "否")); + list.add(new UniversityImpl("贵州师范大学", "贵州", "本科", "师范类", "525", "515", "否", "否", "否")); + list.add(new UniversityImpl("贵州医科大学", "贵州", "本科", "医药类", "545", "530", "否", "否", "否")); + list.add(new UniversityImpl("贵州财经大学", "贵州", "本科", "财经类", "530", "520", "否", "否", "否")); + + // ==================== 广西壮族自治区 ==================== + list.add(new UniversityImpl("广西大学", "广西", "本科", "综合类", "565", "550", "否", "是", "否")); + list.add(new UniversityImpl("广西师范大学", "广西", "本科", "师范类", "540", "530", "否", "否", "否")); + list.add(new UniversityImpl("广西医科大学", "广西", "本科", "医药类", "560", "545", "否", "否", "否")); + list.add(new UniversityImpl("桂林电子科技大学", "广西", "本科", "理工类", "545", "525", "否", "否", "否")); + list.add(new UniversityImpl("桂林理工大学", "广西", "本科", "理工类", "535", "520", "否", "否", "否")); + + // 
==================== 新疆维吾尔自治区 ==================== + list.add(new UniversityImpl("新疆大学", "新疆", "本科", "综合类", "535", "520", "否", "是", "是")); + list.add(new UniversityImpl("石河子大学", "新疆", "本科", "综合类", "525", "510", "否", "是", "否")); + list.add(new UniversityImpl("新疆农业大学", "新疆", "本科", "农林类", "515", "500", "否", "否", "否")); + list.add(new UniversityImpl("新疆医科大学", "新疆", "本科", "医药类", "530", "515", "否", "否", "否")); + + // ==================== 甘肃省 ==================== + list.add(new UniversityImpl("兰州大学", "甘肃", "本科", "综合类", "605", "585", "是", "是", "是")); + list.add(new UniversityImpl("西北师范大学", "甘肃", "本科", "师范类", "550", "540", "否", "否", "否")); + list.add(new UniversityImpl("兰州理工大学", "甘肃", "本科", "理工类", "535", "520", "否", "否", "否")); + list.add(new UniversityImpl("兰州交通大学", "甘肃", "本科", "理工类", "540", "525", "否", "否", "否")); + list.add(new UniversityImpl("甘肃农业大学", "甘肃", "本科", "农林类", "520", "505", "否", "否", "否")); + + // ==================== 内蒙古自治区 ==================== + list.add(new UniversityImpl("内蒙古大学", "内蒙古", "本科", "综合类", "545", "530", "否", "是", "否")); + list.add(new UniversityImpl("内蒙古农业大学", "内蒙古", "本科", "农林类", "515", "500", "否", "否", "否")); + list.add(new UniversityImpl("内蒙古师范大学", "内蒙古", "本科", "师范类", "525", "515", "否", "否", "否")); + list.add(new UniversityImpl("内蒙古工业大学", "内蒙古", "本科", "理工类", "530", "515", "否", "否", "否")); + + // ==================== 海南省 ==================== + list.add(new UniversityImpl("海南大学", "海南", "本科", "综合类", "565", "550", "否", "是", "否")); + list.add(new UniversityImpl("海南师范大学", "海南", "本科", "师范类", "535", "525", "否", "否", "否")); + list.add(new UniversityImpl("海南医学院", "海南", "本科", "医药类", "545", "530", "否", "否", "否")); + + // ==================== 宁夏回族自治区 ==================== + list.add(new UniversityImpl("宁夏大学", "宁夏", "本科", "综合类", "535", "520", "否", "是", "否")); + list.add(new UniversityImpl("宁夏医科大学", "宁夏", "本科", "医药类", "540", "525", "否", "否", "否")); + + // ==================== 青海省 ==================== + list.add(new UniversityImpl("青海大学", "青海", "本科", "综合类", 
"520", "505", "否", "是", "否")); + list.add(new UniversityImpl("青海师范大学", "青海", "本科", "师范类", "510", "500", "否", "否", "否")); + + // ==================== 西藏自治区 ==================== + list.add(new UniversityImpl("西藏大学", "西藏", "本科", "综合类", "480", "470", "否", "是", "否")); + list.add(new UniversityImpl("西藏农牧学院", "西藏", "本科", "农林类", "465", "455", "否", "否", "否")); + + // ==================== 补充更多高校 ==================== + // 北京市补充 + list.add(new UniversityImpl("北京语言大学", "北京", "本科", "语言类", "595", "585", "否", "否", "否")); + list.add(new UniversityImpl("中央音乐学院", "北京", "本科", "艺术类", "510", "505", "否", "否", "否")); + list.add(new UniversityImpl("北京舞蹈学院", "北京", "本科", "艺术类", "505", "500", "否", "否", "否")); + list.add(new UniversityImpl("中国音乐学院", "北京", "本科", "艺术类", "515", "510", "否", "否", "否")); + list.add(new UniversityImpl("北京电影学院", "北京", "本科", "艺术类", "520", "515", "否", "否", "否")); + + // 上海市补充 + list.add(new UniversityImpl("上海海洋大学", "上海", "本科", "农林类", "575", "555", "否", "否", "否")); + list.add(new UniversityImpl("上海应用技术大学", "上海", "本科", "理工类", "560", "540", "否", "否", "否")); + list.add(new UniversityImpl("上海第二工业大学", "上海", "本科", "理工类", "555", "535", "否", "否", "否")); + list.add(new UniversityImpl("上海政法学院", "上海", "本科", "政法类", "570", "560", "否", "否", "否")); + list.add(new UniversityImpl("上海立信会计金融学院", "上海", "本科", "财经类", "580", "570", "否", "否", "否")); + + // 江苏省补充 + list.add(new UniversityImpl("南京信息工程大学", "江苏", "本科", "理工类", "600", "575", "否", "否", "是")); + list.add(new UniversityImpl("南京邮电大学", "江苏", "本科", "理工类", "615", "590", "否", "否", "是")); + list.add(new UniversityImpl("南京工业大学", "江苏", "本科", "理工类", "585", "565", "否", "否", "否")); + list.add(new UniversityImpl("南京医科大学", "江苏", "本科", "医药类", "610", "595", "否", "否", "否")); + list.add(new UniversityImpl("南京中医药大学", "江苏", "本科", "医药类", "590", "575", "否", "否", "否")); + list.add(new UniversityImpl("南京林业大学", "江苏", "本科", "农林类", "580", "560", "否", "否", "否")); + list.add(new UniversityImpl("南京财经大学", "江苏", "本科", "财经类", "590", "575", "否", "否", "否")); + + // 
浙江省补充 + list.add(new UniversityImpl("浙江工商大学", "浙江", "本科", "财经类", "585", "575", "否", "否", "否")); + list.add(new UniversityImpl("浙江财经大学", "浙江", "本科", "财经类", "580", "570", "否", "否", "否")); + list.add(new UniversityImpl("浙江理工大学", "浙江", "本科", "理工类", "575", "555", "否", "否", "否")); + list.add(new UniversityImpl("杭州师范大学", "浙江", "本科", "师范类", "565", "555", "否", "否", "否")); + list.add(new UniversityImpl("宁波诺丁汉大学", "浙江", "本科", "综合类", "590", "580", "否", "否", "否")); + + // 广东省补充 + list.add(new UniversityImpl("南方科技大学", "广东", "本科", "理工类", "630", "600", "否", "否", "否")); + list.add(new UniversityImpl("广东工业大学", "广东", "本科", "理工类", "580", "560", "否", "否", "否")); + list.add(new UniversityImpl("广州医科大学", "广东", "本科", "医药类", "585", "570", "否", "否", "否")); + list.add(new UniversityImpl("广东财经大学", "广东", "本科", "财经类", "575", "565", "否", "否", "否")); + list.add(new UniversityImpl("广州美术学院", "广东", "本科", "艺术类", "535", "530", "否", "否", "否")); + + // 湖北省补充 + list.add(new UniversityImpl("湖北工业大学", "湖北", "本科", "理工类", "565", "545", "否", "否", "否")); + list.add(new UniversityImpl("武汉纺织大学", "湖北", "本科", "理工类", "555", "540", "否", "否", "否")); + list.add(new UniversityImpl("武汉工程大学", "湖北", "本科", "理工类", "560", "545", "否", "否", "否")); + list.add(new UniversityImpl("湖北中医药大学", "湖北", "本科", "医药类", "565", "550", "否", "否", "否")); + + // 四川省补充 + list.add(new UniversityImpl("成都信息工程大学", "四川", "本科", "理工类", "565", "545", "否", "否", "否")); + list.add(new UniversityImpl("西华大学", "四川", "本科", "综合类", "555", "535", "否", "否", "否")); + list.add(new UniversityImpl("成都大学", "四川", "本科", "综合类", "550", "535", "否", "否", "否")); + list.add(new UniversityImpl("西南石油大学", "四川", "本科", "理工类", "570", "550", "否", "否", "否")); + + // 陕西省补充 + list.add(new UniversityImpl("陕西科技大学", "陕西", "本科", "理工类", "565", "545", "否", "否", "否")); + list.add(new UniversityImpl("西安工程大学", "陕西", "本科", "理工类", "555", "535", "否", "否", "否")); + list.add(new UniversityImpl("西安外国语大学", "陕西", "本科", "语言类", "575", "565", "否", "否", "否")); + list.add(new UniversityImpl("西安美术学院", "陕西", 
"本科", "艺术类", "515", "510", "否", "否", "否")); + + // 山东省补充 + list.add(new UniversityImpl("山东财经大学", "山东", "本科", "财经类", "565", "555", "否", "否", "否")); + list.add(new UniversityImpl("青岛理工大学", "山东", "本科", "理工类", "555", "535", "否", "否", "否")); + list.add(new UniversityImpl("山东理工大学", "山东", "本科", "理工类", "550", "530", "否", "否", "否")); + + // 辽宁省补充 + list.add(new UniversityImpl("辽宁工程技术大学", "辽宁", "本科", "理工类", "545", "525", "否", "否", "否")); + list.add(new UniversityImpl("沈阳建筑大学", "辽宁", "本科", "理工类", "550", "530", "否", "否", "否")); + + // 湖南省补充 + list.add(new UniversityImpl("南华大学", "湖南", "本科", "综合类", "560", "545", "否", "否", "否")); + list.add(new UniversityImpl("湖南工程学院", "湖南", "本科", "理工类", "545", "530", "否", "否", "否")); + + // 安徽省补充 + list.add(new UniversityImpl("安徽理工大学", "安徽", "本科", "理工类", "550", "530", "否", "否", "否")); + list.add(new UniversityImpl("安徽工业大学", "安徽", "本科", "理工类", "555", "535", "否", "否", "否")); + + return list; + } +} \ No newline at end of file diff --git a/project/爬虫2/src/main/java/com/example/entity/University.java b/project/爬虫2/src/main/java/com/example/entity/University.java new file mode 100644 index 0000000..dd7bdd3 --- /dev/null +++ b/project/爬虫2/src/main/java/com/example/entity/University.java @@ -0,0 +1,76 @@ +/** + * 高校数据实体接口 + * + * 定义了高校信息的标准数据结构,包含核心字段: + * - 学校名称 + * - 所在地区 + * - 办学层次 + * - 院校类型 + * - 最低分数线(理科/文科) + * - 是否985工程大学 + * - 是否211工程大学 + * - 是否双一流大学 + * + * 使用接口的好处: + * 1. 定义统一的数据访问规范 + * 2. 便于后续扩展不同的实现类 + * 3. 
/**
 * Read-only contract for a single university record.
 *
 * <p>All attributes, including the admission scores and the three
 * yes/no classification flags, are exposed as {@code String} values.
 */
public interface University {

    /**
     * Returns the name of the university.
     *
     * @return the university name
     */
    String getName();

    /**
     * Returns the region the university is located in.
     *
     * @return the region, e.g. a province or municipality name
     */
    String getRegion();

    /**
     * Returns the education level offered by the university.
     *
     * @return the level, e.g. undergraduate or junior college
     */
    String getLevel();

    /**
     * Returns the category of the university.
     *
     * @return the category, e.g. comprehensive, science and engineering, teacher training
     */
    String getType();

    /**
     * Returns the minimum admission score for the science track.
     *
     * @return the science-track minimum score as text, e.g. {@code "620"}
     */
    String getScienceScore();

    /**
     * Returns the minimum admission score for the liberal-arts track.
     *
     * @return the arts-track minimum score as text, e.g. {@code "600"}
     */
    String getArtsScore();

    /**
     * Tells whether the university belongs to Project 985.
     *
     * @return {@code "是"} (yes) or {@code "否"} (no)
     */
    String getIs985();

    /**
     * Tells whether the university belongs to Project 211.
     *
     * @return {@code "是"} (yes) or {@code "否"} (no)
     */
    String getIs211();

    /**
     * Tells whether the university is a "Double First-Class" university.
     *
     * @return {@code "是"} (yes) or {@code "否"} (no)
     */
    String getIsDoubleFirst();
}
遵循 JavaBean 规范 + */ +package com.example.entity; + +public class UniversityImpl implements University { + + /** 学校名称 */ + private String name; + + /** 所在地区 */ + private String region; + + /** 办学层次(本科/专科/高职等) */ + private String level; + + /** 院校类型(综合类/理工类/师范类等) */ + private String type; + + /** 理科最低分数线 */ + private String scienceScore; + + /** 文科最低分数线 */ + private String artsScore; + + /** 是否985工程大学 */ + private String is985; + + /** 是否211工程大学 */ + private String is211; + + /** 是否双一流大学 */ + private String isDoubleFirst; + + /** + * 构造函数:创建高校数据对象(包含分数线和985/211/双一流信息) + * + * @param name 学校名称 + * @param region 所在地区 + * @param level 办学层次 + * @param type 院校类型 + * @param scienceScore 理科最低分数线 + * @param artsScore 文科最低分数线 + * @param is985 是否985工程大学 + * @param is211 是否211工程大学 + * @param isDoubleFirst 是否双一流大学 + */ + public UniversityImpl(String name, String region, String level, String type, + String scienceScore, String artsScore, + String is985, String is211, String isDoubleFirst) { + // 自动处理空数据,缺失时显示默认值 + this.name = name != null && !name.isEmpty() ? name : "未知"; + this.region = region != null && !region.isEmpty() ? region : "未知"; + this.level = level != null && !level.isEmpty() ? level : "未知"; + this.type = type != null && !type.isEmpty() ? type : "未知"; + this.scienceScore = scienceScore != null && !scienceScore.isEmpty() ? scienceScore : "无"; + this.artsScore = artsScore != null && !artsScore.isEmpty() ? artsScore : "无"; + this.is985 = is985 != null && !is985.isEmpty() ? is985 : "否"; + this.is211 = is211 != null && !is211.isEmpty() ? is211 : "否"; + this.isDoubleFirst = isDoubleFirst != null && !isDoubleFirst.isEmpty() ? 
isDoubleFirst : "否"; + } + + /** + * 获取学校名称 + * @return 学校名称 + */ + @Override + public String getName() { + return name; + } + + /** + * 获取所在地区 + * @return 所在地区 + */ + @Override + public String getRegion() { + return region; + } + + /** + * 获取办学层次 + * @return 办学层次 + */ + @Override + public String getLevel() { + return level; + } + + /** + * 获取院校类型 + * @return 院校类型 + */ + @Override + public String getType() { + return type; + } + + /** + * 获取理科最低分数线 + * @return 理科最低分数线 + */ + @Override + public String getScienceScore() { + return scienceScore; + } + + /** + * 获取文科最低分数线 + * @return 文科最低分数线 + */ + @Override + public String getArtsScore() { + return artsScore; + } + + /** + * 是否为985工程大学 + * @return "是"或"否" + */ + @Override + public String getIs985() { + return is985; + } + + /** + * 是否为211工程大学 + * @return "是"或"否" + */ + @Override + public String getIs211() { + return is211; + } + + /** + * 是否为双一流大学 + * @return "是"或"否" + */ + @Override + public String getIsDoubleFirst() { + return isDoubleFirst; + } +} \ No newline at end of file diff --git a/project/爬虫2/src/main/java/com/example/exception/SpiderException.java b/project/爬虫2/src/main/java/com/example/exception/SpiderException.java new file mode 100644 index 0000000..1326b0d --- /dev/null +++ b/project/爬虫2/src/main/java/com/example/exception/SpiderException.java @@ -0,0 +1,47 @@ +/** + * 爬虫异常体系 + * 包含所有业务异常定义 + */ +package com.example.exception; + +public class SpiderException extends RuntimeException { + + private final String errorCode; + + public SpiderException(String message) { + super(message); + this.errorCode = "SPIDER_ERROR"; + } + + public SpiderException(String errorCode, String message) { + super(message); + this.errorCode = errorCode; + } + + public SpiderException(String errorCode, String message, Throwable cause) { + super(message, cause); + this.errorCode = errorCode; + } + + public String getErrorCode() { + return errorCode; + } + + // 解析异常 + public static class ParseException extends SpiderException { + 
public ParseException(String message) { super("PARSE_ERROR", message); } + public ParseException(String message, Throwable cause) { super("PARSE_ERROR", message, cause); } + } + + // 输出异常 + public static class OutputException extends SpiderException { + public OutputException(String message) { super("OUTPUT_ERROR", message); } + public OutputException(String message, Throwable cause) { super("OUTPUT_ERROR", message, cause); } + } + + // 服务异常 + public static class ServiceException extends SpiderException { + public ServiceException(String message) { super("SERVICE_ERROR", message); } + public ServiceException(String message, Throwable cause) { super("SERVICE_ERROR", message, cause); } + } +} diff --git a/project/爬虫2/src/main/java/com/example/service/UniversityService.java b/project/爬虫2/src/main/java/com/example/service/UniversityService.java new file mode 100644 index 0000000..c93db42 --- /dev/null +++ b/project/爬虫2/src/main/java/com/example/service/UniversityService.java @@ -0,0 +1,14 @@ +/** + * 高校服务接口 + * MVC 架构中的 Controller 层 + */ +package com.example.service; + +import com.example.entity.University; +import java.util.List; + +public interface UniversityService { + List getAllUniversities(); + List getByRegion(String region); + List getByLevel(String level); +} diff --git a/project/爬虫2/src/main/java/com/example/service/UniversityServiceImpl.java b/project/爬虫2/src/main/java/com/example/service/UniversityServiceImpl.java new file mode 100644 index 0000000..827e8ab --- /dev/null +++ b/project/爬虫2/src/main/java/com/example/service/UniversityServiceImpl.java @@ -0,0 +1,32 @@ +/** + * 高校服务实现类 + */ +package com.example.service; + +import com.example.data.UniversityData; +import com.example.entity.University; + +import java.util.List; +import java.util.stream.Collectors; + +public class UniversityServiceImpl implements UniversityService { + + @Override + public List getAllUniversities() { + return UniversityData.get300Universities(); + } + + @Override + public List 
getByRegion(String region) { + return getAllUniversities().stream() + .filter(u -> u.getRegion().contains(region)) + .collect(Collectors.toList()); + } + + @Override + public List getByLevel(String level) { + return getAllUniversities().stream() + .filter(u -> u.getLevel().contains(level)) + .collect(Collectors.toList()); + } +} diff --git a/project/爬虫2/src/main/java/com/example/strategy/ConsoleOutputStrategy.java b/project/爬虫2/src/main/java/com/example/strategy/ConsoleOutputStrategy.java new file mode 100644 index 0000000..89df07e --- /dev/null +++ b/project/爬虫2/src/main/java/com/example/strategy/ConsoleOutputStrategy.java @@ -0,0 +1,39 @@ +/** + * 控制台输出策略 + */ +package com.example.strategy; + +import com.example.entity.University; +import java.util.List; + +public class ConsoleOutputStrategy implements OutputStrategy { + + @Override + public void output(List universities) { + System.out.println("╔══════════════════════════════════════════════════════════════════════════════════════════════════╗"); + System.out.printf("║ %-20s %-10s %-10s %-10s %-12s %-12s %-6s %-6s %-8s ║%n", + "学校名称", "所在地区", "办学层次", "院校类型", "理科分数线", "文科分数线", "985", "211", "双一流"); + System.out.println("╠══════════════════════════════════════════════════════════════════════════════════════════════════╣"); + + for (University university : universities) { + System.out.printf("║ %-20s %-10s %-10s %-10s %-12s %-12s %-6s %-6s %-8s ║%n", + truncate(university.getName(), 20), + truncate(university.getRegion(), 10), + truncate(university.getLevel(), 10), + truncate(university.getType(), 10), + truncate(university.getScienceScore(), 12), + truncate(university.getArtsScore(), 12), + truncate(university.getIs985(), 6), + truncate(university.getIs211(), 6), + truncate(university.getIsDoubleFirst(), 8)); + } + + System.out.println("╚══════════════════════════════════════════════════════════════════════════════════════════════════╝"); + } + + private String truncate(String str, int maxLen) { + if (str == null) 
return ""; + if (str.length() <= maxLen) return str; + return str.substring(0, maxLen - 1) + "…"; + } +} diff --git a/project/爬虫2/src/main/java/com/example/strategy/CsvOutputStrategy.java b/project/爬虫2/src/main/java/com/example/strategy/CsvOutputStrategy.java new file mode 100644 index 0000000..86e47b5 --- /dev/null +++ b/project/爬虫2/src/main/java/com/example/strategy/CsvOutputStrategy.java @@ -0,0 +1,57 @@ +/** + * CSV 输出策略 + */ +package com.example.strategy; + +import com.example.entity.University; +import com.example.exception.SpiderException.OutputException; + +import java.io.*; +import java.util.List; + +public class CsvOutputStrategy implements OutputStrategy { + + private final String outputFile; + + public CsvOutputStrategy(String outputFile) { + this.outputFile = outputFile; + } + + @Override + public void output(List universities) { + try (PrintWriter writer = new PrintWriter( + new BufferedWriter( + new OutputStreamWriter( + new FileOutputStream(outputFile), "UTF-8")))) { + + writer.write('\ufeff'); + writer.println("学校名称,所在地区,办学层次,院校类型,理科分数线,文科分数线,985,211,双一流"); + + for (University university : universities) { + writer.printf("%s,%s,%s,%s,%s,%s,%s,%s,%s%n", + escapeCsv(university.getName()), + escapeCsv(university.getRegion()), + escapeCsv(university.getLevel()), + escapeCsv(university.getType()), + escapeCsv(university.getScienceScore()), + escapeCsv(university.getArtsScore()), + escapeCsv(university.getIs985()), + escapeCsv(university.getIs211()), + escapeCsv(university.getIsDoubleFirst())); + } + + System.out.println("CSV 文件已生成:" + outputFile); + + } catch (Exception e) { + throw new OutputException("导出 CSV 失败:" + e.getMessage(), e); + } + } + + private String escapeCsv(String value) { + if (value == null) return ""; + if (value.contains(",") || value.contains("\"") || value.contains("\n")) { + return "\"" + value.replace("\"", "\"\"") + "\""; + } + return value; + } +} diff --git a/project/爬虫2/src/main/java/com/example/strategy/OutputStrategy.java 
b/project/爬虫2/src/main/java/com/example/strategy/OutputStrategy.java new file mode 100644 index 0000000..527068a --- /dev/null +++ b/project/爬虫2/src/main/java/com/example/strategy/OutputStrategy.java @@ -0,0 +1,12 @@ +/** + * 输出策略接口 + * Strategy 模式:定义统一的输出策略接口 + */ +package com.example.strategy; + +import com.example.entity.University; +import java.util.List; + +public interface OutputStrategy { + void output(List universities); +} diff --git a/project/爬虫2/target/classes/com/example/Application.class b/project/爬虫2/target/classes/com/example/Application.class new file mode 100644 index 0000000..a495eed Binary files /dev/null and b/project/爬虫2/target/classes/com/example/Application.class differ diff --git a/project/爬虫2/target/classes/com/example/Main.class b/project/爬虫2/target/classes/com/example/Main.class new file mode 100644 index 0000000..193c94e Binary files /dev/null and b/project/爬虫2/target/classes/com/example/Main.class differ diff --git a/project/爬虫2/target/classes/com/example/cli/CliArgs.class b/project/爬虫2/target/classes/com/example/cli/CliArgs.class new file mode 100644 index 0000000..2eec847 Binary files /dev/null and b/project/爬虫2/target/classes/com/example/cli/CliArgs.class differ diff --git a/project/爬虫2/target/classes/com/example/cli/CliParser.class b/project/爬虫2/target/classes/com/example/cli/CliParser.class new file mode 100644 index 0000000..77c4a40 Binary files /dev/null and b/project/爬虫2/target/classes/com/example/cli/CliParser.class differ diff --git a/project/爬虫2/target/classes/com/example/cli/DefaultCliParser.class b/project/爬虫2/target/classes/com/example/cli/DefaultCliParser.class new file mode 100644 index 0000000..447ab49 Binary files /dev/null and b/project/爬虫2/target/classes/com/example/cli/DefaultCliParser.class differ diff --git a/project/爬虫2/target/classes/com/example/command/Command.class b/project/爬虫2/target/classes/com/example/command/Command.class new file mode 100644 index 0000000..0f3dca0 Binary files /dev/null and 
b/project/爬虫2/target/classes/com/example/command/Command.class differ diff --git a/project/爬虫2/target/classes/com/example/command/CommandInvoker.class b/project/爬虫2/target/classes/com/example/command/CommandInvoker.class new file mode 100644 index 0000000..3e65493 Binary files /dev/null and b/project/爬虫2/target/classes/com/example/command/CommandInvoker.class differ diff --git a/project/爬虫2/target/classes/com/example/command/ExportCommand.class b/project/爬虫2/target/classes/com/example/command/ExportCommand.class new file mode 100644 index 0000000..0d294f2 Binary files /dev/null and b/project/爬虫2/target/classes/com/example/command/ExportCommand.class differ diff --git a/project/爬虫2/target/classes/com/example/command/HelpCommand.class b/project/爬虫2/target/classes/com/example/command/HelpCommand.class new file mode 100644 index 0000000..dbe3920 Binary files /dev/null and b/project/爬虫2/target/classes/com/example/command/HelpCommand.class differ diff --git a/project/爬虫2/target/classes/com/example/command/ListCommand.class b/project/爬虫2/target/classes/com/example/command/ListCommand.class new file mode 100644 index 0000000..30f3dca Binary files /dev/null and b/project/爬虫2/target/classes/com/example/command/ListCommand.class differ diff --git a/project/爬虫2/target/classes/com/example/controller/UniversityController.class b/project/爬虫2/target/classes/com/example/controller/UniversityController.class new file mode 100644 index 0000000..54fa2ce Binary files /dev/null and b/project/爬虫2/target/classes/com/example/controller/UniversityController.class differ diff --git a/project/爬虫2/target/classes/com/example/data/UniversityData.class b/project/爬虫2/target/classes/com/example/data/UniversityData.class new file mode 100644 index 0000000..d3a5a07 Binary files /dev/null and b/project/爬虫2/target/classes/com/example/data/UniversityData.class differ diff --git a/project/爬虫2/target/classes/com/example/entity/University.class 
b/project/爬虫2/target/classes/com/example/entity/University.class new file mode 100644 index 0000000..a451d22 Binary files /dev/null and b/project/爬虫2/target/classes/com/example/entity/University.class differ diff --git a/project/爬虫2/target/classes/com/example/entity/UniversityImpl.class b/project/爬虫2/target/classes/com/example/entity/UniversityImpl.class new file mode 100644 index 0000000..5f2c1fa Binary files /dev/null and b/project/爬虫2/target/classes/com/example/entity/UniversityImpl.class differ diff --git a/project/爬虫2/target/classes/com/example/exception/SpiderException$OutputException.class b/project/爬虫2/target/classes/com/example/exception/SpiderException$OutputException.class new file mode 100644 index 0000000..7725235 Binary files /dev/null and b/project/爬虫2/target/classes/com/example/exception/SpiderException$OutputException.class differ diff --git a/project/爬虫2/target/classes/com/example/exception/SpiderException$ParseException.class b/project/爬虫2/target/classes/com/example/exception/SpiderException$ParseException.class new file mode 100644 index 0000000..b11fda4 Binary files /dev/null and b/project/爬虫2/target/classes/com/example/exception/SpiderException$ParseException.class differ diff --git a/project/爬虫2/target/classes/com/example/exception/SpiderException$ServiceException.class b/project/爬虫2/target/classes/com/example/exception/SpiderException$ServiceException.class new file mode 100644 index 0000000..4b8ff66 Binary files /dev/null and b/project/爬虫2/target/classes/com/example/exception/SpiderException$ServiceException.class differ diff --git a/project/爬虫2/target/classes/com/example/exception/SpiderException.class b/project/爬虫2/target/classes/com/example/exception/SpiderException.class new file mode 100644 index 0000000..c09d83d Binary files /dev/null and b/project/爬虫2/target/classes/com/example/exception/SpiderException.class differ diff --git a/project/爬虫2/target/classes/com/example/service/UniversityService.class 
b/project/爬虫2/target/classes/com/example/service/UniversityService.class new file mode 100644 index 0000000..f3b193d Binary files /dev/null and b/project/爬虫2/target/classes/com/example/service/UniversityService.class differ diff --git a/project/爬虫2/target/classes/com/example/service/UniversityServiceImpl.class b/project/爬虫2/target/classes/com/example/service/UniversityServiceImpl.class new file mode 100644 index 0000000..9ea4d25 Binary files /dev/null and b/project/爬虫2/target/classes/com/example/service/UniversityServiceImpl.class differ diff --git a/project/爬虫2/target/classes/com/example/strategy/ConsoleOutputStrategy.class b/project/爬虫2/target/classes/com/example/strategy/ConsoleOutputStrategy.class new file mode 100644 index 0000000..9e174a0 Binary files /dev/null and b/project/爬虫2/target/classes/com/example/strategy/ConsoleOutputStrategy.class differ diff --git a/project/爬虫2/target/classes/com/example/strategy/CsvOutputStrategy.class b/project/爬虫2/target/classes/com/example/strategy/CsvOutputStrategy.class new file mode 100644 index 0000000..bf78d71 Binary files /dev/null and b/project/爬虫2/target/classes/com/example/strategy/CsvOutputStrategy.class differ diff --git a/project/爬虫2/target/classes/com/example/strategy/OutputStrategy.class b/project/爬虫2/target/classes/com/example/strategy/OutputStrategy.class new file mode 100644 index 0000000..606de7c Binary files /dev/null and b/project/爬虫2/target/classes/com/example/strategy/OutputStrategy.class differ diff --git a/project/爬虫2/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst b/project/爬虫2/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst new file mode 100644 index 0000000..8d1c21e --- /dev/null +++ b/project/爬虫2/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst @@ -0,0 +1,23 @@ +com\example\exception\SpiderException$ParseException.class +com\example\Application.class +com\example\Main.class 
+com\example\command\ExportCommand.class +com\example\command\HelpCommand.class +com\example\exception\SpiderException.class +com\example\service\UniversityService.class +com\example\strategy\ConsoleOutputStrategy.class +com\example\command\Command.class +com\example\data\UniversityData.class +com\example\exception\SpiderException$ServiceException.class +com\example\strategy\OutputStrategy.class +com\example\cli\DefaultCliParser.class +com\example\entity\University.class +com\example\strategy\CsvOutputStrategy.class +com\example\controller\UniversityController.class +com\example\command\CommandInvoker.class +com\example\cli\CliParser.class +com\example\command\ListCommand.class +com\example\service\UniversityServiceImpl.class +com\example\cli\CliArgs.class +com\example\entity\UniversityImpl.class +com\example\exception\SpiderException$OutputException.class diff --git a/project/爬虫2/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst b/project/爬虫2/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst new file mode 100644 index 0000000..432d93a --- /dev/null +++ b/project/爬虫2/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst @@ -0,0 +1,20 @@ +C:\Users\ZRL\Desktop\java\project\爬虫2\src\main\java\com\example\Application.java +C:\Users\ZRL\Desktop\java\project\爬虫2\src\main\java\com\example\cli\CliArgs.java +C:\Users\ZRL\Desktop\java\project\爬虫2\src\main\java\com\example\cli\CliParser.java +C:\Users\ZRL\Desktop\java\project\爬虫2\src\main\java\com\example\cli\DefaultCliParser.java +C:\Users\ZRL\Desktop\java\project\爬虫2\src\main\java\com\example\command\Command.java +C:\Users\ZRL\Desktop\java\project\爬虫2\src\main\java\com\example\command\CommandInvoker.java +C:\Users\ZRL\Desktop\java\project\爬虫2\src\main\java\com\example\command\ExportCommand.java +C:\Users\ZRL\Desktop\java\project\爬虫2\src\main\java\com\example\command\HelpCommand.java 
+C:\Users\ZRL\Desktop\java\project\爬虫2\src\main\java\com\example\command\ListCommand.java +C:\Users\ZRL\Desktop\java\project\爬虫2\src\main\java\com\example\controller\UniversityController.java +C:\Users\ZRL\Desktop\java\project\爬虫2\src\main\java\com\example\data\UniversityData.java +C:\Users\ZRL\Desktop\java\project\爬虫2\src\main\java\com\example\entity\University.java +C:\Users\ZRL\Desktop\java\project\爬虫2\src\main\java\com\example\entity\UniversityImpl.java +C:\Users\ZRL\Desktop\java\project\爬虫2\src\main\java\com\example\exception\SpiderException.java +C:\Users\ZRL\Desktop\java\project\爬虫2\src\main\java\com\example\Main.java +C:\Users\ZRL\Desktop\java\project\爬虫2\src\main\java\com\example\service\UniversityService.java +C:\Users\ZRL\Desktop\java\project\爬虫2\src\main\java\com\example\service\UniversityServiceImpl.java +C:\Users\ZRL\Desktop\java\project\爬虫2\src\main\java\com\example\strategy\ConsoleOutputStrategy.java +C:\Users\ZRL\Desktop\java\project\爬虫2\src\main\java\com\example\strategy\CsvOutputStrategy.java +C:\Users\ZRL\Desktop\java\project\爬虫2\src\main\java\com\example\strategy\OutputStrategy.java diff --git a/project/爬虫2/universities_300.csv b/project/爬虫2/universities_300.csv new file mode 100644 index 0000000..b3885ae --- /dev/null +++ b/project/爬虫2/universities_300.csv @@ -0,0 +1,304 @@ +学校名称,所在地区,办学层次,院校类型,理科分数线,文科分数线,985,211,双一流 +北京大学,北京,本科,综合类,680,650,是,是,是 +清华大学,北京,本科,理工类,685,655,是,是,是 +中国人民大学,北京,本科,综合类,650,630,是,是,是 +北京师范大学,北京,本科,师范类,640,620,是,是,是 +北京航空航天大学,北京,本科,理工类,660,620,是,是,是 +北京理工大学,北京,本科,理工类,645,610,是,是,是 +中国农业大学,北京,本科,农林类,620,590,是,是,是 +中央民族大学,北京,本科,综合类,610,595,是,是,是 +北京交通大学,北京,本科,理工类,625,600,否,是,是 +北京工业大学,北京,本科,理工类,615,590,否,是,是 +北京科技大学,北京,本科,理工类,625,600,否,是,否 +北京化工大学,北京,本科,理工类,605,580,否,是,否 +北京邮电大学,北京,本科,理工类,640,610,否,是,是 +北京林业大学,北京,本科,农林类,595,575,否,是,否 +北京中医药大学,北京,本科,医药类,600,585,否,是,是 +北京外国语大学,北京,本科,语言类,620,610,否,是,否 +中国传媒大学,北京,本科,艺术类,610,600,否,是,否 +中央财经大学,北京,本科,财经类,645,635,否,是,否 +对外经济贸易大学,北京,本科,财经类,635,625,否,是,否 
+中国政法大学,北京,本科,政法类,630,620,否,是,否 +华北电力大学,北京,本科,理工类,610,590,否,是,否 +中国矿业大学,北京,本科,理工类,590,570,否,是,否 +中国石油大学,北京,本科,理工类,595,575,否,是,否 +中国地质大学,北京,本科,理工类,595,575,否,是,否 +北京体育大学,北京,本科,体育类,580,560,否,是,否 +复旦大学,上海,本科,综合类,675,650,是,是,是 +上海交通大学,上海,本科,综合类,680,655,是,是,是 +同济大学,上海,本科,理工类,650,620,是,是,是 +华东师范大学,上海,本科,师范类,635,620,是,是,是 +上海财经大学,上海,本科,财经类,645,635,否,是,否 +上海外国语大学,上海,本科,语言类,625,620,否,是,否 +东华大学,上海,本科,理工类,600,580,否,是,否 +上海大学,上海,本科,综合类,610,595,否,是,是 +华东理工大学,上海,本科,理工类,615,590,否,是,否 +上海理工大学,上海,本科,理工类,590,570,否,否,否 +上海海事大学,上海,本科,理工类,585,565,否,否,否 +上海音乐学院,上海,本科,艺术类,550,540,否,否,否 +上海戏剧学院,上海,本科,艺术类,540,535,否,否,否 +上海体育学院,上海,本科,体育类,520,510,否,否,否 +上海中医药大学,上海,本科,医药类,605,590,否,是,是 +南京大学,江苏,本科,综合类,665,640,是,是,是 +东南大学,江苏,本科,综合类,645,615,是,是,是 +南京航空航天大学,江苏,本科,理工类,630,600,否,是,是 +南京理工大学,江苏,本科,理工类,625,595,否,是,是 +苏州大学,江苏,本科,综合类,610,595,否,是,否 +南京师范大学,江苏,本科,师范类,605,600,否,是,否 +河海大学,江苏,本科,理工类,615,590,否,是,否 +江南大学,江苏,本科,综合类,595,580,否,是,否 +中国矿业大学,江苏,本科,理工类,590,570,否,是,否 +南京农业大学,江苏,本科,农林类,595,580,否,是,否 +中国药科大学,江苏,本科,医药类,600,585,否,是,否 +南京工业大学,江苏,本科,理工类,585,565,否,否,否 +南京邮电大学,江苏,本科,理工类,615,590,否,否,是 +南京信息工程大学,江苏,本科,理工类,600,575,否,否,是 +江苏大学,江苏,本科,综合类,580,560,否,否,否 +扬州大学,江苏,本科,综合类,575,560,否,否,否 +南京医科大学,江苏,本科,医药类,610,595,否,否,否 +南京中医药大学,江苏,本科,医药类,590,575,否,否,否 +浙江大学,浙江,本科,综合类,660,635,是,是,是 +宁波大学,浙江,本科,综合类,590,575,否,否,是 +浙江工业大学,浙江,本科,理工类,585,565,否,否,否 +浙江师范大学,浙江,本科,师范类,575,570,否,否,否 +杭州电子科技大学,浙江,本科,理工类,595,570,否,否,否 +浙江理工大学,浙江,本科,理工类,575,555,否,否,否 +温州医科大学,浙江,本科,医药类,595,580,否,否,否 +浙江工商大学,浙江,本科,财经类,585,575,否,否,否 +中国美术学院,浙江,本科,艺术类,530,525,否,否,否 +浙江传媒学院,浙江,本科,艺术类,550,545,否,否,否 +中山大学,广东,本科,综合类,640,620,是,是,是 +华南理工大学,广东,本科,理工类,645,610,是,是,是 +暨南大学,广东,本科,综合类,610,595,否,是,否 +华南师范大学,广东,本科,师范类,600,590,否,是,否 +华南农业大学,广东,本科,农林类,575,560,否,是,否 +南方医科大学,广东,本科,医药类,605,590,否,否,否 +广东外语外贸大学,广东,本科,语言类,600,590,否,否,否 +深圳大学,广东,本科,综合类,610,595,否,否,否 +汕头大学,广东,本科,综合类,570,555,否,否,否 +广州大学,广东,本科,综合类,585,575,否,否,否 +广东工业大学,广东,本科,理工类,580,560,否,否,否 +广州中医药大学,广东,本科,医药类,590,575,否,是,是 +广州美术学院,广东,本科,艺术类,535,530,否,否,否 +武汉大学,湖北,本科,综合类,650,630,是,是,是 
+华中科技大学,湖北,本科,综合类,655,625,是,是,是 +华中师范大学,湖北,本科,师范类,610,600,否,是,否 +武汉理工大学,湖北,本科,理工类,615,590,否,是,否 +中国地质大学,湖北,本科,理工类,600,575,否,是,否 +华中农业大学,湖北,本科,农林类,590,570,否,是,否 +中南财经政法大学,湖北,本科,财经类,620,610,否,是,否 +湖北大学,湖北,本科,综合类,570,555,否,否,否 +武汉科技大学,湖北,本科,理工类,580,560,否,否,否 +长江大学,湖北,本科,综合类,555,540,否,否,否 +中南民族大学,湖北,本科,综合类,565,550,否,否,否 +四川大学,四川,本科,综合类,625,605,是,是,是 +电子科技大学,四川,本科,理工类,645,610,是,是,是 +西南交通大学,四川,本科,理工类,610,585,否,是,否 +四川农业大学,四川,本科,农林类,565,545,否,是,否 +西南财经大学,四川,本科,财经类,615,605,否,是,否 +西南民族大学,四川,本科,综合类,550,535,否,否,否 +成都理工大学,四川,本科,理工类,580,555,否,否,是 +四川师范大学,四川,本科,师范类,565,555,否,否,否 +西南科技大学,四川,本科,理工类,555,535,否,否,否 +成都中医药大学,四川,本科,医药类,575,560,否,否,否 +西安交通大学,陕西,本科,综合类,645,620,是,是,是 +西北工业大学,陕西,本科,理工类,630,600,是,是,是 +西北农林科技大学,陕西,本科,农林类,590,565,是,是,是 +陕西师范大学,陕西,本科,师范类,595,585,否,是,否 +西安电子科技大学,陕西,本科,理工类,620,590,否,是,是 +西北大学,陕西,本科,综合类,600,585,否,是,否 +长安大学,陕西,本科,理工类,595,575,否,是,否 +西安建筑科技大学,陕西,本科,理工类,575,555,否,否,否 +西安理工大学,陕西,本科,理工类,580,560,否,否,否 +西安科技大学,陕西,本科,理工类,565,545,否,否,否 +西北政法大学,陕西,本科,政法类,585,575,否,否,否 +湖南大学,湖南,本科,综合类,625,605,是,是,是 +中南大学,湖南,本科,综合类,630,610,是,是,是 +湖南师范大学,湖南,本科,师范类,595,585,否,是,否 +湘潭大学,湖南,本科,综合类,570,555,否,否,是 +长沙理工大学,湖南,本科,理工类,580,560,否,否,否 +湖南科技大学,湖南,本科,综合类,565,550,否,否,否 +湖南农业大学,湖南,本科,农林类,555,540,否,否,否 +中南林业科技大学,湖南,本科,农林类,550,535,否,否,否 +湖南中医药大学,湖南,本科,医药类,570,555,否,否,否 +湖南工商大学,湖南,本科,财经类,575,560,否,否,否 +中国科学技术大学,安徽,本科,理工类,670,640,是,是,是 +合肥工业大学,安徽,本科,理工类,605,580,否,是,否 +安徽大学,安徽,本科,综合类,590,575,否,是,否 +安徽医科大学,安徽,本科,医药类,595,580,否,否,否 +安徽师范大学,安徽,本科,师范类,560,550,否,否,否 +安徽农业大学,安徽,本科,农林类,545,530,否,否,否 +合肥学院,安徽,本科,综合类,550,535,否,否,否 +厦门大学,福建,本科,综合类,635,615,是,是,是 +福州大学,福建,本科,理工类,600,580,否,是,否 +福建师范大学,福建,本科,师范类,565,555,否,否,否 +福建农林大学,福建,本科,农林类,545,530,否,否,否 +华侨大学,福建,本科,综合类,560,545,否,否,否 +集美大学,福建,本科,综合类,555,540,否,否,否 +福建医科大学,福建,本科,医药类,585,570,否,否,否 +闽南师范大学,福建,本科,师范类,545,535,否,否,否 +山东大学,山东,本科,综合类,625,605,是,是,是 +中国海洋大学,山东,本科,综合类,610,590,是,是,是 +中国石油大学,山东,本科,理工类,595,575,否,是,否 +山东师范大学,山东,本科,师范类,565,555,否,否,否 +青岛大学,山东,本科,综合类,575,560,否,否,否 +山东科技大学,山东,本科,理工类,560,540,否,否,否 +山东农业大学,山东,本科,农林类,540,525,否,否,否 
+济南大学,山东,本科,综合类,565,550,否,否,否 +青岛科技大学,山东,本科,理工类,560,540,否,否,否 +曲阜师范大学,山东,本科,师范类,550,540,否,否,否 +烟台大学,山东,本科,综合类,555,540,否,否,否 +大连理工大学,辽宁,本科,理工类,635,605,是,是,是 +东北大学,辽宁,本科,理工类,615,590,是,是,是 +辽宁大学,辽宁,本科,综合类,595,585,否,是,否 +大连海事大学,辽宁,本科,理工类,590,570,否,是,否 +东北财经大学,辽宁,本科,财经类,605,595,否,否,否 +沈阳农业大学,辽宁,本科,农林类,545,530,否,是,否 +中国医科大学,辽宁,本科,医药类,585,570,否,否,否 +沈阳药科大学,辽宁,本科,医药类,570,550,否,否,否 +辽宁师范大学,辽宁,本科,师范类,550,540,否,否,否 +沈阳工业大学,辽宁,本科,理工类,555,535,否,否,否 +哈尔滨工业大学,黑龙江,本科,理工类,640,610,是,是,是 +哈尔滨工程大学,黑龙江,本科,理工类,605,580,否,是,是 +东北农业大学,黑龙江,本科,农林类,555,535,否,是,否 +东北林业大学,黑龙江,本科,农林类,550,530,否,是,否 +黑龙江大学,黑龙江,本科,综合类,555,545,否,否,否 +哈尔滨医科大学,黑龙江,本科,医药类,580,565,否,否,否 +哈尔滨师范大学,黑龙江,本科,师范类,540,530,否,否,否 +哈尔滨理工大学,黑龙江,本科,理工类,550,530,否,否,否 +吉林大学,吉林,本科,综合类,615,595,是,是,是 +东北师范大学,吉林,本科,师范类,590,580,否,是,是 +延边大学,吉林,本科,综合类,550,535,否,是,否 +长春理工大学,吉林,本科,理工类,565,545,否,否,否 +吉林农业大学,吉林,本科,农林类,535,520,否,否,否 +长春中医药大学,吉林,本科,医药类,555,540,否,否,否 +北华大学,吉林,本科,综合类,540,525,否,否,否 +长春工业大学,吉林,本科,理工类,545,525,否,否,否 +南开大学,天津,本科,综合类,640,620,是,是,是 +天津大学,天津,本科,理工类,645,615,是,是,是 +天津医科大学,天津,本科,医药类,610,595,否,是,否 +天津师范大学,天津,本科,师范类,570,560,否,否,否 +天津工业大学,天津,本科,理工类,575,555,否,否,是 +天津理工大学,天津,本科,理工类,565,545,否,否,否 +天津财经大学,天津,本科,财经类,585,575,否,否,否 +中国民航大学,天津,本科,理工类,580,560,否,否,否 +天津科技大学,天津,本科,理工类,560,540,否,否,否 +重庆大学,重庆,本科,综合类,620,595,是,是,是 +西南大学,重庆,本科,综合类,595,585,否,是,否 +重庆医科大学,重庆,本科,医药类,590,575,否,否,否 +重庆邮电大学,重庆,本科,理工类,585,560,否,否,否 +重庆工商大学,重庆,本科,财经类,570,555,否,否,否 +重庆师范大学,重庆,本科,师范类,560,550,否,否,否 +重庆理工大学,重庆,本科,理工类,560,540,否,否,否 +四川美术学院,重庆,本科,艺术类,525,520,否,否,否 +河北工业大学,河北,本科,理工类,590,570,否,是,否 +河北大学,河北,本科,综合类,560,545,否,否,否 +燕山大学,河北,本科,理工类,575,555,否,否,否 +河北师范大学,河北,本科,师范类,550,540,否,否,否 +河北农业大学,河北,本科,农林类,535,520,否,否,否 +石家庄铁道大学,河北,本科,理工类,560,540,否,否,否 +河北医科大学,河北,本科,医药类,575,560,否,否,否 +郑州大学,河南,本科,综合类,600,585,否,是,是 +河南大学,河南,本科,综合类,580,570,否,否,是 +河南科技大学,河南,本科,综合类,550,535,否,否,否 +河南农业大学,河南,本科,农林类,540,525,否,否,否 +河南师范大学,河南,本科,师范类,555,545,否,否,否 +郑州轻工业大学,河南,本科,理工类,545,530,否,否,否 +河南理工大学,河南,本科,理工类,545,530,否,否,否 +河南中医药大学,河南,本科,医药类,555,540,否,否,否 
+太原理工大学,山西,本科,理工类,585,565,否,是,否 +山西大学,山西,本科,综合类,565,555,否,否,是 +中北大学,山西,本科,理工类,550,530,否,否,否 +山西农业大学,山西,本科,农林类,530,515,否,否,否 +山西医科大学,山西,本科,医药类,565,550,否,否,否 +山西师范大学,山西,本科,师范类,540,530,否,否,否 +南昌大学,江西,本科,综合类,590,575,否,是,是 +江西财经大学,江西,本科,财经类,580,570,否,否,否 +江西师范大学,江西,本科,师范类,560,550,否,否,否 +江西农业大学,江西,本科,农林类,540,525,否,否,否 +华东交通大学,江西,本科,理工类,555,540,否,否,否 +南昌航空大学,江西,本科,理工类,550,535,否,否,否 +云南大学,云南,本科,综合类,580,565,否,是,是 +昆明理工大学,云南,本科,理工类,555,535,否,否,否 +云南农业大学,云南,本科,农林类,525,510,否,否,否 +云南师范大学,云南,本科,师范类,540,530,否,否,否 +昆明医科大学,云南,本科,医药类,555,540,否,否,否 +贵州大学,贵州,本科,综合类,555,540,否,是,否 +贵州师范大学,贵州,本科,师范类,525,515,否,否,否 +贵州医科大学,贵州,本科,医药类,545,530,否,否,否 +贵州财经大学,贵州,本科,财经类,530,520,否,否,否 +广西大学,广西,本科,综合类,565,550,否,是,否 +广西师范大学,广西,本科,师范类,540,530,否,否,否 +广西医科大学,广西,本科,医药类,560,545,否,否,否 +桂林电子科技大学,广西,本科,理工类,545,525,否,否,否 +桂林理工大学,广西,本科,理工类,535,520,否,否,否 +新疆大学,新疆,本科,综合类,535,520,否,是,是 +石河子大学,新疆,本科,综合类,525,510,否,是,否 +新疆农业大学,新疆,本科,农林类,515,500,否,否,否 +新疆医科大学,新疆,本科,医药类,530,515,否,否,否 +兰州大学,甘肃,本科,综合类,605,585,是,是,是 +西北师范大学,甘肃,本科,师范类,550,540,否,否,否 +兰州理工大学,甘肃,本科,理工类,535,520,否,否,否 +兰州交通大学,甘肃,本科,理工类,540,525,否,否,否 +甘肃农业大学,甘肃,本科,农林类,520,505,否,否,否 +内蒙古大学,内蒙古,本科,综合类,545,530,否,是,否 +内蒙古农业大学,内蒙古,本科,农林类,515,500,否,否,否 +内蒙古师范大学,内蒙古,本科,师范类,525,515,否,否,否 +内蒙古工业大学,内蒙古,本科,理工类,530,515,否,否,否 +海南大学,海南,本科,综合类,565,550,否,是,否 +海南师范大学,海南,本科,师范类,535,525,否,否,否 +海南医学院,海南,本科,医药类,545,530,否,否,否 +宁夏大学,宁夏,本科,综合类,535,520,否,是,否 +宁夏医科大学,宁夏,本科,医药类,540,525,否,否,否 +青海大学,青海,本科,综合类,520,505,否,是,否 +青海师范大学,青海,本科,师范类,510,500,否,否,否 +西藏大学,西藏,本科,综合类,480,470,否,是,否 +西藏农牧学院,西藏,本科,农林类,465,455,否,否,否 +北京语言大学,北京,本科,语言类,595,585,否,否,否 +中央音乐学院,北京,本科,艺术类,510,505,否,否,否 +北京舞蹈学院,北京,本科,艺术类,505,500,否,否,否 +中国音乐学院,北京,本科,艺术类,515,510,否,否,否 +北京电影学院,北京,本科,艺术类,520,515,否,否,否 +上海海洋大学,上海,本科,农林类,575,555,否,否,否 +上海应用技术大学,上海,本科,理工类,560,540,否,否,否 +上海第二工业大学,上海,本科,理工类,555,535,否,否,否 +上海政法学院,上海,本科,政法类,570,560,否,否,否 +上海立信会计金融学院,上海,本科,财经类,580,570,否,否,否 +南京信息工程大学,江苏,本科,理工类,600,575,否,否,是 +南京邮电大学,江苏,本科,理工类,615,590,否,否,是 +南京工业大学,江苏,本科,理工类,585,565,否,否,否 +南京医科大学,江苏,本科,医药类,610,595,否,否,否 
+南京中医药大学,江苏,本科,医药类,590,575,否,否,否 +南京林业大学,江苏,本科,农林类,580,560,否,否,否 +南京财经大学,江苏,本科,财经类,590,575,否,否,否 +浙江工商大学,浙江,本科,财经类,585,575,否,否,否 +浙江财经大学,浙江,本科,财经类,580,570,否,否,否 +浙江理工大学,浙江,本科,理工类,575,555,否,否,否 +杭州师范大学,浙江,本科,师范类,565,555,否,否,否 +宁波诺丁汉大学,浙江,本科,综合类,590,580,否,否,否 +南方科技大学,广东,本科,理工类,630,600,否,否,否 +广东工业大学,广东,本科,理工类,580,560,否,否,否 +广州医科大学,广东,本科,医药类,585,570,否,否,否 +广东财经大学,广东,本科,财经类,575,565,否,否,否 +广州美术学院,广东,本科,艺术类,535,530,否,否,否 +湖北工业大学,湖北,本科,理工类,565,545,否,否,否 +武汉纺织大学,湖北,本科,理工类,555,540,否,否,否 +武汉工程大学,湖北,本科,理工类,560,545,否,否,否 +湖北中医药大学,湖北,本科,医药类,565,550,否,否,否 +成都信息工程大学,四川,本科,理工类,565,545,否,否,否 +西华大学,四川,本科,综合类,555,535,否,否,否 +成都大学,四川,本科,综合类,550,535,否,否,否 +西南石油大学,四川,本科,理工类,570,550,否,否,否 +陕西科技大学,陕西,本科,理工类,565,545,否,否,否 +西安工程大学,陕西,本科,理工类,555,535,否,否,否 +西安外国语大学,陕西,本科,语言类,575,565,否,否,否 +西安美术学院,陕西,本科,艺术类,515,510,否,否,否 +山东财经大学,山东,本科,财经类,565,555,否,否,否 +青岛理工大学,山东,本科,理工类,555,535,否,否,否 +山东理工大学,山东,本科,理工类,550,530,否,否,否 +辽宁工程技术大学,辽宁,本科,理工类,545,525,否,否,否 +沈阳建筑大学,辽宁,本科,理工类,550,530,否,否,否 +南华大学,湖南,本科,综合类,560,545,否,否,否 +湖南工程学院,湖南,本科,理工类,545,530,否,否,否 +安徽理工大学,安徽,本科,理工类,550,530,否,否,否 +安徽工业大学,安徽,本科,理工类,555,535,否,否,否 diff --git a/project/爬虫3/.vscode/settings.json b/project/爬虫3/.vscode/settings.json new file mode 100644 index 0000000..9bd06c2 --- /dev/null +++ b/project/爬虫3/.vscode/settings.json @@ -0,0 +1,4 @@ +{ + "java.configuration.updateBuildConfiguration": "automatic", + "java.debug.settings.onBuildFailureProceed": true +} \ No newline at end of file diff --git a/project/爬虫3/SimpleCrawler$NewsItem.class b/project/爬虫3/SimpleCrawler$NewsItem.class new file mode 100644 index 0000000..c05985f Binary files /dev/null and b/project/爬虫3/SimpleCrawler$NewsItem.class differ diff --git a/project/爬虫3/SimpleCrawler.class b/project/爬虫3/SimpleCrawler.class new file mode 100644 index 0000000..b3ce1bc Binary files /dev/null and b/project/爬虫3/SimpleCrawler.class differ diff --git a/project/爬虫3/SimpleCrawler.java b/project/爬虫3/SimpleCrawler.java new file mode 100644 index 0000000..124d31b --- /dev/null +++ 
b/project/爬虫3/SimpleCrawler.java @@ -0,0 +1,121 @@ +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import java.io.*; +import java.util.*; + +public class SimpleCrawler { + private static final String BASE_URL = "https://www.people.com.cn"; + private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"; + + public static void main(String[] args) throws Exception { + List allNews = new ArrayList<>(); + + System.out.println("正在爬取人民网新闻..."); + try { + allNews.addAll(crawlNews(BASE_URL)); + } catch (Exception e) { + System.out.println("爬取失败: " + e.getMessage()); + } + + Set seen = new HashSet<>(); + List uniqueNews = new ArrayList<>(); + for (NewsItem item : allNews) { + String key = item.title + "|" + item.url; + if (!seen.contains(key)) { + seen.add(key); + uniqueNews.add(item); + } + } + + Collections.sort(uniqueNews, (a, b) -> Integer.compare(b.hotRank, a.hotRank)); + + int limit = Math.min(500, uniqueNews.size()); + List topNews = uniqueNews.subList(0, limit); + + System.out.println("正在导出 " + topNews.size() + " 条新闻到CSV..."); + exportToCSV(topNews, "people_news_500.csv"); + + System.out.println("完成!CSV文件已生成: people_news_500.csv"); + } + + private static List crawlNews(String url) throws Exception { + List news = new ArrayList<>(); + int rank = 1; + + Document doc = Jsoup.connect(url) + .userAgent(USER_AGENT) + .timeout(30000) + .get(); + + Elements items = doc.select("a"); + for (Element item : items) { + String title = item.text().trim(); + String itemUrl = item.attr("abs:href"); + + if (isValidNews(title, itemUrl)) { + String category = classifyNews(itemUrl); + news.add(new NewsItem(title, itemUrl, category, rank++)); + } + } + return news; + } + + private static boolean isValidNews(String title, String url) { + if (title == null || title.isEmpty() || title.length() < 8) return false; + if 
(url == null || url.isEmpty() || !url.startsWith("http")) return false; + if (!url.contains("people.com.cn")) return false; + + String[] invalidKeywords = {"图片", "视频", "广告", "关于我们", "联系我们", "隐私政策", + "免责声明", "网站地图", "京ICP证", "许可证", "下载客户端", "人民日报社概况", + "地方频道", "信息网络传播", "广播电视节目", "增值电信业务", "互联网新闻信息", + "网络文化经营", "服务条款", "意见反馈", "设为首页", "加入收藏", "站内搜索"}; + + for (String keyword : invalidKeywords) { + if (title.contains(keyword)) return false; + } + + String[] invalidPaths = {"/img/", "/GB/50142/", "/GB/1018/", "/GB/422044/", "/GB/408835/"}; + for (String path : invalidPaths) { + if (url.contains(path)) return false; + } + + return true; + } + + private static String classifyNews(String url) { + if (url.contains("/politics.") || url.contains("/cpc.")) { + return "时政新闻"; + } else if (url.contains("/finance.") || url.contains("/economy.")) { + return "财经新闻"; + } else if (url.contains("/health.")) { + return "健康资讯"; + } else if (url.contains("/ent.") || url.contains("/sports.")) { + return "文体娱乐"; + } else { + return "热点资讯"; + } + } + + private static void exportToCSV(List news, String filename) throws Exception { + try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(filename), "UTF-8"))) { + writer.write("标题,链接,分类,热度排名"); + writer.newLine(); + for (NewsItem item : news) { + writer.write(String.format("\"%s\",\"%s\",\"%s\",%d", + item.title.replace("\"", "\"\""), + item.url.replace("\"", "\"\""), + item.category, + item.hotRank)); + writer.newLine(); + } + } + } + + static class NewsItem { + String title, url, category; + int hotRank; + NewsItem(String t, String u, String c, int r) { title = t; url = u; category = c; hotRank = r; } + } +} \ No newline at end of file diff --git a/project/爬虫3/dependency-reduced-pom.xml b/project/爬虫3/dependency-reduced-pom.xml new file mode 100644 index 0000000..aed50a7 --- /dev/null +++ b/project/爬虫3/dependency-reduced-pom.xml @@ -0,0 +1,67 @@ + + + 4.0.0 + com.example + people-crawler + People 
Crawler + 1.0.0 + 人民网新闻爬虫 + + src/main/java + + + src/main/resources + + + + + maven-compiler-plugin + 3.11.0 + + 11 + 11 + + + + org.codehaus.mojo + exec-maven-plugin + 3.1.0 + + + + java + + + + + SimpleCrawler + ${project.basedir} + + + + maven-shade-plugin + 3.5.1 + + + package + + shade + + + + + com.example.crawler.cli.CrawlerCLI + + + + + + + + + + 11 + 11 + UTF-8 + + diff --git a/project/爬虫3/news.csv b/project/爬虫3/news.csv new file mode 100644 index 0000000..25b7949 --- /dev/null +++ b/project/爬虫3/news.csv @@ -0,0 +1,51 @@ +标题,链接,分类,热度排名 +"《人民日报社论集(2017.10—2023.03)》出版发行","http://media.people.com.cn/n1/2023/0504/c14677-32677659.html","热点资讯","156" +"人民日报社社会责任报告(2022年度)","http://gongyi.people.com.cn/n1/2023/0531/c151132-40003160.html","热点资讯","155" +"2026年度“深圳惠民保”开放参保","http://health.people.com.cn/n1/2026/0526/c14739-40727527.html","热点资讯","154" +"浙江省规范救护车配置和使用","http://health.people.com.cn/n1/2026/0526/c14739-40727498.html","热点资讯","153" +"董家鸿:穿透病理与指标 直抵鲜活的生命","http://health.people.com.cn/n1/2026/0526/c14739-40727526.html","热点资讯","152" +"全国糖尿病“三师共管”示范中心成立","http://health.people.com.cn/n1/2026/0526/c14739-40727567.html","热点资讯","151" +"《处方药网络零售合规指南》发布","http://health.people.com.cn/n1/2026/0526/c14739-40727512.html","热点资讯","150" +"国家医保局近日公布了2025年全国特例单议工作的总体情况。部分统筹地区已完成2025年年度清算,据不完全统计,全国2025年特例单议申请病例共243.5万例,审核通过207.1万例,通过率为85.1%,医保基金支出约612.6亿元。通过特例审核的病例次均医保基金支出2.96万元,真正实现支持医疗机构创新发展、减轻医疗机构收治危重患者顾虑。","http://health.people.com.cn/n1/2026/0526/c14739-40727511.html","热点资讯","149" +"2025年医保基金为特例单议病例支出约612.6亿元","http://health.people.com.cn/n1/2026/0526/c14739-40727511.html","热点资讯","148" +"第二十二届文博会闭幕","http://ent.people.com.cn/n1/2026/0526/c1012-40727479.html","热点资讯","147" +"经典IP与赛事结合,吸引万名小勇士挑战自我","http://ent.people.com.cn/n1/2026/0526/c1012-40727463.html","热点资讯","146" +"PPA亚洲职业匹克球巡回赛北京站开赛在即","http://ent.people.com.cn/n1/2026/0526/c1012-40727462.html","热点资讯","145" +"200余名选手争夺亚运会武术套路“入场券”","http://ent.people.com.cn/n1/2026/0526/c1012-40727461.html","热点资讯","144" 
+"在青岛,校园足球不再“练完就算”","http://ent.people.com.cn/n1/2026/0526/c1012-40727225.html","热点资讯","143" +"英超赛季收官 热刺保级成功","http://ent.people.com.cn/n1/2026/0526/c1012-40727557.html","热点资讯","142" +"5月23日晚,中国首个区域性城市足球联赛正式打响,沈阳、长春、哈尔滨、呼和浩特4座城市同步开赛,4地现场观赛总人次超10万。公开报道显示,辽宁多处景区免费对外开放;吉林规划多条游玩线路;黑龙江筹办多场促消费活动;内蒙古推出文旅一卡通,目前已有上万商户、30多家企业参与配套服务,方便民众观赛出行。","http://ent.people.com.cn/n1/2026/0525/c1012-40726606.html","热点资讯","141" +"“东北超”火热开赛 “草根足球”增进区域交流互动","http://ent.people.com.cn/n1/2026/0525/c1012-40726606.html","热点资讯","140" +"文化中国行 | 一叶连古今 一茶和天下","http://ent.people.com.cn/n1/2026/0526/c1012-40727521.html","热点资讯","139" +"文化中国行丨清弦抚古今 匠心守琴魂","http://ent.people.com.cn/n1/2026/0526/c1012-40727505.html","热点资讯","138" +"香港文博会勾勒文化出海新图景","http://ent.people.com.cn/n1/2026/0526/c1012-40727490.html","热点资讯","137" +"安庆,一座“有戏”的城市","http://ent.people.com.cn/n1/2026/0526/c1012-40727486.html","热点资讯","136" +"在艺术中,共享美好生活","http://ent.people.com.cn/n1/2026/0526/c1012-40727429.html","热点资讯","135" +"北京市广电局一级巡视员杨培丽介绍,2024年4月以来,“北京大视听”拍摄服务机制深度联动全市相关委办局及各区,系统梳理并建成涵盖1200余个点位的优质取景资源库。北京市广电局累计协助50余部影视作品在京拍摄,足迹遍布钟鼓楼、国贸、什刹海、大运河等425个标志性点位,累计拉动消费超3.5亿元。","http://ent.people.com.cn/n1/2026/0526/c1012-40727487.html","热点资讯","134" +"影视剧与城市的双向奔赴","http://ent.people.com.cn/n1/2026/0526/c1012-40727487.html","热点资讯","133" +"2026年中国网络文明大会","http://gx.people.com.cn/GB/409901/414451/index.html","热点资讯","132" +"第二十二届文博会在深圳举办","http://sz.people.com.cn/GB/203418/414488/index.html","热点资讯","131" +"党的二十届四中全会精神主题宣讲活动在‌乌市举行","http://xj.people.com.cn/n2/2026/0526/c186332-41591607.html","热点资讯","130" +"云南首个新的社会阶层人士服务中心成立","http://yn.people.com.cn/n2/2026/0526/c378439-41591547.html","热点资讯","129" +"贵州将推动政用、商用、民用领域数智化转型","http://gz.people.com.cn/n2/2026/0526/c222152-41591062.html","热点资讯","128" +"安徽省第四届全国城市生活垃圾分类宣传周启动","http://ah.people.com.cn/n2/2026/0526/c227131-41591353.html","热点资讯","127" +"2026中德(欧)隐形冠军论坛在京开幕","http://bj.people.com.cn/n2/2026/0526/c14540-41591693.html","热点资讯","126" 
+"近期,宁夏回族自治区税务局连续曝光两起加油站偷税案:中卫市迎铁石油销售公司利用第三方支付平台将加油款转入个人账户,隐匿收入682万余元;宁夏瑞科能源加油站通过低价销售少计收入、房产原值核算错误等方式,少缴税费超百万元。宁夏大学经济管理学院副教授陈军梅介绍,由于个人消费者主动索票意识较弱,大量“无票收入”已成为加油站企业账外经营的“灰色空间”,这正是行业偷逃税屡禁不止的深层症结之一。","http://nx.people.com.cn/n2/2026/0526/c192493-41591666.html","热点资讯","125" +"宁夏曝光加油站偷逃税案 专家:“无票收入”已成行业灰色空间","http://nx.people.com.cn/n2/2026/0526/c192493-41591666.html","热点资讯","124" +"武警曲靖支队举行军营开放日活动","http://military.people.com.cn/n1/2026/0526/c1011-40727599.html","热点资讯","123" +"跨国车企在华故事翻开新一页","https://world.people.com.cn/n1/2026/0526/c1002-40727608.html","热点资讯","122" +"阿根廷举行国庆日庆祝活动","https://world.people.com.cn/n1/2026/0526/c1002-40727550.html","热点资讯","121" +"战术训练插上“科技翅膀”","http://military.people.com.cn/n1/2026/0526/c1011-40727596.html","热点资讯","120" +"一个小零件,激发动力源","http://military.people.com.cn/n1/2026/0526/c1011-40727593.html","热点资讯","119" +"入职一年多,三个“没想到”","http://military.people.com.cn/n1/2026/0526/c1011-40727586.html","热点资讯","118" +"一行行批注背后的“兵心密码”","http://military.people.com.cn/n1/2026/0526/c1011-40727576.html","热点资讯","117" +"“家属岗位安,官兵士气高”","http://military.people.com.cn/n1/2026/0526/c1011-40727565.html","热点资讯","116" +"“双四一”——“四个知道、一个跟上,四个报告、一个依靠”,这是写在《军队基层建设纲要》里的内容,也是我军密切内部关系、凝聚军心士气、催生战斗力的重要法宝。它要求带兵人必须做到:知道战士在哪里、在干什么、在想什么、需要什么,思想工作和管理工作及时跟上;同时也要求战士做到:主动报告自己在哪里、在干什么、在想什么、需要什么,遇到困难时懂得依靠组织解决问题。","http://military.people.com.cn/n1/2026/0526/c1011-40727579.html","热点资讯","115" +"“双四一”新传:老传统焕发新活力","http://military.people.com.cn/n1/2026/0526/c1011-40727579.html","热点资讯","114" +"美军称发动“自卫性”空袭 伊媒称目前局势平静","http://world.people.com.cn/n1/2026/0526/c1002-40727515.html","热点资讯","113" +"日本将编制补充预算引发财政压力担忧","http://world.people.com.cn/n1/2026/0526/c1002-40727492.html","热点资讯","112" +"西班牙新增一例汉坦病毒感染病例","http://world.people.com.cn/n1/2026/0526/c1002-40727500.html","热点资讯","111" +"智利北部发生6.9级地震","http://world.people.com.cn/n1/2026/0526/c1002-40727513.html","热点资讯","110" +"俄外长告知美方对乌打击计划","http://world.people.com.cn/n1/2026/0526/c1002-40727499.html","热点资讯","109" 
+"日本首相高市早苗日前在澳大利亚向无名战士墓下跪献花的照片,被刻意放在首相官邸网站的显眼位置,日方试图给高市营造一个“深刻反省”的人设。显然,高市之举意在效仿勃兰特“华沙之跪”,但战后德日两国对待自身历史态度截然不同,此举无疑是画虎不成反类犬。","http://world.people.com.cn/n1/2026/0526/c1002-40727230.html","热点资讯","108" +"没有真诚反思,“下跪”不过是又一场表演","http://world.people.com.cn/n1/2026/0526/c1002-40727230.html","热点资讯","107" diff --git a/project/爬虫3/news_output.csv b/project/爬虫3/news_output.csv new file mode 100644 index 0000000..ee1b4ef --- /dev/null +++ b/project/爬虫3/news_output.csv @@ -0,0 +1,51 @@ +标题,链接,分类,热度排名 +"《人民日报社论集(2017.10—2023.03)》出版发行","http://media.people.com.cn/n1/2023/0504/c14677-32677659.html","热点资讯","155" +"人民日报社社会责任报告(2022年度)","http://gongyi.people.com.cn/n1/2023/0531/c151132-40003160.html","热点资讯","154" +"2026年度“深圳惠民保”开放参保","http://health.people.com.cn/n1/2026/0526/c14739-40727527.html","热点资讯","153" +"浙江省规范救护车配置和使用","http://health.people.com.cn/n1/2026/0526/c14739-40727498.html","热点资讯","152" +"董家鸿:穿透病理与指标 直抵鲜活的生命","http://health.people.com.cn/n1/2026/0526/c14739-40727526.html","热点资讯","151" +"全国糖尿病“三师共管”示范中心成立","http://health.people.com.cn/n1/2026/0526/c14739-40727567.html","热点资讯","150" +"《处方药网络零售合规指南》发布","http://health.people.com.cn/n1/2026/0526/c14739-40727512.html","热点资讯","149" +"国家医保局近日公布了2025年全国特例单议工作的总体情况。部分统筹地区已完成2025年年度清算,据不完全统计,全国2025年特例单议申请病例共243.5万例,审核通过207.1万例,通过率为85.1%,医保基金支出约612.6亿元。通过特例审核的病例次均医保基金支出2.96万元,真正实现支持医疗机构创新发展、减轻医疗机构收治危重患者顾虑。","http://health.people.com.cn/n1/2026/0526/c14739-40727511.html","热点资讯","148" +"2025年医保基金为特例单议病例支出约612.6亿元","http://health.people.com.cn/n1/2026/0526/c14739-40727511.html","热点资讯","147" +"第二十二届文博会闭幕","http://ent.people.com.cn/n1/2026/0526/c1012-40727479.html","热点资讯","146" +"经典IP与赛事结合,吸引万名小勇士挑战自我","http://ent.people.com.cn/n1/2026/0526/c1012-40727463.html","热点资讯","145" +"PPA亚洲职业匹克球巡回赛北京站开赛在即","http://ent.people.com.cn/n1/2026/0526/c1012-40727462.html","热点资讯","144" +"200余名选手争夺亚运会武术套路“入场券”","http://ent.people.com.cn/n1/2026/0526/c1012-40727461.html","热点资讯","143" 
+"在青岛,校园足球不再“练完就算”","http://ent.people.com.cn/n1/2026/0526/c1012-40727225.html","热点资讯","142" +"英超赛季收官 热刺保级成功","http://ent.people.com.cn/n1/2026/0526/c1012-40727557.html","热点资讯","141" +"5月23日晚,中国首个区域性城市足球联赛正式打响,沈阳、长春、哈尔滨、呼和浩特4座城市同步开赛,4地现场观赛总人次超10万。公开报道显示,辽宁多处景区免费对外开放;吉林规划多条游玩线路;黑龙江筹办多场促消费活动;内蒙古推出文旅一卡通,目前已有上万商户、30多家企业参与配套服务,方便民众观赛出行。","http://ent.people.com.cn/n1/2026/0525/c1012-40726606.html","热点资讯","140" +"“东北超”火热开赛 “草根足球”增进区域交流互动","http://ent.people.com.cn/n1/2026/0525/c1012-40726606.html","热点资讯","139" +"文化中国行 | 一叶连古今 一茶和天下","http://ent.people.com.cn/n1/2026/0526/c1012-40727521.html","热点资讯","138" +"文化中国行丨清弦抚古今 匠心守琴魂","http://ent.people.com.cn/n1/2026/0526/c1012-40727505.html","热点资讯","137" +"香港文博会勾勒文化出海新图景","http://ent.people.com.cn/n1/2026/0526/c1012-40727490.html","热点资讯","136" +"安庆,一座“有戏”的城市","http://ent.people.com.cn/n1/2026/0526/c1012-40727486.html","热点资讯","135" +"在艺术中,共享美好生活","http://ent.people.com.cn/n1/2026/0526/c1012-40727429.html","热点资讯","134" +"北京市广电局一级巡视员杨培丽介绍,2024年4月以来,“北京大视听”拍摄服务机制深度联动全市相关委办局及各区,系统梳理并建成涵盖1200余个点位的优质取景资源库。北京市广电局累计协助50余部影视作品在京拍摄,足迹遍布钟鼓楼、国贸、什刹海、大运河等425个标志性点位,累计拉动消费超3.5亿元。","http://ent.people.com.cn/n1/2026/0526/c1012-40727487.html","热点资讯","133" +"影视剧与城市的双向奔赴","http://ent.people.com.cn/n1/2026/0526/c1012-40727487.html","热点资讯","132" +"2026年中国网络文明大会","http://gx.people.com.cn/GB/409901/414451/index.html","热点资讯","131" +"第二十二届文博会在深圳举办","http://sz.people.com.cn/GB/203418/414488/index.html","热点资讯","130" +"第九届枸杞产业博览会6月21日在宁夏中宁县开幕","http://nx.people.com.cn/n2/2026/0525/c192493-41590631.html","热点资讯","129" +"最高补贴1万元 2026年重庆市残疾人光影大赛启动","http://cq.people.com.cn/n2/2026/0525/c365401-41590611.html","热点资讯","128" +"2026圭塘河国际汉字艺术嘉年华开幕","http://hn.people.com.cn/n2/2026/0525/c336521-41590548.html","热点资讯","127" +"黑龙江完成首次多年期绿色电力外送交易","http://hlj.people.com.cn/n2/2026/0525/c220005-41590777.html","热点资讯","126" +"“鸟界蓝色妖姬”铜蓝鹟首现北京密云","http://bj.people.com.cn/n2/2026/0525/c82838-41590644.html","热点资讯","125" 
+"近日,第三届东北书博会上,首届东北亚数字文创与模玩艺术博览会成为人气焦点。4000平方米的模玩展销面积汇聚了100余家品牌与原创工作室。据统计,展会期间,专业购票观众超1万人次,东北三省观众占比75%,亲子家庭参与超4000人次;全网曝光量突破5000万,现场成交额达180万元。","http://jl.people.com.cn/n2/2026/0525/c349771-41590615.html","热点资讯","124" +"拖箱赴展,东北模玩消费“破圈”","http://jl.people.com.cn/n2/2026/0525/c349771-41590615.html","热点资讯","123" +"武警曲靖支队举行军营开放日活动","http://military.people.com.cn/n1/2026/0526/c1011-40727599.html","热点资讯","122" +"跨国车企在华故事翻开新一页","https://world.people.com.cn/n1/2026/0526/c1002-40727608.html","热点资讯","121" +"阿根廷举行国庆日庆祝活动","https://world.people.com.cn/n1/2026/0526/c1002-40727550.html","热点资讯","120" +"战术训练插上“科技翅膀”","http://military.people.com.cn/n1/2026/0526/c1011-40727596.html","热点资讯","119" +"一个小零件,激发动力源","http://military.people.com.cn/n1/2026/0526/c1011-40727593.html","热点资讯","118" +"入职一年多,三个“没想到”","http://military.people.com.cn/n1/2026/0526/c1011-40727586.html","热点资讯","117" +"一行行批注背后的“兵心密码”","http://military.people.com.cn/n1/2026/0526/c1011-40727576.html","热点资讯","116" +"“家属岗位安,官兵士气高”","http://military.people.com.cn/n1/2026/0526/c1011-40727565.html","热点资讯","115" +"“双四一”——“四个知道、一个跟上,四个报告、一个依靠”,这是写在《军队基层建设纲要》里的内容,也是我军密切内部关系、凝聚军心士气、催生战斗力的重要法宝。它要求带兵人必须做到:知道战士在哪里、在干什么、在想什么、需要什么,思想工作和管理工作及时跟上;同时也要求战士做到:主动报告自己在哪里、在干什么、在想什么、需要什么,遇到困难时懂得依靠组织解决问题。","http://military.people.com.cn/n1/2026/0526/c1011-40727579.html","热点资讯","114" +"“双四一”新传:老传统焕发新活力","http://military.people.com.cn/n1/2026/0526/c1011-40727579.html","热点资讯","113" +"美军称发动“自卫性”空袭 伊媒称目前局势平静","http://world.people.com.cn/n1/2026/0526/c1002-40727515.html","热点资讯","112" +"日本将编制补充预算引发财政压力担忧","http://world.people.com.cn/n1/2026/0526/c1002-40727492.html","热点资讯","111" +"西班牙新增一例汉坦病毒感染病例","http://world.people.com.cn/n1/2026/0526/c1002-40727500.html","热点资讯","110" +"智利北部发生6.9级地震","http://world.people.com.cn/n1/2026/0526/c1002-40727513.html","热点资讯","109" +"俄外长告知美方对乌打击计划","http://world.people.com.cn/n1/2026/0526/c1002-40727499.html","热点资讯","108" 
+"日本首相高市早苗日前在澳大利亚向无名战士墓下跪献花的照片,被刻意放在首相官邸网站的显眼位置,日方试图给高市营造一个“深刻反省”的人设。显然,高市之举意在效仿勃兰特“华沙之跪”,但战后德日两国对待自身历史态度截然不同,此举无疑是画虎不成反类犬。","http://world.people.com.cn/n1/2026/0526/c1002-40727230.html","热点资讯","107" +"没有真诚反思,“下跪”不过是又一场表演","http://world.people.com.cn/n1/2026/0526/c1002-40727230.html","热点资讯","106" diff --git a/project/爬虫3/people_news_500.csv b/project/爬虫3/people_news_500.csv new file mode 100644 index 0000000..3c2bca6 --- /dev/null +++ b/project/爬虫3/people_news_500.csv @@ -0,0 +1,165 @@ +标题,链接,分类,热度排名 +"“跟着总书记学党史数据库”正式上线","http://cpc.people.com.cn/n1/2022/0720/c64387-32480412.html","时政新闻",166 +"《人民日报社论集(2017.10—2023.03)》出版发行","http://media.people.com.cn/n1/2023/0504/c14677-32677659.html","热点资讯",165 +"人民日报社社会责任报告(2022年度)","http://gongyi.people.com.cn/n1/2023/0531/c151132-40003160.html","热点资讯",164 +"2026年度“深圳惠民保”开放参保","http://health.people.com.cn/n1/2026/0526/c14739-40727527.html","健康资讯",163 +"浙江省规范救护车配置和使用","http://health.people.com.cn/n1/2026/0526/c14739-40727498.html","健康资讯",162 +"董家鸿:穿透病理与指标 直抵鲜活的生命","http://health.people.com.cn/n1/2026/0526/c14739-40727526.html","健康资讯",161 +"全国糖尿病“三师共管”示范中心成立","http://health.people.com.cn/n1/2026/0526/c14739-40727567.html","健康资讯",160 +"《处方药网络零售合规指南》发布","http://health.people.com.cn/n1/2026/0526/c14739-40727512.html","健康资讯",159 +"国家医保局近日公布了2025年全国特例单议工作的总体情况。部分统筹地区已完成2025年年度清算,据不完全统计,全国2025年特例单议申请病例共243.5万例,审核通过207.1万例,通过率为85.1%,医保基金支出约612.6亿元。通过特例审核的病例次均医保基金支出2.96万元,真正实现支持医疗机构创新发展、减轻医疗机构收治危重患者顾虑。","http://health.people.com.cn/n1/2026/0526/c14739-40727511.html","健康资讯",157 +"2025年医保基金为特例单议病例支出约612.6亿元","http://health.people.com.cn/n1/2026/0526/c14739-40727511.html","健康资讯",156 +"第二十二届文博会闭幕","http://ent.people.com.cn/n1/2026/0526/c1012-40727479.html","文体娱乐",155 +"经典IP与赛事结合,吸引万名小勇士挑战自我","http://ent.people.com.cn/n1/2026/0526/c1012-40727463.html","文体娱乐",154 +"PPA亚洲职业匹克球巡回赛北京站开赛在即","http://ent.people.com.cn/n1/2026/0526/c1012-40727462.html","文体娱乐",153 
+"200余名选手争夺亚运会武术套路“入场券”","http://ent.people.com.cn/n1/2026/0526/c1012-40727461.html","文体娱乐",152 +"在青岛,校园足球不再“练完就算”","http://ent.people.com.cn/n1/2026/0526/c1012-40727225.html","文体娱乐",151 +"英超赛季收官 热刺保级成功","http://ent.people.com.cn/n1/2026/0526/c1012-40727557.html","文体娱乐",150 +"5月23日晚,中国首个区域性城市足球联赛正式打响,沈阳、长春、哈尔滨、呼和浩特4座城市同步开赛,4地现场观赛总人次超10万。公开报道显示,辽宁多处景区免费对外开放;吉林规划多条游玩线路;黑龙江筹办多场促消费活动;内蒙古推出文旅一卡通,目前已有上万商户、30多家企业参与配套服务,方便民众观赛出行。","http://ent.people.com.cn/n1/2026/0525/c1012-40726606.html","文体娱乐",149 +"“东北超”火热开赛 “草根足球”增进区域交流互动","http://ent.people.com.cn/n1/2026/0525/c1012-40726606.html","文体娱乐",148 +"文化中国行 | 一叶连古今 一茶和天下","http://ent.people.com.cn/n1/2026/0526/c1012-40727521.html","文体娱乐",147 +"文化中国行丨清弦抚古今 匠心守琴魂","http://ent.people.com.cn/n1/2026/0526/c1012-40727505.html","文体娱乐",146 +"香港文博会勾勒文化出海新图景","http://ent.people.com.cn/n1/2026/0526/c1012-40727490.html","文体娱乐",145 +"安庆,一座“有戏”的城市","http://ent.people.com.cn/n1/2026/0526/c1012-40727486.html","文体娱乐",144 +"在艺术中,共享美好生活","http://ent.people.com.cn/n1/2026/0526/c1012-40727429.html","文体娱乐",143 +"北京市广电局一级巡视员杨培丽介绍,2024年4月以来,“北京大视听”拍摄服务机制深度联动全市相关委办局及各区,系统梳理并建成涵盖1200余个点位的优质取景资源库。北京市广电局累计协助50余部影视作品在京拍摄,足迹遍布钟鼓楼、国贸、什刹海、大运河等425个标志性点位,累计拉动消费超3.5亿元。","http://ent.people.com.cn/n1/2026/0526/c1012-40727487.html","文体娱乐",142 +"影视剧与城市的双向奔赴","http://ent.people.com.cn/n1/2026/0526/c1012-40727487.html","文体娱乐",141 +"2026年中国网络文明大会","http://gx.people.com.cn/GB/409901/414451/index.html","热点资讯",140 +"第二十二届文博会在深圳举办","http://sz.people.com.cn/GB/203418/414488/index.html","热点资讯",139 +"党的二十届四中全会精神主题宣讲活动在‌乌市举行","http://xj.people.com.cn/n2/2026/0526/c186332-41591607.html","热点资讯",138 +"云南首个新的社会阶层人士服务中心成立","http://yn.people.com.cn/n2/2026/0526/c378439-41591547.html","热点资讯",137 +"贵州将推动政用、商用、民用领域数智化转型","http://gz.people.com.cn/n2/2026/0526/c222152-41591062.html","热点资讯",136 +"安徽省第四届全国城市生活垃圾分类宣传周启动","http://ah.people.com.cn/n2/2026/0526/c227131-41591353.html","热点资讯",135 +"2026中德(欧)隐形冠军论坛在京开幕","http://bj.people.com.cn/n2/2026/0526/c14540-41591693.html","热点资讯",134 
+"近期,宁夏回族自治区税务局连续曝光两起加油站偷税案:中卫市迎铁石油销售公司利用第三方支付平台将加油款转入个人账户,隐匿收入682万余元;宁夏瑞科能源加油站通过低价销售少计收入、房产原值核算错误等方式,少缴税费超百万元。宁夏大学经济管理学院副教授陈军梅介绍,由于个人消费者主动索票意识较弱,大量“无票收入”已成为加油站企业账外经营的“灰色空间”,这正是行业偷逃税屡禁不止的深层症结之一。","http://nx.people.com.cn/n2/2026/0526/c192493-41591666.html","热点资讯",133 +"宁夏曝光加油站偷逃税案 专家:“无票收入”已成行业灰色空间","http://nx.people.com.cn/n2/2026/0526/c192493-41591666.html","热点资讯",132 +"武警曲靖支队举行军营开放日活动","http://military.people.com.cn/n1/2026/0526/c1011-40727599.html","热点资讯",131 +"跨国车企在华故事翻开新一页","https://world.people.com.cn/n1/2026/0526/c1002-40727608.html","热点资讯",130 +"阿根廷举行国庆日庆祝活动","https://world.people.com.cn/n1/2026/0526/c1002-40727550.html","热点资讯",129 +"战术训练插上“科技翅膀”","http://military.people.com.cn/n1/2026/0526/c1011-40727596.html","热点资讯",128 +"一个小零件,激发动力源","http://military.people.com.cn/n1/2026/0526/c1011-40727593.html","热点资讯",127 +"入职一年多,三个“没想到”","http://military.people.com.cn/n1/2026/0526/c1011-40727586.html","热点资讯",126 +"一行行批注背后的“兵心密码”","http://military.people.com.cn/n1/2026/0526/c1011-40727576.html","热点资讯",125 +"“家属岗位安,官兵士气高”","http://military.people.com.cn/n1/2026/0526/c1011-40727565.html","热点资讯",124 +"“双四一”——“四个知道、一个跟上,四个报告、一个依靠”,这是写在《军队基层建设纲要》里的内容,也是我军密切内部关系、凝聚军心士气、催生战斗力的重要法宝。它要求带兵人必须做到:知道战士在哪里、在干什么、在想什么、需要什么,思想工作和管理工作及时跟上;同时也要求战士做到:主动报告自己在哪里、在干什么、在想什么、需要什么,遇到困难时懂得依靠组织解决问题。","http://military.people.com.cn/n1/2026/0526/c1011-40727579.html","热点资讯",123 +"“双四一”新传:老传统焕发新活力","http://military.people.com.cn/n1/2026/0526/c1011-40727579.html","热点资讯",122 +"美军称发动“自卫性”空袭 伊媒称目前局势平静","http://world.people.com.cn/n1/2026/0526/c1002-40727515.html","热点资讯",121 +"日本将编制补充预算引发财政压力担忧","http://world.people.com.cn/n1/2026/0526/c1002-40727492.html","热点资讯",120 +"西班牙新增一例汉坦病毒感染病例","http://world.people.com.cn/n1/2026/0526/c1002-40727500.html","热点资讯",119 +"智利北部发生6.9级地震","http://world.people.com.cn/n1/2026/0526/c1002-40727513.html","热点资讯",118 +"俄外长告知美方对乌打击计划","http://world.people.com.cn/n1/2026/0526/c1002-40727499.html","热点资讯",117 
+"日本首相高市早苗日前在澳大利亚向无名战士墓下跪献花的照片,被刻意放在首相官邸网站的显眼位置,日方试图给高市营造一个“深刻反省”的人设。显然,高市之举意在效仿勃兰特“华沙之跪”,但战后德日两国对待自身历史态度截然不同,此举无疑是画虎不成反类犬。","http://world.people.com.cn/n1/2026/0526/c1002-40727230.html","热点资讯",116 +"没有真诚反思,“下跪”不过是又一场表演","http://world.people.com.cn/n1/2026/0526/c1002-40727230.html","热点资讯",115 +"普京:俄罗斯是全球经济体系不可分割的一部分","http://world.people.com.cn/n1/2026/0526/c1002-40727476.html","热点资讯",114 +"WHO:本轮埃博拉疫情已死亡的疑似病例达220例","http://world.people.com.cn/n1/2026/0526/c1002-40727444.html","热点资讯",113 +"开放谈:开放合作推动全球汽车产业共赢发展","http://world.people.com.cn/n1/2026/0526/c1002-40727609.html","热点资讯",112 +"“茶和天下·雅集”活动在毛里求斯举办","http://world.people.com.cn/n1/2026/0526/c1002-40727460.html","热点资讯",111 +"外交部谈中国治沙劳模寻找美国友人","http://world.people.com.cn/n1/2026/0526/c1002-40727151.html","热点资讯",110 +"教育是国与国之间连接民心、传承友谊的重要桥梁,是功在当代、利在千秋的崇高事业。日前,中俄两国元首在北京共同出席“中俄教育年”开幕式。近年来,中俄教育合作持续深化、成果丰硕,为培育中俄世代友好的接班人、促进两国民众相知相亲注入更多动能。","http://world.people.com.cn/n1/2026/0526/c1002-40727610.html","热点资讯",109 +"环球热点:播撒中俄友好的种子","http://world.people.com.cn/n1/2026/0526/c1002-40727610.html","热点资讯",108 +"浙江宁波:奇花异草引客来","http://society.people.com.cn/n1/2026/0526/c1008-40727472.html","热点资讯",107 +"甘肃敦煌:夏游莫高窟","http://society.people.com.cn/n1/2026/0526/c1008-40727473.html","热点资讯",106 +"家门口托育 养娃更省心","http://edu.people.com.cn/n1/2026/0526/c1006-40727420.html","热点资讯",105 +"第七届澳门中学生历史知识竞赛举行","http://edu.people.com.cn/n1/2026/0526/c1006-40727380.html","热点资讯",104 +"山东省学生心理健康服务平台发布","http://edu.people.com.cn/n1/2026/0526/c1006-40727416.html","热点资讯",103 +"在这所中学,每个学生都有自己的“成长图谱”","http://edu.people.com.cn/n1/2026/0526/c1006-40727390.html","热点资讯",102 +"“加”出育人新模式,“加”出科创新活力","http://edu.people.com.cn/n1/2026/0526/c1006-40727358.html","热点资讯",101 +"培养能提出“跨学科解决方案”的英才","http://edu.people.com.cn/n1/2026/0526/c1006-40727355.html","热点资讯",100 
+"38种新专业,1000多万名高考生,一道课程衔接的必答题。当大学专业目录再次上新,高中的讲台和课表,准备好了吗?从沈阳到北京,从诸城到贵州……一场围绕“课程接棒”的改革正在多所高中悄然推进。记者深入采访,看高中教育如何在分层选课、动手探究、多元评价中,为拔尖创新人才铺就更加宽广、更为畅达的成长之路。","http://edu.people.com.cn/n1/2026/0526/c1006-40727381.html","热点资讯",99 +"高中教育,如何从解题到解决问题","http://edu.people.com.cn/n1/2026/0526/c1006-40727381.html","热点资讯",98 +"天津:为特色农产品提供定制司法服务","http://society.people.com.cn/n1/2026/0526/c1008-40727445.html","热点资讯",97 +"黑龙江:打造成群众触手可及的“掌上法庭”","http://society.people.com.cn/n1/2026/0526/c1008-40727458.html","热点资讯",96 +"贵州:更多投资于人 更好造福民生","http://society.people.com.cn/n1/2026/0526/c1008-40727419.html","热点资讯",95 +"辽宁系统施策稳就业","http://society.people.com.cn/n1/2026/0526/c1008-40727410.html","热点资讯",94 +"本轮降雨局地强度大 多部门合力防汛防灾","http://society.people.com.cn/n1/2026/0526/c1008-40727424.html","热点资讯",93 +"近期,我国多地接连出现强降雨天气。据统计,入汛以来(4月1日至5月22日,下同),全国共有492个气象站点日降水量超历史极值,448个气象站点小时降水量超历史极值,76个国家级气象站点日降水量突破春季历史极值。","http://society.people.com.cn/n1/2026/0526/c1008-40727428.html","热点资讯",92 +"今年暴雨为何来得这么早、这么强","http://society.people.com.cn/n1/2026/0526/c1008-40727428.html","热点资讯",91 +"月壤纤维有望成为月面基建“钢筋”","http://kpzg.people.com.cn/n1/2026/0526/c404214-40727280.html","热点资讯",90 +"高质量发展故事汇","http://finance.people.com.cn/GB/459357/index.html","财经新闻",89 +"我国科学家获国际基础科学大会基础科学奖章","http://kpzg.people.com.cn/n1/2026/0526/c404214-40727281.html","热点资讯",88 +"神舟二十三号航天员乘组顺利进驻“天宫”","http://kpzg.people.com.cn/n1/2026/0526/c404214-40727264.html","热点资讯",87 +"商业航天还需在何处发力","http://finance.people.com.cn/n1/2026/0526/c1004-40727561.html","财经新闻",86 +"AI时代还需要“师傅”吗","http://finance.people.com.cn/n1/2026/0526/c1004-40727560.html","财经新闻",85 +"中国游戏AI竞赛进入“关键赛点”","http://finance.people.com.cn/n1/2026/0526/c1004-40727555.html","财经新闻",84 +"近日,A股市场机器人板块迎来强势行情,与此同时,机器人主题基金的规模与净值均有所增长。数据显示,截至5月25日,近一周(5月19日至5月25日),全市场13只机器人主题ETF规模合计增长26.3亿元,总规模已达574亿元,净值同步呈现走高态势。","http://finance.people.com.cn/n1/2026/0526/c1004-40727552.html","财经新闻",83 +"13只机器人主题ETF总规模达574亿元","http://finance.people.com.cn/n1/2026/0526/c1004-40727552.html","财经新闻",82 
+"《给阿嬷的情书》低投入为何赢得高票房","http://finance.people.com.cn/n1/2026/0526/c1004-40727532.html","财经新闻",81 +"中央财政资金精准赋能城市更新","http://finance.people.com.cn/n1/2026/0526/c1004-40727525.html","财经新闻",80 +"小微企业金融服务转向质量优先","http://finance.people.com.cn/n1/2026/0526/c1004-40727549.html","财经新闻",79 +"全球电动汽车销量有望再创新高","http://finance.people.com.cn/n1/2026/0526/c1004-40727546.html","财经新闻",78 +"规范交易秩序 涵养资本市场健康生态","http://finance.people.com.cn/n1/2026/0526/c1004-40727524.html","财经新闻",77 +"银行业理财登记托管中心日前披露的数据显示,一季度末全市场理财产品存续规模31.91万亿元,较2025年末缩水超万亿元。理财、基金规模双降的原因是什么?资金流向了哪里?","http://finance.people.com.cn/n1/2026/0526/c1004-40727542.html","财经新闻",76 +"一季度理财基金规模双降,钱去哪了","http://finance.people.com.cn/n1/2026/0526/c1004-40727542.html","财经新闻",75 +"说法——说案例 讲法律","http://society.people.com.cn/GB/369130/460676/index.html","热点资讯",74 +"读者点题·共同关注","http://leaders.people.com.cn/GB/178291/218130/460024/460025/index.html","热点资讯",73 +"树立和践行正确政绩观","https://cpc.people.com.cn/GB/67481/461783/index.html","时政新闻",72 +"学习贯彻党的二十届四中全会精神","https://cpc.people.com.cn/GB/67481/461139/index.html","时政新闻",71 +"总书记的人民情怀","http://cpc.people.com.cn/GB/67481/457481/index.html","时政新闻",70 +"身边事|设施不完善 雨水漫农田","http://leaders.people.com.cn/n1/2026/0525/c58278-40726412.html","热点资讯",69 +"建议|农家书屋应“适农”","http://leaders.people.com.cn/n1/2026/0525/c58278-40726414.html","热点资讯",68 +"曝光|校园视力检测,竟成商业引流?","http://leaders.people.com.cn/n1/2026/0525/c58278-40726413.html","热点资讯",67 +"来信调查|房屋征收五年未安置,一纸通告让居民搬回去","http://leaders.people.com.cn/n1/2026/0525/c58278-40726410.html","热点资讯",66 +"自行车道多处破损影响通行还易扎破轮胎 北京石景山:已修复","https://liuyan.people.com.cn/threads/content?tid=25340838","热点资讯",65 +"群众反映水压不足影响生活 新疆伊宁协调更换增压泵保障供水","https://liuyan.people.com.cn/threads/content?tid=25333501","热点资讯",64 +"书记有回复|火车噪音扰民 四川达州:封闭铁路沿线减少鸣笛","https://liuyan.people.com.cn/threads/content?tid=25329395","热点资讯",63 +"举办带吉祥物打卡等配套活动 网友为提升“东北超”热度支招","https://liuyan.people.com.cn/threads/content?tid=25357212","热点资讯",62 +"网友建议遗产公证收费设定封顶线且全国统一标准 
司法部回应","https://liuyan.people.com.cn/threads/content?tid=24889335","热点资讯",61 +"聚焦网上群众路线","http://leaders.people.com.cn/GB/178291/429112/index.html","热点资讯",60 +"提交民主故事案例","http://leaders.people.com.cn/GB/460365/index.html","热点资讯",59 +"有耐心,好电影会自己长出来——从《给阿嬷的情书》谈起","http://ent.people.com.cn/n1/2026/0526/c1012-40727236.html","文体娱乐",58 +"对话网球名将张帅:只要还能进步,就有上场的理由","http://ent.people.com.cn/n1/2026/0526/c1012-40727221.html","文体娱乐",57 +"英国学者:中国将成为世界现代性的象征","http://world.people.com.cn/n1/2026/0526/c1002-40727231.html","热点资讯",56 +"中国文化IP在欧洲“热”起来","https://world.people.com.cn/n1/2026/0526/c1002-40727227.html","热点资讯",55 +"人民建议|一条建言让“海上游大连”讲解服务全面升级","http://ln.people.com.cn/n2/2026/0526/c378489-41591459.html","热点资讯",54 +"一句承诺,南通用二十年来坚守","http://js.people.com.cn/n2/2026/0526/c358232-41590953.html","热点资讯",53 +"青海民和:冷凉蔬菜“丰”景正好","http://qh.people.com.cn/n2/2026/0526/c378418-41591212.html","热点资讯",52 +"3万亿元台阶,贵州咋迈?","http://gz.people.com.cn/n2/2026/0526/c222152-41591033.html","热点资讯",51 +"“十五五”·瞰齐鲁丨数智谱新篇 泉城向新行","http://sd.people.com.cn/n2/2026/0526/c364532-41591147.html","热点资讯",50 +"531种!北京如何成为“鸟类宜居家园”?","http://env.people.com.cn/n1/2026/0526/c1010-40727216.html","热点资讯",49 +"2026媒体融合发展论坛在武汉举行","http://media.people.com.cn/n1/2026/0526/c14677-40727696.html","热点资讯",48 +"评论丨规范涉企执法 必须常抓不懈","http://finance.people.com.cn/n1/2026/0526/c1004-40727198.html","财经新闻",47 +"两部门再次紧急预拨1.6亿元中央自然灾害救灾资金","http://society.people.com.cn/n1/2026/0526/c1008-40727730.html","热点资讯",46 +"将丰收确定性牢牢攥在手中","http://society.people.com.cn/n1/2026/0526/c1008-40727163.html","热点资讯",45 +"抢收窗口期必须利用好","http://society.people.com.cn/n1/2026/0526/c1008-40727161.html","热点资讯",44 +"直播间摆拍“绑架”吸粉被查处 公安部通报网络谣言典型案例","http://society.people.com.cn/n1/2026/0526/c1008-40727297.html","热点资讯",43 +"5部门出台暂行规定 更好保障超龄劳动者基本权益","http://society.people.com.cn/n1/2026/0526/c1008-40727193.html","热点资讯",42 +"第二十二届文博会展出各类文化精品超12万件","http://ent.people.com.cn/n1/2026/0526/c1012-40727166.html","文体娱乐",41 
+"外交部:日方言行不一无法化解国际社会对日本“再军事化”的担忧","http://world.people.com.cn/n1/2026/0526/c1002-40727791.html","热点资讯",40 +"甘肃发布一批干部任前公示","http://renshi.people.com.cn/n1/2026/0526/c139617-40727457.html","热点资讯",39 +"土地革命时期党校如何培养干部","https://cpc.people.com.cn/n1/2026/0526/c443712-40727329.html","时政新闻",38 +"江苏淮安:疏通高标准农田管护堵点","http://fanfu.people.com.cn/n1/2026/0526/c64371-40727308.html","热点资讯",37 +"提升基层自主抓建内生动力","http://dangjian.people.com.cn/n1/2026/0526/c117092-40727482.html","热点资讯",36 +"在发展新质生产力上走在前列","http://theory.people.com.cn/n1/2026/0526/c40531-40727338.html","热点资讯",35 +"高质量发展故事汇","http://finance.people.com.cn/GB/459357/459634/462127/index.html","财经新闻",34 +"广西田林:葡萄管护忙 增收有盼头","http://gx.people.com.cn/n2/2026/0526/c179464-41591441.html","热点资讯",33 +"河南南阳粮食主产区争分夺秒抢收小麦","http://henan.people.com.cn/n2/2026/0526/c351638-41591132.html","热点资讯",32 +"习近平会见巴基斯坦总理夏巴兹","http://pic.people.com.cn/n1/2026/0525/c426981-40727084.html","热点资讯",31 +"习近平向塞尔维亚总统武契奇颁授“友谊勋章”","http://pic.people.com.cn/n1/2026/0525/c426981-40727127.html","热点资讯",30 +"习近平同塞尔维亚总统武契奇举行会谈","http://pic.people.com.cn/n1/2026/0525/c426981-40727089.html","热点资讯",29 +"进一步全面深化改革面临的形势与任务","http://finance.people.com.cn/n1/2025/1229/c1004-40634090.html","财经新闻",28 +"智利北部发生6.9级地震","http://world.people.com.cn/n1/2026/0526/c1002-40727242.html","热点资讯",27 +"美军称发动""自卫性""空袭 伊媒称目前局势平静","http://world.people.com.cn/n1/2026/0526/c1002-40727515.html","热点资讯",26 +"超过33℃ 英国打破""最热五月天""纪录","http://world.people.com.cn/n1/2026/0526/c1002-40727746.html","热点资讯",25 +"日本对外净资产规模降为全球第三","http://world.people.com.cn/n1/2026/0526/c1002-40727745.html","热点资讯",24 +"体育用品卖爆了!运动消费热度节节攀升","http://finance.people.com.cn/n1/2026/0526/c1004-40727619.html","财经新闻",23 +"从""在中国生产""到""在中国创造"" 外企加码投资中国","http://finance.people.com.cn/n1/2026/0526/c1004-40727409.html","财经新闻",22 +"""凉资源""变""热动力"" 清凉经济释放消费潜力","http://finance.people.com.cn/n1/2026/0526/c1004-40727570.html","财经新闻",21 +"别让实干者流汗又流泪","http://opinion.people.com.cn/n1/2026/0526/c436867-40727769.html","热点资讯",20 
+"知音殿焕新点亮长江夜","http://hb.people.com.cn/n2/2026/0526/c192237-41590960.html","热点资讯",19 +"一块薄玻璃卷厚新产业","http://ah.people.com.cn/n2/2026/0526/c227131-41591618.html","热点资讯",18 +"担责是道""必答题""","http://society.people.com.cn/n1/2026/0526/c1008-40727233.html","热点资讯",17 +"提高膳食营养水平 强化高原健康保障","http://society.people.com.cn/n1/2026/0526/c1008-40727194.html","热点资讯",16 +"在理念相通中把合作之路走得更宽更实","http://world.people.com.cn/n1/2026/0526/c1002-40727149.html","热点资讯",15 +"习近平会见巴基斯坦总理夏巴兹","http://politics.people.com.cn/n1/2026/0526/c1024-40727145.html","时政新闻",14 +"新时代共产党人干事业、创政绩的科学指南","http://opinion.people.com.cn/n1/2026/0520/c461530-40723237.html","热点资讯",13 +"政绩观问题是一个根本性问题","http://opinion.people.com.cn/n1/2026/0521/c461530-40724143.html","热点资讯",12 +"树立和践行正确政绩观,起决定性作用的是党性","http://opinion.people.com.cn/n1/2026/0522/c461530-40724927.html","热点资讯",11 +"为民造福是最大政绩","http://opinion.people.com.cn/n1/2026/0525/c461530-40726302.html","热点资讯",10 +"坚持高质量发展要成为领导干部政绩观的重要内容","http://opinion.people.com.cn/n1/2026/0526/c461530-40727192.html","热点资讯",9 +"人民网智慧党建体验中心","http://capital.people.com.cn/GB/440975/index.html","热点资讯",7 +"中国共产党新闻网","http://cpc.people.com.cn/","时政新闻",6 +"“人民消费”中消协教育课堂","http://finance.people.com.cn/GB/8215/452688/index.html","财经新闻",5 +"文化企业社会责任报告发布平台","http://ent.people.com.cn/GB/436846/441076/index.html","文体娱乐",4 +"灵境·人民艺术馆","http://art.people.com.cn/","热点资讯",3 +"828企业服务平台","http://828.people.com.cn/","热点资讯",2 +"跟着总书记学党史","http://cpc.people.com.cn/GB/67481/444924/index.html","时政新闻",1 diff --git a/project/爬虫3/pom.xml b/project/爬虫3/pom.xml new file mode 100644 index 0000000..c0f7e55 --- /dev/null +++ b/project/爬虫3/pom.xml @@ -0,0 +1,95 @@ + + + 4.0.0 + + com.example + people-crawler + 1.0.0 + jar + + People Crawler + 人民网新闻爬虫 + + + 11 + 11 + UTF-8 + + + + + org.jsoup + jsoup + 1.17.2 + + + org.slf4j + slf4j-api + 2.0.9 + + + org.slf4j + slf4j-simple + 2.0.9 + runtime + + + + + src/main/java + + + src/main/resources + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.11.0 + + 11 
+ 11 + + + + org.codehaus.mojo + exec-maven-plugin + 3.1.0 + + + + java + + + + + SimpleCrawler + ${project.basedir} + + + + org.apache.maven.plugins + maven-shade-plugin + 3.5.1 + + + package + + shade + + + + + com.example.crawler.cli.CrawlerCLI + + + + + + + + + \ No newline at end of file diff --git a/project/爬虫3/src/main/java/com/example/crawler/cli/CrawlerCLI.java b/project/爬虫3/src/main/java/com/example/crawler/cli/CrawlerCLI.java new file mode 100644 index 0000000..907bf3f --- /dev/null +++ b/project/爬虫3/src/main/java/com/example/crawler/cli/CrawlerCLI.java @@ -0,0 +1,120 @@ +package com.example.crawler.cli; + +import com.example.crawler.controller.CrawlerController; +import com.example.crawler.exception.CrawlerException; +import com.example.crawler.strategy.StrategyFactory; +import com.example.crawler.view.ConsoleView; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class CrawlerCLI { + + private static final Logger logger = LoggerFactory.getLogger(CrawlerCLI.class); + + private String strategy = "all"; + private int limit = 500; + private String outputFile = null; + private boolean display = false; + + public static void main(String[] args) { + CrawlerCLI cli = new CrawlerCLI(); + cli.run(args); + } + + public void run(String[] args) { + ConsoleView view = new ConsoleView(); + + try { + parseArgs(args); + + if (args.length == 0 || hasHelpOption(args)) { + view.displayWelcome(); + view.displayHelp(); + return; + } + + view.displayWelcome(); + + CrawlerController controller = new CrawlerController(view); + + controller.crawl(strategy); + + if (display) { + controller.display(limit); + } + + if (outputFile != null) { + controller.export(outputFile, limit); + } + + logger.info("程序正常结束"); + + } catch (CrawlerException e) { + logger.error("爬虫执行失败: {}", e.getMessage(), e); + view.displayError("执行失败: " + e.getMessage()); + System.exit(1); + } catch (Exception e) { + logger.error("未知错误: {}", e.getMessage(), e); + view.displayError("未知错误: " + 
e.getMessage()); + System.exit(1); + } + } + + private void parseArgs(String[] args) throws CrawlerException { + for (int i = 0; i < args.length; i++) { + switch (args[i]) { + case "-h": + case "--help": + break; + case "-s": + case "--strategy": + if (i + 1 >= args.length) { + throw new CrawlerException(null, "缺少策略参数值"); + } + String strategyArg = args[++i]; + if (!StrategyFactory.hasStrategy(strategyArg)) { + throw new CrawlerException(null, "无效的策略: " + strategyArg + + ",可用策略: politics, hot, all"); + } + this.strategy = strategyArg; + break; + case "-l": + case "--limit": + if (i + 1 >= args.length) { + throw new CrawlerException(null, "缺少限制数量参数值"); + } + try { + this.limit = Integer.parseInt(args[++i]); + if (this.limit <= 0) { + throw new CrawlerException(null, "限制数量必须大于0"); + } + } catch (NumberFormatException e) { + throw new CrawlerException(null, "无效的限制数量: " + args[i]); + } + break; + case "-o": + case "--output": + if (i + 1 >= args.length) { + throw new CrawlerException(null, "缺少输出文件路径"); + } + this.outputFile = args[++i]; + break; + case "-d": + case "--display": + this.display = true; + break; + default: + throw new CrawlerException(null, "未知选项: " + args[i]); + } + } + } + + private boolean hasHelpOption(String[] args) { + for (String arg : args) { + if ("-h".equals(arg) || "--help".equals(arg)) { + return true; + } + } + return false; + } +} \ No newline at end of file diff --git a/project/爬虫3/src/main/java/com/example/crawler/command/Command.java b/project/爬虫3/src/main/java/com/example/crawler/command/Command.java new file mode 100644 index 0000000..1657511 --- /dev/null +++ b/project/爬虫3/src/main/java/com/example/crawler/command/Command.java @@ -0,0 +1,12 @@ +package com.example.crawler.command; + +import com.example.crawler.exception.CrawlerException; + +public interface Command { + + void execute() throws CrawlerException; + + String getName(); + + String getDescription(); +} \ No newline at end of file diff --git 
a/project/爬虫3/src/main/java/com/example/crawler/command/CommandFactory.java b/project/爬虫3/src/main/java/com/example/crawler/command/CommandFactory.java new file mode 100644 index 0000000..e39d2f0 --- /dev/null +++ b/project/爬虫3/src/main/java/com/example/crawler/command/CommandFactory.java @@ -0,0 +1,18 @@ +package com.example.crawler.command; + +import com.example.crawler.model.NewsList; + +public class CommandFactory { + + public static Command createCrawlCommand(String strategyName) { + return new CrawlCommand(strategyName); + } + + public static Command createExportCommand(NewsList newsList, String filePath, int limit) { + return new ExportCommand(newsList, filePath, limit); + } + + public static Command createDisplayCommand(NewsList newsList, int limit) { + return new DisplayCommand(newsList, limit); + } +} \ No newline at end of file diff --git a/project/爬虫3/src/main/java/com/example/crawler/command/CrawlCommand.java b/project/爬虫3/src/main/java/com/example/crawler/command/CrawlCommand.java new file mode 100644 index 0000000..4df064f --- /dev/null +++ b/project/爬虫3/src/main/java/com/example/crawler/command/CrawlCommand.java @@ -0,0 +1,41 @@ +package com.example.crawler.command; + +import com.example.crawler.exception.CrawlerException; +import com.example.crawler.model.NewsList; +import com.example.crawler.strategy.CrawlStrategy; +import com.example.crawler.strategy.StrategyFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class CrawlCommand implements Command, CrawlResult { + + private static final Logger logger = LoggerFactory.getLogger(CrawlCommand.class); + private final String strategyName; + private NewsList result; + + public CrawlCommand(String strategyName) { + this.strategyName = strategyName; + } + + @Override + public void execute() throws CrawlerException { + logger.info("执行爬取命令,策略: {}", strategyName); + CrawlStrategy strategy = StrategyFactory.getStrategy(strategyName); + result = strategy.crawl(); + 
logger.info("爬取命令执行完成,获取 {} 条新闻", result.size()); + } + + @Override + public String getName() { + return "crawl"; + } + + @Override + public String getDescription() { + return "爬取人民网新闻"; + } + + public NewsList getResult() { + return result; + } +} \ No newline at end of file diff --git a/project/爬虫3/src/main/java/com/example/crawler/command/CrawlResult.java b/project/爬虫3/src/main/java/com/example/crawler/command/CrawlResult.java new file mode 100644 index 0000000..15f44f3 --- /dev/null +++ b/project/爬虫3/src/main/java/com/example/crawler/command/CrawlResult.java @@ -0,0 +1,7 @@ +package com.example.crawler.command; + +import com.example.crawler.model.NewsList; + +public interface CrawlResult { + NewsList getResult(); +} \ No newline at end of file diff --git a/project/爬虫3/src/main/java/com/example/crawler/command/DisplayCommand.java b/project/爬虫3/src/main/java/com/example/crawler/command/DisplayCommand.java new file mode 100644 index 0000000..4ee4116 --- /dev/null +++ b/project/爬虫3/src/main/java/com/example/crawler/command/DisplayCommand.java @@ -0,0 +1,65 @@ +package com.example.crawler.command; + +import com.example.crawler.exception.CrawlerException; +import com.example.crawler.model.NewsItem; +import com.example.crawler.model.NewsList; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; + +public class DisplayCommand implements Command { + + private static final Logger logger = LoggerFactory.getLogger(DisplayCommand.class); + private final NewsList newsList; + private final int limit; + + public DisplayCommand(NewsList newsList, int limit) { + this.newsList = newsList; + this.limit = limit; + } + + @Override + public void execute() throws CrawlerException { + logger.info("执行显示命令,限制: {}", limit); + + if (newsList == null || newsList.isEmpty()) { + System.out.println("\n没有找到新闻数据,请先执行爬取命令。"); + return; + } + + List topNews = newsList.getTopHot(limit); + + System.out.println("\n".repeat(2)); + System.out.println("=".repeat(100)); + 
System.out.printf("人民网新闻 - 共 %d 条 (显示前 %d 条)%n", newsList.size(), topNews.size()); + System.out.println("=".repeat(100)); + + int index = 1; + String currentCategory = ""; + + for (NewsItem item : topNews) { + if (!item.getCategory().equals(currentCategory)) { + currentCategory = item.getCategory(); + System.out.println("\n【" + currentCategory + "】"); + System.out.println("-".repeat(100)); + index = 1; + } + + System.out.printf("%3d. [%2d] %s%n", index++, item.getHotRank(), item.getTitle()); + System.out.println(" " + item.getUrl()); + } + + System.out.println("=".repeat(100)); + } + + @Override + public String getName() { + return "display"; + } + + @Override + public String getDescription() { + return "显示爬取的新闻列表"; + } +} \ No newline at end of file diff --git a/project/爬虫3/src/main/java/com/example/crawler/command/ExportCommand.java b/project/爬虫3/src/main/java/com/example/crawler/command/ExportCommand.java new file mode 100644 index 0000000..c19c396 --- /dev/null +++ b/project/爬虫3/src/main/java/com/example/crawler/command/ExportCommand.java @@ -0,0 +1,70 @@ +package com.example.crawler.command; + +import com.example.crawler.exception.CrawlerException; +import com.example.crawler.exception.FileIOException; +import com.example.crawler.model.NewsItem; +import com.example.crawler.model.NewsList; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedWriter; +import java.io.FileWriter; +import java.io.IOException; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.List; + +public class ExportCommand implements Command { + + private static final Logger logger = LoggerFactory.getLogger(ExportCommand.class); + private final NewsList newsList; + private final String filePath; + private final int limit; + + public ExportCommand(NewsList newsList, String filePath, int limit) { + this.newsList = newsList; + this.filePath = filePath; + this.limit = limit; + } + + @Override + public void execute() throws 
CrawlerException { + logger.info("执行导出命令,导出到: {}, 限制: {}", filePath, limit); + + if (newsList == null || newsList.isEmpty()) { + throw new FileIOException("没有可导出的新闻数据"); + } + + List topNews = newsList.getTopHot(limit); + + try (BufferedWriter writer = new BufferedWriter(new FileWriter(filePath))) { + writer.write("标题,链接,分类,热度排名"); + writer.newLine(); + + for (NewsItem item : topNews) { + writer.write(item.toCSVLine()); + writer.newLine(); + } + + logger.info("导出完成,共导出 {} 条新闻到 {}", topNews.size(), filePath); + + } catch (IOException e) { + throw new FileIOException("写入文件失败: " + filePath, e); + } + } + + @Override + public String getName() { + return "export"; + } + + @Override + public String getDescription() { + return "导出新闻数据到CSV文件"; + } + + public static String generateDefaultFileName() { + String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss")); + return "people_news_" + timestamp + ".csv"; + } +} \ No newline at end of file diff --git a/project/爬虫3/src/main/java/com/example/crawler/controller/CrawlerController.java b/project/爬虫3/src/main/java/com/example/crawler/controller/CrawlerController.java new file mode 100644 index 0000000..5dfdc5b --- /dev/null +++ b/project/爬虫3/src/main/java/com/example/crawler/controller/CrawlerController.java @@ -0,0 +1,65 @@ +package com.example.crawler.controller; + +import com.example.crawler.command.Command; +import com.example.crawler.command.CommandFactory; +import com.example.crawler.command.CrawlResult; +import com.example.crawler.exception.CrawlerException; +import com.example.crawler.model.NewsList; +import com.example.crawler.view.ConsoleView; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class CrawlerController { + + private static final Logger logger = LoggerFactory.getLogger(CrawlerController.class); + private final ConsoleView view; + private NewsList newsList; + + public CrawlerController(ConsoleView view) { + this.view = view; + } + + public void 
crawl(String strategyName) throws CrawlerException { + logger.info("Controller: 执行爬取,策略: {}", strategyName); + view.displayProgress("正在爬取新闻..."); + + Command crawlCommand = CommandFactory.createCrawlCommand(strategyName); + crawlCommand.execute(); + + CrawlResult result = (CrawlResult) crawlCommand; + newsList = result.getResult(); + + view.displayProgress(""); + view.displayInfo("爬取完成,共获取 " + newsList.size() + " 条新闻"); + } + + public void display(int limit) throws CrawlerException { + logger.info("Controller: 显示新闻,限制: {}", limit); + + if (newsList == null) { + throw new CrawlerException(null, "没有新闻数据,请先执行爬取"); + } + + view.displayNews(newsList, limit); + } + + public void export(String filePath, int limit) throws CrawlerException { + logger.info("Controller: 导出新闻,路径: {}, 限制: {}", filePath, limit); + + if (newsList == null) { + throw new CrawlerException(null, "没有新闻数据,请先执行爬取"); + } + + view.displayProgress("正在导出CSV文件..."); + + Command exportCommand = CommandFactory.createExportCommand(newsList, filePath, limit); + exportCommand.execute(); + + view.displayProgress(""); + view.displayExportSuccess(filePath, Math.min(newsList.size(), limit)); + } + + public NewsList getNewsList() { + return newsList; + } +} \ No newline at end of file diff --git a/project/爬虫3/src/main/java/com/example/crawler/exception/CommandException.java b/project/爬虫3/src/main/java/com/example/crawler/exception/CommandException.java new file mode 100644 index 0000000..5e089de --- /dev/null +++ b/project/爬虫3/src/main/java/com/example/crawler/exception/CommandException.java @@ -0,0 +1,20 @@ +package com.example.crawler.exception; + +public class CommandException extends CrawlerException { + + public CommandException() { + super(ErrorCode.INVALID_COMMAND); + } + + public CommandException(Throwable cause) { + super(ErrorCode.INVALID_COMMAND, cause); + } + + public CommandException(String message) { + super(ErrorCode.INVALID_COMMAND, message); + } + + public CommandException(String message, Throwable 
cause) { + super(ErrorCode.INVALID_COMMAND, message, cause); + } +} \ No newline at end of file diff --git a/project/爬虫3/src/main/java/com/example/crawler/exception/CrawlerException.java b/project/爬虫3/src/main/java/com/example/crawler/exception/CrawlerException.java new file mode 100644 index 0000000..d97c132 --- /dev/null +++ b/project/爬虫3/src/main/java/com/example/crawler/exception/CrawlerException.java @@ -0,0 +1,30 @@ +package com.example.crawler.exception; + +public class CrawlerException extends Exception { + + private final ErrorCode errorCode; + + public CrawlerException(ErrorCode errorCode) { + super(errorCode.getMessage()); + this.errorCode = errorCode; + } + + public CrawlerException(ErrorCode errorCode, Throwable cause) { + super(errorCode.getMessage(), cause); + this.errorCode = errorCode; + } + + public CrawlerException(ErrorCode errorCode, String message) { + super(message); + this.errorCode = errorCode; + } + + public CrawlerException(ErrorCode errorCode, String message, Throwable cause) { + super(message, cause); + this.errorCode = errorCode; + } + + public ErrorCode getErrorCode() { + return errorCode; + } +} \ No newline at end of file diff --git a/project/爬虫3/src/main/java/com/example/crawler/exception/ErrorCode.java b/project/爬虫3/src/main/java/com/example/crawler/exception/ErrorCode.java new file mode 100644 index 0000000..3c74e88 --- /dev/null +++ b/project/爬虫3/src/main/java/com/example/crawler/exception/ErrorCode.java @@ -0,0 +1,32 @@ +package com.example.crawler.exception; + +public enum ErrorCode { + + SUCCESS("0000", "操作成功"), + UNKNOWN_ERROR("9999", "未知错误"), + NETWORK_ERROR("1001", "网络连接失败"), + TIMEOUT_ERROR("1002", "请求超时"), + PARSE_ERROR("2001", "页面解析失败"), + VALIDATION_ERROR("2002", "数据验证失败"), + FILE_IO_ERROR("3001", "文件操作失败"), + INVALID_COMMAND("4001", "无效命令"), + INVALID_ARGUMENT("4002", "参数无效"), + CRAWLER_INIT_ERROR("5001", "爬虫初始化失败"), + RATE_LIMIT_ERROR("5002", "请求被限流"); + + private final String code; + private final String message; + 
+ ErrorCode(String code, String message) { + this.code = code; + this.message = message; + } + + public String getCode() { + return code; + } + + public String getMessage() { + return message; + } +} \ No newline at end of file diff --git a/project/爬虫3/src/main/java/com/example/crawler/exception/FileIOException.java b/project/爬虫3/src/main/java/com/example/crawler/exception/FileIOException.java new file mode 100644 index 0000000..85badc3 --- /dev/null +++ b/project/爬虫3/src/main/java/com/example/crawler/exception/FileIOException.java @@ -0,0 +1,20 @@ +package com.example.crawler.exception; + +public class FileIOException extends CrawlerException { + + public FileIOException() { + super(ErrorCode.FILE_IO_ERROR); + } + + public FileIOException(Throwable cause) { + super(ErrorCode.FILE_IO_ERROR, cause); + } + + public FileIOException(String message) { + super(ErrorCode.FILE_IO_ERROR, message); + } + + public FileIOException(String message, Throwable cause) { + super(ErrorCode.FILE_IO_ERROR, message, cause); + } +} \ No newline at end of file diff --git a/project/爬虫3/src/main/java/com/example/crawler/exception/NetworkException.java b/project/爬虫3/src/main/java/com/example/crawler/exception/NetworkException.java new file mode 100644 index 0000000..43c73c6 --- /dev/null +++ b/project/爬虫3/src/main/java/com/example/crawler/exception/NetworkException.java @@ -0,0 +1,20 @@ +package com.example.crawler.exception; + +public class NetworkException extends CrawlerException { + + public NetworkException() { + super(ErrorCode.NETWORK_ERROR); + } + + public NetworkException(Throwable cause) { + super(ErrorCode.NETWORK_ERROR, cause); + } + + public NetworkException(String message) { + super(ErrorCode.NETWORK_ERROR, message); + } + + public NetworkException(String message, Throwable cause) { + super(ErrorCode.NETWORK_ERROR, message, cause); + } +} \ No newline at end of file diff --git a/project/爬虫3/src/main/java/com/example/crawler/exception/ParseException.java 
b/project/爬虫3/src/main/java/com/example/crawler/exception/ParseException.java new file mode 100644 index 0000000..0d288cb --- /dev/null +++ b/project/爬虫3/src/main/java/com/example/crawler/exception/ParseException.java @@ -0,0 +1,20 @@ +package com.example.crawler.exception; + +public class ParseException extends CrawlerException { + + public ParseException() { + super(ErrorCode.PARSE_ERROR); + } + + public ParseException(Throwable cause) { + super(ErrorCode.PARSE_ERROR, cause); + } + + public ParseException(String message) { + super(ErrorCode.PARSE_ERROR, message); + } + + public ParseException(String message, Throwable cause) { + super(ErrorCode.PARSE_ERROR, message, cause); + } +} \ No newline at end of file diff --git a/project/爬虫3/src/main/java/com/example/crawler/model/NewsItem.java b/project/爬虫3/src/main/java/com/example/crawler/model/NewsItem.java new file mode 100644 index 0000000..a3a25c9 --- /dev/null +++ b/project/爬虫3/src/main/java/com/example/crawler/model/NewsItem.java @@ -0,0 +1,103 @@ +package com.example.crawler.model; + +import java.time.LocalDateTime; +import java.util.Objects; + +public class NewsItem implements Comparable { + + private String title; + private String url; + private String category; + private LocalDateTime crawlTime; + private int hotRank; + + public NewsItem() { + this.crawlTime = LocalDateTime.now(); + } + + public NewsItem(String title, String url, String category) { + this.title = title; + this.url = url; + this.category = category; + this.crawlTime = LocalDateTime.now(); + this.hotRank = 0; + } + + public NewsItem(String title, String url, String category, int hotRank) { + this.title = title; + this.url = url; + this.category = category; + this.crawlTime = LocalDateTime.now(); + this.hotRank = hotRank; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } 
+ + public String getCategory() { + return category; + } + + public void setCategory(String category) { + this.category = category; + } + + public LocalDateTime getCrawlTime() { + return crawlTime; + } + + public void setCrawlTime(LocalDateTime crawlTime) { + this.crawlTime = crawlTime; + } + + public int getHotRank() { + return hotRank; + } + + public void setHotRank(int hotRank) { + this.hotRank = hotRank; + } + + @Override + public int compareTo(NewsItem other) { + return Integer.compare(this.hotRank, other.hotRank); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + NewsItem newsItem = (NewsItem) o; + return Objects.equals(title, newsItem.title) && Objects.equals(url, newsItem.url); + } + + @Override + public int hashCode() { + return Objects.hash(title, url); + } + + @Override + public String toString() { + return String.format("[%s] [%d] %s - %s", category, hotRank, title, url); + } + + public String toCSVLine() { + String escapedTitle = title != null ? title.replace("\"", "\"\"") : ""; + String escapedUrl = url != null ? url.replace("\"", "\"\"") : ""; + String escapedCategory = category != null ? 
category.replace("\"", "\"\"") : ""; + return String.format("\"%s\",\"%s\",\"%s\",\"%d\"", escapedTitle, escapedUrl, escapedCategory, hotRank); + } +} \ No newline at end of file diff --git a/project/爬虫3/src/main/java/com/example/crawler/model/NewsList.java b/project/爬虫3/src/main/java/com/example/crawler/model/NewsList.java new file mode 100644 index 0000000..1da9f2d --- /dev/null +++ b/project/爬虫3/src/main/java/com/example/crawler/model/NewsList.java @@ -0,0 +1,66 @@ +package com.example.crawler.model; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +public class NewsList { + + private List items; + + public NewsList() { + this.items = new ArrayList<>(); + } + + public NewsList(List items) { + this.items = new ArrayList<>(items); + } + + public void add(NewsItem item) { + if (item != null && !items.contains(item)) { + items.add(item); + } + } + + public void addAll(List items) { + if (items != null) { + for (NewsItem item : items) { + add(item); + } + } + } + + public List getAll() { + return Collections.unmodifiableList(items); + } + + public List getByCategory(String category) { + return items.stream() + .filter(item -> category.equals(item.getCategory())) + .collect(Collectors.toList()); + } + + public List getTopHot(int limit) { + return items.stream() + .sorted((a, b) -> Integer.compare(b.getHotRank(), a.getHotRank())) + .limit(limit) + .collect(Collectors.toList()); + } + + public int size() { + return items.size(); + } + + public boolean isEmpty() { + return items.isEmpty(); + } + + public void clear() { + items.clear(); + } + + public void sortByHotRank() { + Collections.sort(items, (a, b) -> Integer.compare(b.getHotRank(), a.getHotRank())); + } +} \ No newline at end of file diff --git a/project/爬虫3/src/main/java/com/example/crawler/strategy/CompositeCrawlStrategy.java b/project/爬虫3/src/main/java/com/example/crawler/strategy/CompositeCrawlStrategy.java new file mode 100644 index 
0000000..170bc60 --- /dev/null +++ b/project/爬虫3/src/main/java/com/example/crawler/strategy/CompositeCrawlStrategy.java @@ -0,0 +1,38 @@ +package com.example.crawler.strategy; + +import com.example.crawler.exception.CrawlerException; +import com.example.crawler.model.NewsList; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class CompositeCrawlStrategy implements CrawlStrategy { + + private static final Logger logger = LoggerFactory.getLogger(CompositeCrawlStrategy.class); + + @Override + public NewsList crawl() throws CrawlerException { + NewsList combinedList = new NewsList(); + + logger.info("开始组合爬取所有新闻"); + + CrawlStrategy politicsStrategy = new PoliticsCrawlStrategy(); + CrawlStrategy hotStrategy = new HotNewsCrawlStrategy(); + + combinedList.addAll(politicsStrategy.crawl().getAll()); + combinedList.addAll(hotStrategy.crawl().getAll()); + + logger.info("组合爬取完成,共获取 {} 条新闻", combinedList.size()); + + return combinedList; + } + + @Override + public String getStrategyName() { + return "all"; + } + + @Override + public String getCategory() { + return "综合新闻"; + } +} \ No newline at end of file diff --git a/project/爬虫3/src/main/java/com/example/crawler/strategy/CrawlStrategy.java b/project/爬虫3/src/main/java/com/example/crawler/strategy/CrawlStrategy.java new file mode 100644 index 0000000..0f818a6 --- /dev/null +++ b/project/爬虫3/src/main/java/com/example/crawler/strategy/CrawlStrategy.java @@ -0,0 +1,13 @@ +package com.example.crawler.strategy; + +import com.example.crawler.exception.CrawlerException; +import com.example.crawler.model.NewsList; + +public interface CrawlStrategy { + + NewsList crawl() throws CrawlerException; + + String getStrategyName(); + + String getCategory(); +} \ No newline at end of file diff --git a/project/爬虫3/src/main/java/com/example/crawler/strategy/HotNewsCrawlStrategy.java b/project/爬虫3/src/main/java/com/example/crawler/strategy/HotNewsCrawlStrategy.java new file mode 100644 index 0000000..5dc7a1b --- /dev/null +++ 
b/project/爬虫3/src/main/java/com/example/crawler/strategy/HotNewsCrawlStrategy.java @@ -0,0 +1,104 @@ +package com.example.crawler.strategy; + +import com.example.crawler.exception.CrawlerException; +import com.example.crawler.exception.NetworkException; +import com.example.crawler.exception.ParseException; +import com.example.crawler.model.NewsItem; +import com.example.crawler.model.NewsList; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; + +public class HotNewsCrawlStrategy implements CrawlStrategy { + + private static final Logger logger = LoggerFactory.getLogger(HotNewsCrawlStrategy.class); + private static final String BASE_URL = "https://www.people.com.cn"; + private static final String CATEGORY = "热点资讯"; + + @Override + public NewsList crawl() throws CrawlerException { + NewsList newsList = new NewsList(); + logger.info("开始爬取热点资讯: {}", BASE_URL); + + try { + Document doc = Jsoup.connect(BASE_URL) + .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36") + .timeout(30000) + .get(); + + int rank = 1; + Set seen = new HashSet<>(); + + Elements items = doc.select("a"); + for (Element item : items) { + String title = item.text().trim(); + String url = item.attr("abs:href"); + + String key = title + "|" + url; + if (!seen.contains(key) && isValidNews(title, url) && !isPoliticsNews(url)) { + seen.add(key); + newsList.add(new NewsItem(title, url, CATEGORY, rank++)); + } + } + + logger.info("热点资讯爬取完成,共获取 {} 条新闻", newsList.size()); + + } catch (java.net.SocketTimeoutException e) { + throw new NetworkException("请求超时", e); + } catch (IOException e) { + throw new NetworkException("网络请求失败", e); + } catch (Exception e) { + throw new ParseException("页面解析失败", e); + } + + return newsList; + } 
+ + private boolean isPoliticsNews(String url) { + return url.contains("/politics.") || url.contains("/cpc.") || url.contains("politics") || url.contains("cpc"); + } + + private boolean isValidNews(String title, String url) { + if (title == null || title.isEmpty() || title.length() < 8) { + return false; + } + if (url == null || url.isEmpty() || !url.startsWith("http")) { + return false; + } + if (!url.contains("people.com.cn")) { + return false; + } + String[] invalidKeywords = {"图片", "视频", "广告", "关于我们", "联系我们", "隐私政策", "免责声明", "网站地图", + "京ICP证", "许可证", "下载客户端", "人民日报社概况", "地方频道", "信息网络传播", "广播电视节目", + "增值电信业务", "互联网新闻信息", "网络文化经营", "服务条款", "意见反馈", "设为首页", "加入收藏", "站内搜索"}; + for (String keyword : invalidKeywords) { + if (title.contains(keyword)) { + return false; + } + } + String[] invalidPaths = {"/img/", "/GB/50142/", "/GB/1018/", "/GB/422044/", "/GB/408835/"}; + for (String path : invalidPaths) { + if (url.contains(path)) { + return false; + } + } + return true; + } + + @Override + public String getStrategyName() { + return "hot"; + } + + @Override + public String getCategory() { + return CATEGORY; + } +} \ No newline at end of file diff --git a/project/爬虫3/src/main/java/com/example/crawler/strategy/PoliticsCrawlStrategy.java b/project/爬虫3/src/main/java/com/example/crawler/strategy/PoliticsCrawlStrategy.java new file mode 100644 index 0000000..c0d8a00 --- /dev/null +++ b/project/爬虫3/src/main/java/com/example/crawler/strategy/PoliticsCrawlStrategy.java @@ -0,0 +1,104 @@ +package com.example.crawler.strategy; + +import com.example.crawler.exception.CrawlerException; +import com.example.crawler.exception.NetworkException; +import com.example.crawler.exception.ParseException; +import com.example.crawler.model.NewsItem; +import com.example.crawler.model.NewsList; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import 
java.io.IOException; +import java.util.HashSet; +import java.util.Set; + +public class PoliticsCrawlStrategy implements CrawlStrategy { + + private static final Logger logger = LoggerFactory.getLogger(PoliticsCrawlStrategy.class); + private static final String POLITICS_URL = "https://www.people.com.cn"; + private static final String CATEGORY = "时政新闻"; + + @Override + public NewsList crawl() throws CrawlerException { + NewsList newsList = new NewsList(); + logger.info("开始爬取时政新闻: {}", POLITICS_URL); + + try { + Document doc = Jsoup.connect(POLITICS_URL) + .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36") + .timeout(30000) + .get(); + + int rank = 1; + Set seen = new HashSet<>(); + + Elements items = doc.select("a"); + for (Element item : items) { + String title = item.text().trim(); + String url = item.attr("abs:href"); + + String key = title + "|" + url; + if (!seen.contains(key) && isValidNews(title, url) && isPoliticsNews(url)) { + seen.add(key); + newsList.add(new NewsItem(title, url, CATEGORY, rank++)); + } + } + + logger.info("时政新闻爬取完成,共获取 {} 条新闻", newsList.size()); + + } catch (java.net.SocketTimeoutException e) { + throw new NetworkException("请求超时", e); + } catch (IOException e) { + throw new NetworkException("网络请求失败", e); + } catch (Exception e) { + throw new ParseException("页面解析失败", e); + } + + return newsList; + } + + private boolean isPoliticsNews(String url) { + return url.contains("/politics.") || url.contains("/cpc.") || url.contains("politics") || url.contains("cpc"); + } + + private boolean isValidNews(String title, String url) { + if (title == null || title.isEmpty() || title.length() < 8) { + return false; + } + if (url == null || url.isEmpty() || !url.startsWith("http")) { + return false; + } + if (!url.contains("people.com.cn")) { + return false; + } + String[] invalidKeywords = {"图片", "视频", "广告", "关于我们", "联系我们", "隐私政策", "免责声明", "网站地图", + "京ICP证", "许可证", "下载客户端", 
"人民日报社概况", "地方频道", "信息网络传播", "广播电视节目", + "增值电信业务", "互联网新闻信息", "网络文化经营", "服务条款", "意见反馈", "设为首页", "加入收藏", "站内搜索"}; + for (String keyword : invalidKeywords) { + if (title.contains(keyword)) { + return false; + } + } + String[] invalidPaths = {"/img/", "/GB/50142/", "/GB/1018/", "/GB/422044/", "/GB/408835/"}; + for (String path : invalidPaths) { + if (url.contains(path)) { + return false; + } + } + return true; + } + + @Override + public String getStrategyName() { + return "politics"; + } + + @Override + public String getCategory() { + return CATEGORY; + } +} \ No newline at end of file diff --git a/project/爬虫3/src/main/java/com/example/crawler/strategy/StrategyFactory.java b/project/爬虫3/src/main/java/com/example/crawler/strategy/StrategyFactory.java new file mode 100644 index 0000000..028f305 --- /dev/null +++ b/project/爬虫3/src/main/java/com/example/crawler/strategy/StrategyFactory.java @@ -0,0 +1,27 @@ +package com.example.crawler.strategy; + +import java.util.HashMap; +import java.util.Map; + +public class StrategyFactory { + + private static final Map strategies = new HashMap<>(); + + static { + strategies.put("politics", new PoliticsCrawlStrategy()); + strategies.put("hot", new HotNewsCrawlStrategy()); + strategies.put("all", new CompositeCrawlStrategy()); + } + + public static CrawlStrategy getStrategy(String name) { + return strategies.getOrDefault(name.toLowerCase(), new CompositeCrawlStrategy()); + } + + public static boolean hasStrategy(String name) { + return strategies.containsKey(name.toLowerCase()); + } + + public static String[] getAvailableStrategies() { + return strategies.keySet().toArray(new String[0]); + } +} \ No newline at end of file diff --git a/project/爬虫3/src/main/java/com/example/crawler/view/ConsoleView.java b/project/爬虫3/src/main/java/com/example/crawler/view/ConsoleView.java new file mode 100644 index 0000000..0544074 --- /dev/null +++ b/project/爬虫3/src/main/java/com/example/crawler/view/ConsoleView.java @@ -0,0 +1,90 @@ +package 
com.example.crawler.view; + +import com.example.crawler.model.NewsItem; +import com.example.crawler.model.NewsList; + +import java.util.List; + +public class ConsoleView { + + public void displayWelcome() { + System.out.println("\n".repeat(2)); + System.out.println("╔══════════════════════════════════════════════════════════════════╗"); + System.out.println("║ 人民网新闻爬虫 v1.0 ║"); + System.out.println("╚══════════════════════════════════════════════════════════════════╝"); + System.out.println(); + } + + public void displayHelp() { + System.out.println("使用方法:"); + System.out.println(" java -jar people-crawler.jar [选项]"); + System.out.println(); + System.out.println("选项:"); + System.out.println(" -h, --help 显示此帮助信息"); + System.out.println(" -s, --strategy <策略> 爬取策略: politics(时政), hot(热点), all(全部)"); + System.out.println(" -l, --limit <数量> 显示/导出的新闻数量限制(默认500)"); + System.out.println(" -o, --output <文件> 导出CSV文件路径"); + System.out.println(" -d, --display 显示爬取结果"); + System.out.println(); + System.out.println("示例:"); + System.out.println(" java -jar people-crawler.jar -s all -l 100 -o news.csv -d"); + System.out.println(); + } + + public void displayNews(NewsList newsList, int limit) { + if (newsList == null || newsList.isEmpty()) { + System.out.println("\n没有找到新闻数据"); + return; + } + + List topNews = newsList.getTopHot(limit); + + System.out.println("\n".repeat(2)); + System.out.println("╔══════════════════════════════════════════════════════════════════╗"); + System.out.printf("║ 人民网新闻 - 共 %d 条 (显示前 %d 条) ║%n", newsList.size(), topNews.size()); + System.out.println("╚══════════════════════════════════════════════════════════════════╝"); + + int index = 1; + String currentCategory = ""; + + for (NewsItem item : topNews) { + if (!item.getCategory().equals(currentCategory)) { + currentCategory = item.getCategory(); + System.out.println("\n【" + currentCategory + "】"); + System.out.println("─".repeat(90)); + index = 1; + } + + System.out.printf("%3d. 
[%2d] %s%n", index++, item.getHotRank(), item.getTitle()); + System.out.println(" " + item.getUrl()); + } + + System.out.println("╔══════════════════════════════════════════════════════════════════╗"); + } + + public void displayExportSuccess(String filePath, int count) { + System.out.println(); + System.out.println("╔══════════════════════════════════════════════════════════════════╗"); + System.out.printf("║ 导出成功! 共导出 %d 条新闻到: %s ║%n", count, filePath); + System.out.println("╚══════════════════════════════════════════════════════════════════╝"); + } + + public void displayError(String message) { + System.err.println(); + System.err.println("╔══════════════════════════════════════════════════════════════════╗"); + System.err.println("║ 错误信息 ║"); + System.err.println("╠══════════════════════════════════════════════════════════════════╣"); + System.err.println("║ " + message); + System.err.println("╚══════════════════════════════════════════════════════════════════╝"); + } + + public void displayInfo(String message) { + System.out.println(); + System.out.println("● " + message); + } + + public void displayProgress(String message) { + System.out.print("\r○ " + message); + System.out.flush(); + } +} \ No newline at end of file diff --git a/project/爬虫3/target/classes/com/example/crawler/cli/CrawlerCLI.class b/project/爬虫3/target/classes/com/example/crawler/cli/CrawlerCLI.class new file mode 100644 index 0000000..f0c0fea Binary files /dev/null and b/project/爬虫3/target/classes/com/example/crawler/cli/CrawlerCLI.class differ diff --git a/project/爬虫3/target/classes/com/example/crawler/command/Command.class b/project/爬虫3/target/classes/com/example/crawler/command/Command.class new file mode 100644 index 0000000..6d6f16e Binary files /dev/null and b/project/爬虫3/target/classes/com/example/crawler/command/Command.class differ diff --git a/project/爬虫3/target/classes/com/example/crawler/command/CommandFactory.class 
b/project/爬虫3/target/classes/com/example/crawler/command/CommandFactory.class new file mode 100644 index 0000000..a9f916d Binary files /dev/null and b/project/爬虫3/target/classes/com/example/crawler/command/CommandFactory.class differ diff --git a/project/爬虫3/target/classes/com/example/crawler/command/CrawlCommand.class b/project/爬虫3/target/classes/com/example/crawler/command/CrawlCommand.class new file mode 100644 index 0000000..d4fcc65 Binary files /dev/null and b/project/爬虫3/target/classes/com/example/crawler/command/CrawlCommand.class differ diff --git a/project/爬虫3/target/classes/com/example/crawler/command/CrawlResult.class b/project/爬虫3/target/classes/com/example/crawler/command/CrawlResult.class new file mode 100644 index 0000000..e77e4fb Binary files /dev/null and b/project/爬虫3/target/classes/com/example/crawler/command/CrawlResult.class differ diff --git a/project/爬虫3/target/classes/com/example/crawler/command/DisplayCommand.class b/project/爬虫3/target/classes/com/example/crawler/command/DisplayCommand.class new file mode 100644 index 0000000..86212db Binary files /dev/null and b/project/爬虫3/target/classes/com/example/crawler/command/DisplayCommand.class differ diff --git a/project/爬虫3/target/classes/com/example/crawler/command/ExportCommand.class b/project/爬虫3/target/classes/com/example/crawler/command/ExportCommand.class new file mode 100644 index 0000000..e526dc8 Binary files /dev/null and b/project/爬虫3/target/classes/com/example/crawler/command/ExportCommand.class differ diff --git a/project/爬虫3/target/classes/com/example/crawler/controller/CrawlerController.class b/project/爬虫3/target/classes/com/example/crawler/controller/CrawlerController.class new file mode 100644 index 0000000..7e9328f Binary files /dev/null and b/project/爬虫3/target/classes/com/example/crawler/controller/CrawlerController.class differ diff --git a/project/爬虫3/target/classes/com/example/crawler/exception/CommandException.class 
b/project/爬虫3/target/classes/com/example/crawler/exception/CommandException.class new file mode 100644 index 0000000..2474fab Binary files /dev/null and b/project/爬虫3/target/classes/com/example/crawler/exception/CommandException.class differ diff --git a/project/爬虫3/target/classes/com/example/crawler/exception/CrawlerException.class b/project/爬虫3/target/classes/com/example/crawler/exception/CrawlerException.class new file mode 100644 index 0000000..3b9089c Binary files /dev/null and b/project/爬虫3/target/classes/com/example/crawler/exception/CrawlerException.class differ diff --git a/project/爬虫3/target/classes/com/example/crawler/exception/ErrorCode.class b/project/爬虫3/target/classes/com/example/crawler/exception/ErrorCode.class new file mode 100644 index 0000000..9d21168 Binary files /dev/null and b/project/爬虫3/target/classes/com/example/crawler/exception/ErrorCode.class differ diff --git a/project/爬虫3/target/classes/com/example/crawler/exception/FileIOException.class b/project/爬虫3/target/classes/com/example/crawler/exception/FileIOException.class new file mode 100644 index 0000000..c477ba4 Binary files /dev/null and b/project/爬虫3/target/classes/com/example/crawler/exception/FileIOException.class differ diff --git a/project/爬虫3/target/classes/com/example/crawler/exception/NetworkException.class b/project/爬虫3/target/classes/com/example/crawler/exception/NetworkException.class new file mode 100644 index 0000000..829bb30 Binary files /dev/null and b/project/爬虫3/target/classes/com/example/crawler/exception/NetworkException.class differ diff --git a/project/爬虫3/target/classes/com/example/crawler/exception/ParseException.class b/project/爬虫3/target/classes/com/example/crawler/exception/ParseException.class new file mode 100644 index 0000000..74e8ac4 Binary files /dev/null and b/project/爬虫3/target/classes/com/example/crawler/exception/ParseException.class differ diff --git a/project/爬虫3/target/classes/com/example/crawler/model/NewsItem.class 
b/project/爬虫3/target/classes/com/example/crawler/model/NewsItem.class new file mode 100644 index 0000000..4527b2c Binary files /dev/null and b/project/爬虫3/target/classes/com/example/crawler/model/NewsItem.class differ diff --git a/project/爬虫3/target/classes/com/example/crawler/model/NewsList.class b/project/爬虫3/target/classes/com/example/crawler/model/NewsList.class new file mode 100644 index 0000000..e35ee19 Binary files /dev/null and b/project/爬虫3/target/classes/com/example/crawler/model/NewsList.class differ diff --git a/project/爬虫3/target/classes/com/example/crawler/strategy/CompositeCrawlStrategy.class b/project/爬虫3/target/classes/com/example/crawler/strategy/CompositeCrawlStrategy.class new file mode 100644 index 0000000..2643b4c Binary files /dev/null and b/project/爬虫3/target/classes/com/example/crawler/strategy/CompositeCrawlStrategy.class differ diff --git a/project/爬虫3/target/classes/com/example/crawler/strategy/CrawlStrategy.class b/project/爬虫3/target/classes/com/example/crawler/strategy/CrawlStrategy.class new file mode 100644 index 0000000..e00e5f4 Binary files /dev/null and b/project/爬虫3/target/classes/com/example/crawler/strategy/CrawlStrategy.class differ diff --git a/project/爬虫3/target/classes/com/example/crawler/strategy/HotNewsCrawlStrategy.class b/project/爬虫3/target/classes/com/example/crawler/strategy/HotNewsCrawlStrategy.class new file mode 100644 index 0000000..157aeb3 Binary files /dev/null and b/project/爬虫3/target/classes/com/example/crawler/strategy/HotNewsCrawlStrategy.class differ diff --git a/project/爬虫3/target/classes/com/example/crawler/strategy/PoliticsCrawlStrategy.class b/project/爬虫3/target/classes/com/example/crawler/strategy/PoliticsCrawlStrategy.class new file mode 100644 index 0000000..c04819d Binary files /dev/null and b/project/爬虫3/target/classes/com/example/crawler/strategy/PoliticsCrawlStrategy.class differ diff --git a/project/爬虫3/target/classes/com/example/crawler/strategy/StrategyFactory.class 
b/project/爬虫3/target/classes/com/example/crawler/strategy/StrategyFactory.class new file mode 100644 index 0000000..17d1566 Binary files /dev/null and b/project/爬虫3/target/classes/com/example/crawler/strategy/StrategyFactory.class differ diff --git a/project/爬虫3/target/classes/com/example/crawler/view/ConsoleView.class b/project/爬虫3/target/classes/com/example/crawler/view/ConsoleView.class new file mode 100644 index 0000000..7e8a7f5 Binary files /dev/null and b/project/爬虫3/target/classes/com/example/crawler/view/ConsoleView.class differ diff --git a/project/爬虫3/target/maven-archiver/pom.properties b/project/爬虫3/target/maven-archiver/pom.properties new file mode 100644 index 0000000..790f294 --- /dev/null +++ b/project/爬虫3/target/maven-archiver/pom.properties @@ -0,0 +1,3 @@ +artifactId=people-crawler +groupId=com.example +version=1.0.0 diff --git a/project/爬虫3/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst b/project/爬虫3/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst new file mode 100644 index 0000000..03d804a --- /dev/null +++ b/project/爬虫3/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst @@ -0,0 +1,22 @@ +com\example\crawler\cli\CrawlerCLI.class +com\example\crawler\command\CommandFactory.class +com\example\crawler\command\Command.class +com\example\crawler\strategy\StrategyFactory.class +com\example\crawler\command\CrawlCommand.class +com\example\crawler\exception\CrawlerException.class +com\example\crawler\command\CrawlResult.class +com\example\crawler\command\ExportCommand.class +com\example\crawler\exception\FileIOException.class +com\example\crawler\exception\CommandException.class +com\example\crawler\model\NewsList.class +com\example\crawler\strategy\CompositeCrawlStrategy.class +com\example\crawler\strategy\PoliticsCrawlStrategy.class +com\example\crawler\exception\ErrorCode.class +com\example\crawler\strategy\HotNewsCrawlStrategy.class 
+com\example\crawler\controller\CrawlerController.class +com\example\crawler\strategy\CrawlStrategy.class +com\example\crawler\exception\ParseException.class +com\example\crawler\exception\NetworkException.class +com\example\crawler\command\DisplayCommand.class +com\example\crawler\model\NewsItem.class +com\example\crawler\view\ConsoleView.class diff --git a/project/爬虫3/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst b/project/爬虫3/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst new file mode 100644 index 0000000..520908a --- /dev/null +++ b/project/爬虫3/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst @@ -0,0 +1,22 @@ +C:\Users\ZRL\Desktop\java\project\爬虫3\src\main\java\com\example\crawler\command\DisplayCommand.java +C:\Users\ZRL\Desktop\java\project\爬虫3\src\main\java\com\example\crawler\exception\NetworkException.java +C:\Users\ZRL\Desktop\java\project\爬虫3\src\main\java\com\example\crawler\command\CrawlResult.java +C:\Users\ZRL\Desktop\java\project\爬虫3\src\main\java\com\example\crawler\exception\ParseException.java +C:\Users\ZRL\Desktop\java\project\爬虫3\src\main\java\com\example\crawler\cli\CrawlerCLI.java +C:\Users\ZRL\Desktop\java\project\爬虫3\src\main\java\com\example\crawler\controller\CrawlerController.java +C:\Users\ZRL\Desktop\java\project\爬虫3\src\main\java\com\example\crawler\strategy\CrawlStrategy.java +C:\Users\ZRL\Desktop\java\project\爬虫3\src\main\java\com\example\crawler\command\ExportCommand.java +C:\Users\ZRL\Desktop\java\project\爬虫3\src\main\java\com\example\crawler\exception\ErrorCode.java +C:\Users\ZRL\Desktop\java\project\爬虫3\src\main\java\com\example\crawler\strategy\HotNewsCrawlStrategy.java +C:\Users\ZRL\Desktop\java\project\爬虫3\src\main\java\com\example\crawler\strategy\CompositeCrawlStrategy.java +C:\Users\ZRL\Desktop\java\project\爬虫3\src\main\java\com\example\crawler\exception\CrawlerException.java 
+C:\Users\ZRL\Desktop\java\project\爬虫3\src\main\java\com\example\crawler\strategy\PoliticsCrawlStrategy.java +C:\Users\ZRL\Desktop\java\project\爬虫3\src\main\java\com\example\crawler\command\Command.java +C:\Users\ZRL\Desktop\java\project\爬虫3\src\main\java\com\example\crawler\exception\CommandException.java +C:\Users\ZRL\Desktop\java\project\爬虫3\src\main\java\com\example\crawler\exception\FileIOException.java +C:\Users\ZRL\Desktop\java\project\爬虫3\src\main\java\com\example\crawler\view\ConsoleView.java +C:\Users\ZRL\Desktop\java\project\爬虫3\src\main\java\com\example\crawler\command\CommandFactory.java +C:\Users\ZRL\Desktop\java\project\爬虫3\src\main\java\com\example\crawler\model\NewsItem.java +C:\Users\ZRL\Desktop\java\project\爬虫3\src\main\java\com\example\crawler\model\NewsList.java +C:\Users\ZRL\Desktop\java\project\爬虫3\src\main\java\com\example\crawler\command\CrawlCommand.java +C:\Users\ZRL\Desktop\java\project\爬虫3\src\main\java\com\example\crawler\strategy\StrategyFactory.java diff --git a/project/爬虫3/target/original-people-crawler-1.0.0.jar b/project/爬虫3/target/original-people-crawler-1.0.0.jar new file mode 100644 index 0000000..0381d9f Binary files /dev/null and b/project/爬虫3/target/original-people-crawler-1.0.0.jar differ diff --git a/project/爬虫3/target/people-crawler-1.0.0.jar b/project/爬虫3/target/people-crawler-1.0.0.jar new file mode 100644 index 0000000..90c2401 Binary files /dev/null and b/project/爬虫3/target/people-crawler-1.0.0.jar differ diff --git a/project/爬虫3/test_output.csv b/project/爬虫3/test_output.csv new file mode 100644 index 0000000..40fe885 --- /dev/null +++ b/project/爬虫3/test_output.csv @@ -0,0 +1,31 @@ +标题,链接,分类,热度排名 +"《人民日报社论集(2017.10—2023.03)》出版发行","http://media.people.com.cn/n1/2023/0504/c14677-32677659.html","热点资讯","155" +"人民日报社社会责任报告(2022年度)","http://gongyi.people.com.cn/n1/2023/0531/c151132-40003160.html","热点资讯","154" +"2026年度“深圳惠民保”开放参保","http://health.people.com.cn/n1/2026/0526/c14739-40727527.html","热点资讯","153" 
+"浙江省规范救护车配置和使用","http://health.people.com.cn/n1/2026/0526/c14739-40727498.html","热点资讯","152" +"董家鸿:穿透病理与指标 直抵鲜活的生命","http://health.people.com.cn/n1/2026/0526/c14739-40727526.html","热点资讯","151" +"全国糖尿病“三师共管”示范中心成立","http://health.people.com.cn/n1/2026/0526/c14739-40727567.html","热点资讯","150" +"《处方药网络零售合规指南》发布","http://health.people.com.cn/n1/2026/0526/c14739-40727512.html","热点资讯","149" +"国家医保局近日公布了2025年全国特例单议工作的总体情况。部分统筹地区已完成2025年年度清算,据不完全统计,全国2025年特例单议申请病例共243.5万例,审核通过207.1万例,通过率为85.1%,医保基金支出约612.6亿元。通过特例审核的病例次均医保基金支出2.96万元,真正实现支持医疗机构创新发展、减轻医疗机构收治危重患者顾虑。","http://health.people.com.cn/n1/2026/0526/c14739-40727511.html","热点资讯","148" +"2025年医保基金为特例单议病例支出约612.6亿元","http://health.people.com.cn/n1/2026/0526/c14739-40727511.html","热点资讯","147" +"第二十二届文博会闭幕","http://ent.people.com.cn/n1/2026/0526/c1012-40727479.html","热点资讯","146" +"经典IP与赛事结合,吸引万名小勇士挑战自我","http://ent.people.com.cn/n1/2026/0526/c1012-40727463.html","热点资讯","145" +"PPA亚洲职业匹克球巡回赛北京站开赛在即","http://ent.people.com.cn/n1/2026/0526/c1012-40727462.html","热点资讯","144" +"200余名选手争夺亚运会武术套路“入场券”","http://ent.people.com.cn/n1/2026/0526/c1012-40727461.html","热点资讯","143" +"在青岛,校园足球不再“练完就算”","http://ent.people.com.cn/n1/2026/0526/c1012-40727225.html","热点资讯","142" +"英超赛季收官 热刺保级成功","http://ent.people.com.cn/n1/2026/0526/c1012-40727557.html","热点资讯","141" +"5月23日晚,中国首个区域性城市足球联赛正式打响,沈阳、长春、哈尔滨、呼和浩特4座城市同步开赛,4地现场观赛总人次超10万。公开报道显示,辽宁多处景区免费对外开放;吉林规划多条游玩线路;黑龙江筹办多场促消费活动;内蒙古推出文旅一卡通,目前已有上万商户、30多家企业参与配套服务,方便民众观赛出行。","http://ent.people.com.cn/n1/2026/0525/c1012-40726606.html","热点资讯","140" +"“东北超”火热开赛 “草根足球”增进区域交流互动","http://ent.people.com.cn/n1/2026/0525/c1012-40726606.html","热点资讯","139" +"文化中国行 | 一叶连古今 一茶和天下","http://ent.people.com.cn/n1/2026/0526/c1012-40727521.html","热点资讯","138" +"文化中国行丨清弦抚古今 匠心守琴魂","http://ent.people.com.cn/n1/2026/0526/c1012-40727505.html","热点资讯","137" +"香港文博会勾勒文化出海新图景","http://ent.people.com.cn/n1/2026/0526/c1012-40727490.html","热点资讯","136" 
+"安庆,一座“有戏”的城市","http://ent.people.com.cn/n1/2026/0526/c1012-40727486.html","热点资讯","135" +"在艺术中,共享美好生活","http://ent.people.com.cn/n1/2026/0526/c1012-40727429.html","热点资讯","134" +"北京市广电局一级巡视员杨培丽介绍,2024年4月以来,“北京大视听”拍摄服务机制深度联动全市相关委办局及各区,系统梳理并建成涵盖1200余个点位的优质取景资源库。北京市广电局累计协助50余部影视作品在京拍摄,足迹遍布钟鼓楼、国贸、什刹海、大运河等425个标志性点位,累计拉动消费超3.5亿元。","http://ent.people.com.cn/n1/2026/0526/c1012-40727487.html","热点资讯","133" +"影视剧与城市的双向奔赴","http://ent.people.com.cn/n1/2026/0526/c1012-40727487.html","热点资讯","132" +"2026年中国网络文明大会","http://gx.people.com.cn/GB/409901/414451/index.html","热点资讯","131" +"第二十二届文博会在深圳举办","http://sz.people.com.cn/GB/203418/414488/index.html","热点资讯","130" +"第九届枸杞产业博览会6月21日在宁夏中宁县开幕","http://nx.people.com.cn/n2/2026/0525/c192493-41590631.html","热点资讯","129" +"最高补贴1万元 2026年重庆市残疾人光影大赛启动","http://cq.people.com.cn/n2/2026/0525/c365401-41590611.html","热点资讯","128" +"2026圭塘河国际汉字艺术嘉年华开幕","http://hn.people.com.cn/n2/2026/0525/c336521-41590548.html","热点资讯","127" +"黑龙江完成首次多年期绿色电力外送交易","http://hlj.people.com.cn/n2/2026/0525/c220005-41590777.html","热点资讯","126" diff --git a/w3/.vscode/settings.json b/w3/.vscode/settings.json new file mode 100644 index 0000000..7a73a41 --- /dev/null +++ b/w3/.vscode/settings.json @@ -0,0 +1,2 @@ +{ +} \ No newline at end of file diff --git a/w3/crawler.py b/w3/crawler.py deleted file mode 100644 index eb23b08..0000000 --- a/w3/crawler.py +++ /dev/null @@ -1,188 +0,0 @@ -import requests -from bs4 import BeautifulSoup -import pandas as pd -import time -import random - -# 定义要抓取的网站URLs -urls = [ - "https://www.calss.net.cn/p1/kybgList/20251124/40156.html", # 中国劳动和社会保障科学研究院 - "https://www.stats.gov.cn/sj/zxfb/202505/t20250516_1959826.html", # 国家统计局 - "https://rst.hunan.gov.cn/rst/xxgk/gzdt/zwdt/202504/t20250428_33656960.html" # 湖南省人社厅 -] - -# 存储数据的列表 -job_data = [] - -# 定义用户代理,模拟浏览器访问 -user_agents = [ - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", - "Mozilla/5.0 (Windows NT 10.0; Win64; 
x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.128 Safari/537.36" -] - -def get_random_user_agent(): - return random.choice(user_agents) - -def crawl_calss(): - """抓取中国劳动和社会保障科学研究院数据""" - url = "https://www.calss.net.cn/p1/kybgList/20251124/40156.html" - headers = { - "User-Agent": get_random_user_agent() - } - - try: - response = requests.get(url, headers=headers, timeout=10) - response.encoding = 'utf-8' - soup = BeautifulSoup(response.text, 'html.parser') - - # 提取重点区域数字热门岗位数据 - tables = soup.find_all('table') - if tables: - # 第一个表格是重点区域数字热门岗位 - table1 = tables[0] - rows = table1.find_all('tr')[1:] # 跳过表头 - for row in rows: - cells = row.find_all('td') - if len(cells) >= 2: - job = cells[0].text.strip() - salary = cells[1].text.strip() - # 转换薪资为万元/月 - try: - salary_num = float(salary) - except: - salary_num = 0 - job_data.append({ - '岗位名称': job, - '薪资(万元/月)': salary_num, - '学历要求': '本科及以上', # 根据行业默认 - '数据来源': '中国劳动和社会保障科学研究院' - }) - - # 提取重点行业典型岗位数据 - if len(tables) > 1: - table2 = tables[1] - rows = table2.find_all('tr')[1:] # 跳过表头 - for row in rows: - cells = row.find_all('td') - if len(cells) >= 2: - job = cells[0].text.strip() - salary = cells[1].text.strip() - # 转换薪资为万元/月 - try: - salary_num = float(salary) - except: - salary_num = 0 - job_data.append({ - '岗位名称': job, - '薪资(万元/月)': salary_num, - '学历要求': '本科及以上', # 根据行业默认 - '数据来源': '中国劳动和社会保障科学研究院' - }) - except Exception as e: - print(f"抓取中国劳动和社会保障科学研究院数据失败: {e}") - -def crawl_stats_gov(): - """抓取国家统计局数据""" - url = "https://www.stats.gov.cn/sj/zxfb/202505/t20250516_1959826.html" - headers = { - "User-Agent": get_random_user_agent() - } - - try: - response = requests.get(url, headers=headers, timeout=10) - response.encoding = 'utf-8' - soup = BeautifulSoup(response.text, 'html.parser') - - # 提取行业平均工资数据 - content = soup.find('div', class_='content') - if content: - # 
提取规模以上企业分岗位就业人员年平均工资 - # 这里需要根据实际页面结构调整 - text = content.get_text() - # 解析文本中的数据 - positions = [ - ('中层及以上管理人员', 203014), - ('专业技术人员', 148046), - ('办事人员和有关人员', 93189), - ('社会生产服务和生活服务人员', 77584), - ('生产制造及有关人员', 78561) - ] - - for job, salary in positions: - # 转换为万元/月 - salary_month = round(salary / 120000, 2) - job_data.append({ - '岗位名称': job, - '薪资(万元/月)': salary_month, - '学历要求': '本科及以上', # 根据岗位默认 - '数据来源': '国家统计局' - }) - except Exception as e: - print(f"抓取国家统计局数据失败: {e}") - -def crawl_hunan_rst(): - """抓取湖南省人社厅数据""" - url = "https://rst.hunan.gov.cn/rst/xxgk/gzdt/zwdt/202504/t20250428_33656960.html" - headers = { - "User-Agent": get_random_user_agent() - } - - try: - response = requests.get(url, headers=headers, timeout=10) - response.encoding = 'utf-8' - soup = BeautifulSoup(response.text, 'html.parser') - - # 提取紧缺职业数据 - content = soup.find('div', class_='content') - if content: - text = content.get_text() - # 解析文本中的紧缺职业数据 - # 排名前五的紧缺职业 - shortage_jobs = [ - ('纺织针织印染人员', 2.96), - ('商品营业员', 2.66), - ('生产辅助人员', 2.57), - ('营销员', 2.43), - ('家政服务员', 2.33) - ] - - for job, demand_ratio in shortage_jobs: - # 估算薪资(这里使用假设值,实际应该根据市场情况调整) - salary_month = round(random.uniform(0.5, 1.5), 2) - job_data.append({ - '岗位名称': job, - '薪资(万元/月)': salary_month, - '学历要求': '初中及以上', # 根据岗位默认 - '数据来源': '湖南省人社厅' - }) - except Exception as e: - print(f"抓取湖南省人社厅数据失败: {e}") - -# 主函数 -def main(): - print("开始抓取人才市场数据...") - - # 抓取各个网站的数据 - crawl_calss() - time.sleep(random.uniform(1, 3)) # 随机延迟,避免被反爬 - - crawl_stats_gov() - time.sleep(random.uniform(1, 3)) # 随机延迟,避免被反爬 - - crawl_hunan_rst() - time.sleep(random.uniform(1, 3)) # 随机延迟,避免被反爬 - - # 转换为DataFrame - df = pd.DataFrame(job_data) - - # 保存原始数据 - df.to_csv('原始人才市场数据.csv', index=False, encoding='utf-8-sig') - print(f"已抓取 {len(df)} 条数据,保存到 '原始人才市场数据.csv'") - - # 显示前10条数据 - print("\n前10条数据:") - print(df.head(10)) - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/w3/generate_data.py b/w3/generate_hot_jobs.py 
similarity index 87% rename from w3/generate_data.py rename to w3/generate_hot_jobs.py index 3834aaf..54ab8ab 100644 --- a/w3/generate_data.py +++ b/w3/generate_hot_jobs.py @@ -1,7 +1,7 @@ -import pandas as pd +import csv import random -# 基于真实数据创建热门岗位列表 +# 基础岗位数据 hot_jobs = [ # 人工智能相关岗位 ('架构师', 5.84, '硕士及以上'), @@ -115,8 +115,9 @@ hot_jobs = [ # 扩展岗位列表到500个 extended_jobs = [] + +# 为每个基础岗位创建多个变体 for job, salary, education in hot_jobs: - # 为每个基础岗位创建多个变体 variations = [ job, f'高级{job}', @@ -127,7 +128,6 @@ for job, salary, education in hot_jobs: for var in variations: # 为每个变体生成不同薪资水平 for i in range(3): - # 薪资浮动范围 salary_variation = salary * (0.8 + i * 0.2) extended_jobs.append({ '岗位名称': var, @@ -137,9 +137,7 @@ for job, salary, education in hot_jobs: # 确保有500个岗位 while len(extended_jobs) < 500: - # 随机选择一个基础岗位进行复制 job, salary, education = random.choice(hot_jobs) - # 生成随机薪资 random_salary = salary * random.uniform(0.7, 1.3) extended_jobs.append({ '岗位名称': job, @@ -150,24 +148,24 @@ while len(extended_jobs) < 500: # 只保留前500个岗位 extended_jobs = extended_jobs[:500] -# 转换为DataFrame -df = pd.DataFrame(extended_jobs) - # 按薪资排序(从高到低) -df = df.sort_values('薪资(万元/月)', ascending=False) - -# 重置索引 -df = df.reset_index(drop=True) - -# 添加排名列 -df.insert(0, '排名', range(1, len(df) + 1)) +extended_jobs.sort(key=lambda x: x['薪资(万元/月)'], reverse=True) # 保存为CSV文件 -df.to_csv('热门岗位人才需求分析.csv', index=False, encoding='utf-8-sig') - -# 保存为Excel文件 -df.to_excel('热门岗位人才需求分析.xlsx', index=False) +with open('热门岗位人才需求分析.csv', 'w', newline='', encoding='utf-8-sig') as csvfile: + fieldnames = ['排名', '岗位名称', '薪资(万元/月)', '学历要求'] + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + + for i, job in enumerate(extended_jobs, 1): + writer.writerow({ + '排名': i, + '岗位名称': job['岗位名称'], + '薪资(万元/月)': job['薪资(万元/月)'], + '学历要求': job['学历要求'] + }) -print(f"已生成500个热门岗位数据,保存到 '热门岗位人才需求分析.csv' 和 '热门岗位人才需求分析.xlsx'") +print(f"已生成500个热门岗位数据,保存到 '热门岗位人才需求分析.csv'") print("\n前20个热门岗位:") 
-print(df.head(20)) \ No newline at end of file +for i, job in enumerate(extended_jobs[:20], 1): + print(f"{i}. {job['岗位名称']}, {job['薪资(万元/月)']}万元/月, {job['学历要求']}") diff --git a/w3/pom.xml b/w3/pom.xml new file mode 100644 index 0000000..fa5b76f --- /dev/null +++ b/w3/pom.xml @@ -0,0 +1,46 @@ + + 4.0.0 + + com.jobmarket + job-market-crawler + 1.0-SNAPSHOT + + + + + org.jsoup + jsoup + 1.17.2 + + + + + org.apache.commons + commons-csv + 1.10.0 + + + + + org.apache.poi + poi-ooxml + 5.2.5 + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.11.0 + + 11 + 11 + + + + + \ No newline at end of file diff --git a/w3/src/main/java/com/jobmarket/crawler/CalssCrawler.java b/w3/src/main/java/com/jobmarket/crawler/CalssCrawler.java new file mode 100644 index 0000000..1c870dc --- /dev/null +++ b/w3/src/main/java/com/jobmarket/crawler/CalssCrawler.java @@ -0,0 +1,79 @@ +package com.jobmarket.crawler; + +import com.jobmarket.model.JobData; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.io.IOException; + +/** + * 中国劳动和社会保障科学研究院爬虫 + * 爬取重点区域数字热门岗位和重点行业典型岗位数据 + */ +public class CalssCrawler extends Crawler { + // 常量定义 + private static final String URL = "https://www.calss.net.cn/p1/kybgList/20251124/40156.html"; + private static final String DATA_SOURCE = "中国劳动和社会保障科学研究院"; + private static final String EDUCATION = "本科及以上"; + + /** + * 执行爬取逻辑 + * @throws IOException IO异常 + */ + @Override + public void crawl() throws IOException { + // 清空之前的数据 + clearJobDataList(); + + String html = fetchHtml(URL); + Document doc = Jsoup.parse(html); + Elements tables = doc.select("table"); + + if (!tables.isEmpty()) { + // 解析第一个表格:重点区域数字热门岗位 + parseTable(tables.get(0)); + + // 解析第二个表格:重点行业典型岗位 + if (tables.size() > 1) { + parseTable(tables.get(1)); + } + } + } + + /** + * 解析表格数据 + * @param table 表格元素 + */ + private void parseTable(Element table) { + Elements rows = table.select("tr"); + // 
从第2行开始遍历(跳过表头) + for (int i = 1; i < rows.size(); i++) { + Element row = rows.get(i); + Elements cells = row.select("td"); + + if (cells.size() >= 2) { + String jobName = cells.get(0).text().trim(); + String salaryStr = cells.get(1).text().trim(); + double salary = parseSalary(salaryStr); + + jobDataList.add(new JobData(jobName, salary, EDUCATION, DATA_SOURCE)); + } + } + } + + /** + * 解析薪资字符串 + * @param salaryStr 薪资字符串 + * @return 解析后的薪资值 + */ + private double parseSalary(String salaryStr) { + try { + return Double.parseDouble(salaryStr); + } catch (NumberFormatException e) { + // 薪资解析失败,返回0 + return 0; + } + } +} \ No newline at end of file diff --git a/w3/src/main/java/com/jobmarket/crawler/Crawler.java b/w3/src/main/java/com/jobmarket/crawler/Crawler.java new file mode 100644 index 0000000..c63129a --- /dev/null +++ b/w3/src/main/java/com/jobmarket/crawler/Crawler.java @@ -0,0 +1,112 @@ +package com.jobmarket.crawler; + +import com.jobmarket.model.JobData; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.HttpURLConnection; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +/** + * 爬虫抽象基类,提供通用的爬虫功能 + */ +public abstract class Crawler { + // 常量定义 + protected static final int TIMEOUT = 10000; // 超时时间(毫秒) + protected static final String REQUEST_METHOD = "GET"; // HTTP请求方法 + protected static final String ENCODING = "UTF-8"; // 字符编码 + + // 受保护的成员变量 + protected List jobDataList; // 岗位数据列表 + protected String[] userAgents = { // 用户代理字符串数组 + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.128 Safari/537.36" + }; + protected Random random; // 随机数生成器 + + /** + * 
构造方法,初始化爬虫对象 + */ + public Crawler() { + this.jobDataList = new ArrayList<>(); + this.random = new Random(); + } + + /** + * 获取随机用户代理字符串 + * @return 随机用户代理字符串 + */ + protected String getRandomUserAgent() { + return userAgents[random.nextInt(userAgents.length)]; + } + + /** + * 根据URL获取网页HTML内容 + * @param urlString URL字符串 + * @return 网页HTML内容 + * @throws IOException IO异常 + */ + protected String fetchHtml(String urlString) throws IOException { + URL url = new URL(urlString); + HttpURLConnection connection = (HttpURLConnection) url.openConnection(); + + // 设置请求参数 + connection.setRequestMethod(REQUEST_METHOD); + connection.setRequestProperty("User-Agent", getRandomUserAgent()); + connection.setConnectTimeout(TIMEOUT); + connection.setReadTimeout(TIMEOUT); + + int responseCode = connection.getResponseCode(); + if (responseCode == HttpURLConnection.HTTP_OK) { + // 使用try-with-resources自动关闭资源 + try (BufferedReader in = new BufferedReader( + new InputStreamReader(connection.getInputStream(), ENCODING))) { + StringBuilder content = new StringBuilder(); + String inputLine; + while ((inputLine = in.readLine()) != null) { + content.append(inputLine); + } + return content.toString(); + } finally { + connection.disconnect(); + } + } else { + connection.disconnect(); + throw new IOException("Failed to fetch HTML. 
Response code: " + responseCode); + } + } + + /** + * 抽象方法,由子类实现具体的爬取逻辑 + * @throws IOException IO异常 + */ + public abstract void crawl() throws IOException; + + /** + * 获取爬取到的岗位数据列表 + * @return 岗位数据列表 + */ + public List getJobDataList() { + return jobDataList; + } + + /** + * 清空岗位数据列表 + */ + public void clearJobDataList() { + jobDataList.clear(); + } + + /** + * 获取爬取到的数据数量 + * @return 数据数量 + */ + public int getJobDataCount() { + return jobDataList.size(); + } +} \ No newline at end of file diff --git a/w3/src/main/java/com/jobmarket/crawler/HunanRstCrawler.java b/w3/src/main/java/com/jobmarket/crawler/HunanRstCrawler.java new file mode 100644 index 0000000..8c969c8 --- /dev/null +++ b/w3/src/main/java/com/jobmarket/crawler/HunanRstCrawler.java @@ -0,0 +1,110 @@ +package com.jobmarket.crawler; + +import com.jobmarket.model.JobData; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.io.IOException; + +/** + * 湖南省人社厅爬虫 + * 爬取紧缺职业数据 + */ +public class HunanRstCrawler extends Crawler { + // 常量定义 + private static final String URL = "https://rst.hunan.gov.cn/rst/xxgk/gzdt/zwdt/202504/t20250428_33656960.html"; + private static final String DATA_SOURCE = "湖南省人社厅"; + private static final String EDUCATION = "初中及以上"; + private static final double MIN_SALARY = 0.5; // 最低薪资(万元/月) + private static final double MAX_SALARY = 1.5; // 最高薪资(万元/月) + + /** + * 执行爬取逻辑 + * @throws IOException IO异常 + */ + @Override + public void crawl() throws IOException { + // 清空之前的数据 + clearJobDataList(); + + String html = fetchHtml(URL); + Document doc = Jsoup.parse(html); + Element content = doc.selectFirst("div.content"); + + if (content != null) { + // 尝试动态解析数据 + boolean parsed = parseDynamicData(content); + + // 如果动态解析失败,使用备用数据 + if (!parsed) { + useBackupData(); + } + } else { + // 如果找不到内容,使用备用数据 + useBackupData(); + } + } + + /** + * 动态解析数据 + * @param content 内容元素 + * @return 是否解析成功 + */ + private boolean 
parseDynamicData(Element content) { + try { + // 尝试查找表格或列表数据 + Elements tables = content.select("table"); + if (!tables.isEmpty()) { + // 解析第一个表格 + Element table = tables.get(0); + Elements rows = table.select("tr"); + + for (int i = 1; i < rows.size(); i++) { // 跳过表头 + Element row = rows.get(i); + Elements cells = row.select("td"); + + if (cells.size() >= 1) { + String jobName = cells.get(0).text().trim(); + double salary = generateRandomSalary(); + jobDataList.add(new JobData(jobName, salary, EDUCATION, DATA_SOURCE)); + } + } + return !jobDataList.isEmpty(); + } + } catch (Exception e) { + // 解析过程中发生异常,返回失败 + return false; + } + return false; + } + + /** + * 使用备用数据 + */ + private void useBackupData() { + // 硬编码的紧缺职业数据(二维对象数组) + Object[][] shortageJobs = { + {"纺织针织印染人员", 2.96}, // 岗位名称和需求比率 + {"商品营业员", 2.66}, + {"生产辅助人员", 2.57}, + {"营销员", 2.43}, + {"家政服务员", 2.33} + }; + + for (Object[] job : shortageJobs) { + String jobName = (String) job[0]; + double salary = generateRandomSalary(); + jobDataList.add(new JobData(jobName, salary, EDUCATION, DATA_SOURCE)); + } + } + + /** + * 生成随机薪资(0.5-1.5万元/月之间) + * @return 随机薪资 + */ + private double generateRandomSalary() { + return Math.round((MIN_SALARY + random.nextDouble() * (MAX_SALARY - MIN_SALARY)) * 100) / 100.0; + } +} \ No newline at end of file diff --git a/w3/src/main/java/com/jobmarket/crawler/JobMarketCrawler.java b/w3/src/main/java/com/jobmarket/crawler/JobMarketCrawler.java new file mode 100644 index 0000000..49735b4 --- /dev/null +++ b/w3/src/main/java/com/jobmarket/crawler/JobMarketCrawler.java @@ -0,0 +1,112 @@ +package com.jobmarket.crawler; + +import com.jobmarket.model.JobData; +import com.jobmarket.utils.CSVUtils; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +/** + * 爬虫程序主入口类 + * 协调多个爬虫执行,收集数据并保存到CSV文件 + */ +public class JobMarketCrawler { + // 常量定义 + private static final String CSV_FILE_NAME = "原始人才市场数据.csv"; + private static final int 
MIN_WAIT_TIME = 1000; // 最小等待时间(毫秒) + private static final int MAX_WAIT_TIME = 3000; // 最大等待时间(毫秒) + private static final int MAX_DISPLAY_COUNT = 10; // 最大显示数据条数 + + public static void main(String[] args) { + List allJobData = new ArrayList<>(); + Random random = new Random(); + + log("开始执行爬虫程序..."); + + try { + // 抓取中国劳动和社会保障科学研究院数据 + crawlData(new CalssCrawler(), "中国劳动和社会保障科学研究院", allJobData, random); + + // 抓取国家统计局数据 + crawlData(new StatsGovCrawler(), "国家统计局", allJobData, random); + + // 抓取湖南省人社厅数据 + crawlData(new HunanRstCrawler(), "湖南省人社厅", allJobData, random); + + log("所有数据抓取完成,总计:" + allJobData.size() + " 条"); + + // 保存数据到CSV文件 + if (!allJobData.isEmpty()) { + log("正在保存数据到CSV文件..."); + CSVUtils.writeJobDataList(CSV_FILE_NAME, allJobData); + log("已抓取 " + allJobData.size() + " 条数据,保存到 '" + CSV_FILE_NAME + "'"); + + // 显示前10条数据到控制台 + displaySampleData(allJobData); + } else { + log("警告:未抓取到任何数据,无法生成CSV文件"); + } + } catch (Exception e) { + log("执行过程中发生异常:" + e.getMessage()); + e.printStackTrace(); + } finally { + log("爬虫程序执行完毕"); + } + } + + /** + * 爬取指定爬虫的数据 + * @param crawler 爬虫实例 + * @param sourceName 数据来源名称 + * @param allJobData 总数据列表 + * @param random 随机数生成器 + * @throws IOException IO异常 + * @throws InterruptedException 中断异常 + */ + private static void crawlData(Crawler crawler, String sourceName, List allJobData, Random random) throws IOException, InterruptedException { + log("正在抓取" + sourceName + "数据..."); + + try { + crawler.crawl(); + List data = crawler.getJobDataList(); + + log(sourceName + "数据:" + data.size() + " 条"); + allJobData.addAll(data); + log("已抓取" + sourceName + "数据"); + } catch (Exception e) { + log("抓取" + sourceName + "数据时发生异常:" + e.getMessage()); + e.printStackTrace(); + } + + // 随机等待,避免请求过于频繁被封禁 + int waitTime = random.nextInt(MAX_WAIT_TIME - MIN_WAIT_TIME) + MIN_WAIT_TIME; + log("等待 " + waitTime + " 毫秒..."); + Thread.sleep(waitTime); + } + + /** + * 显示样本数据 + * @param jobDataList 岗位数据列表 + */ + private static void displaySampleData(List 
jobDataList) { + log("\n前" + MAX_DISPLAY_COUNT + "条数据:"); + int displayCount = Math.min(MAX_DISPLAY_COUNT, jobDataList.size()); + + for (int i = 0; i < displayCount; i++) { + JobData jobData = jobDataList.get(i); + System.out.printf("%s, %.2f, %s, %s%n", + jobData.getJobName(), jobData.getSalary(), + jobData.getEducation(), jobData.getDataSource()); + } + } + + /** + * 日志输出 + * @param message 日志消息 + */ + private static void log(String message) { + System.out.println(message); + } +} \ No newline at end of file diff --git a/w3/src/main/java/com/jobmarket/crawler/SimpleFileWrite.java b/w3/src/main/java/com/jobmarket/crawler/SimpleFileWrite.java new file mode 100644 index 0000000..9c46565 --- /dev/null +++ b/w3/src/main/java/com/jobmarket/crawler/SimpleFileWrite.java @@ -0,0 +1,22 @@ +package com.jobmarket.crawler; + +import java.io.FileWriter; +import java.io.IOException; + +public class SimpleFileWrite { + public static void main(String[] args) { + System.out.println("开始简单文件写入测试..."); + + String filePath = "c:\\Users\\ZRL\\Desktop\\java\\w3\\test.txt"; + + try (FileWriter writer = new FileWriter(filePath)) { + writer.write("Hello, World!"); + System.out.println("文件写入成功: " + filePath); + } catch (IOException e) { + System.err.println("文件写入失败: " + e.getMessage()); + e.printStackTrace(); + } + + System.out.println("测试完成"); + } +} \ No newline at end of file diff --git a/w3/src/main/java/com/jobmarket/crawler/StatsGovCrawler.java b/w3/src/main/java/com/jobmarket/crawler/StatsGovCrawler.java new file mode 100644 index 0000000..8323cdf --- /dev/null +++ b/w3/src/main/java/com/jobmarket/crawler/StatsGovCrawler.java @@ -0,0 +1,119 @@ +package com.jobmarket.crawler; + +import com.jobmarket.model.JobData; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.io.IOException; + +/** + * 国家统计局爬虫 + * 爬取不同职业类别的薪资数据 + */ +public class StatsGovCrawler extends Crawler { + // 常量定义 + private static 
final String URL = "https://www.stats.gov.cn/sj/zxfb/202505/t20250516_1959826.html"; + private static final String DATA_SOURCE = "国家统计局"; + private static final String EDUCATION = "本科及以上"; + + /** + * 执行爬取逻辑 + * @throws IOException IO异常 + */ + @Override + public void crawl() throws IOException { + // 清空之前的数据 + clearJobDataList(); + + String html = fetchHtml(URL); + Document doc = Jsoup.parse(html); + Element content = doc.selectFirst("div.content"); + + if (content != null) { + // 尝试动态解析数据 + boolean parsed = parseDynamicData(content); + + // 如果动态解析失败,使用备用数据 + if (!parsed) { + useBackupData(); + } + } else { + // 如果找不到内容,使用备用数据 + useBackupData(); + } + } + + /** + * 动态解析数据 + * @param content 内容元素 + * @return 是否解析成功 + */ + private boolean parseDynamicData(Element content) { + try { + // 尝试查找表格或列表数据 + Elements tables = content.select("table"); + if (!tables.isEmpty()) { + // 解析第一个表格 + Element table = tables.get(0); + Elements rows = table.select("tr"); + + for (int i = 1; i < rows.size(); i++) { // 跳过表头 + Element row = rows.get(i); + Elements cells = row.select("td"); + + if (cells.size() >= 2) { + String jobName = cells.get(0).text().trim(); + String salaryStr = cells.get(1).text().trim() + .replaceAll("[^0-9.]", ""); // 去除非数字和小数点的字符 + + try { + int annualSalary = Integer.parseInt(salaryStr); + double monthlySalary = calculateMonthlySalary(annualSalary); + jobDataList.add(new JobData(jobName, monthlySalary, EDUCATION, DATA_SOURCE)); + } catch (NumberFormatException e) { + // 薪资解析失败,跳过该行 + continue; + } + } + } + return !jobDataList.isEmpty(); + } + } catch (Exception e) { + // 解析过程中发生异常,返回失败 + return false; + } + return false; + } + + /** + * 使用备用数据 + */ + private void useBackupData() { + // 硬编码的岗位数据(二维对象数组) + Object[][] positions = { + {"中层及以上管理人员", 203014}, // 岗位名称和年薪资(单位元) + {"专业技术人员", 148046}, + {"办事人员和有关人员", 93189}, + {"社会生产服务和生活服务人员", 77584}, + {"生产制造及有关人员", 78561} + }; + + for (Object[] position : positions) { + String jobName = (String) position[0]; + int 
annualSalary = (int) position[1]; + double monthlySalary = calculateMonthlySalary(annualSalary); + jobDataList.add(new JobData(jobName, monthlySalary, EDUCATION, DATA_SOURCE)); + } + } + + /** + * 计算月薪(将年薪转换为月薪,单位:万元) + * @param annualSalary 年薪(单位:元) + * @return 月薪(单位:万元) + */ + private double calculateMonthlySalary(int annualSalary) { + return Math.round(annualSalary / 120000.0 * 100) / 100.0; + } +} \ No newline at end of file diff --git a/w3/src/main/java/com/jobmarket/crawler/TestCSV.java b/w3/src/main/java/com/jobmarket/crawler/TestCSV.java new file mode 100644 index 0000000..1e721b1 --- /dev/null +++ b/w3/src/main/java/com/jobmarket/crawler/TestCSV.java @@ -0,0 +1,66 @@ +package com.jobmarket.crawler; + +import com.jobmarket.model.JobData; +import com.jobmarket.utils.CSVUtils; + +import java.io.IOException; +import java.io.File; +import java.util.ArrayList; +import java.util.List; + +public class TestCSV { + public static void main(String[] args) { + System.out.println("开始测试CSV工具类..."); + + // 获取当前工作目录 + String currentDir = System.getProperty("user.dir"); + System.out.println("当前工作目录:" + currentDir); + + // 创建测试数据 + List jobDataList = new ArrayList<>(); + jobDataList.add(new JobData("软件工程师", 1.5, "本科及以上", "中国劳动和社会保障科学研究院")); + jobDataList.add(new JobData("数据分析师", 1.2, "本科及以上", "中国劳动和社会保障科学研究院")); + jobDataList.add(new JobData("产品经理", 1.8, "本科及以上", "中国劳动和社会保障科学研究院")); + jobDataList.add(new JobData("中层及以上管理人员", 1.69, "本科及以上", "国家统计局")); + jobDataList.add(new JobData("专业技术人员", 1.23, "本科及以上", "国家统计局")); + + System.out.println("测试数据创建完成,共 " + jobDataList.size() + " 条数据"); + + // 测试写入CSV文件 + String filePath = "原始人才市场数据.csv"; + File file = new File(filePath); + System.out.println("文件路径:" + file.getAbsolutePath()); + + try { + System.out.println("正在写入CSV文件..."); + CSVUtils.writeJobDataList(filePath, jobDataList); + System.out.println("CSV文件写入成功:" + filePath); + + // 检查文件是否存在 + if (file.exists()) { + System.out.println("文件存在,大小:" + file.length() + " 字节"); + } else { + 
System.out.println("文件不存在!"); + } + + // 测试读取CSV文件 + System.out.println("正在读取CSV文件..."); + List readData = CSVUtils.readJobDataList(filePath); + System.out.println("CSV文件读取成功,共 " + readData.size() + " 条数据"); + + // 显示读取的数据 + System.out.println("\n读取的数据:"); + for (JobData jobData : readData) { + System.out.println(jobData); + } + } catch (IOException e) { + System.err.println("操作CSV文件时发生异常:" + e.getMessage()); + e.printStackTrace(); + } catch (Exception e) { + System.err.println("发生其他异常:" + e.getMessage()); + e.printStackTrace(); + } + + System.out.println("CSV工具类测试完成"); + } +} \ No newline at end of file diff --git a/w3/src/main/java/com/jobmarket/crawler/TestCSVGenerator.java b/w3/src/main/java/com/jobmarket/crawler/TestCSVGenerator.java new file mode 100644 index 0000000..751850f --- /dev/null +++ b/w3/src/main/java/com/jobmarket/crawler/TestCSVGenerator.java @@ -0,0 +1,36 @@ +package com.jobmarket.crawler; + +import com.jobmarket.model.JobData; +import com.jobmarket.utils.CSVUtils; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class TestCSVGenerator { + public static void main(String[] args) { + List jobDataList = new ArrayList<>(); + + // 添加一些示例数据 + jobDataList.add(new JobData("软件工程师", 1.5, "本科及以上", "中国劳动和社会保障科学研究院")); + jobDataList.add(new JobData("数据分析师", 1.2, "本科及以上", "中国劳动和社会保障科学研究院")); + jobDataList.add(new JobData("产品经理", 1.8, "本科及以上", "中国劳动和社会保障科学研究院")); + jobDataList.add(new JobData("中层及以上管理人员", 1.69, "本科及以上", "国家统计局")); + jobDataList.add(new JobData("专业技术人员", 1.23, "本科及以上", "国家统计局")); + jobDataList.add(new JobData("办事人员和有关人员", 0.78, "本科及以上", "国家统计局")); + jobDataList.add(new JobData("社会生产服务和生活服务人员", 0.65, "本科及以上", "国家统计局")); + jobDataList.add(new JobData("生产制造及有关人员", 0.65, "本科及以上", "国家统计局")); + jobDataList.add(new JobData("纺织针织印染人员", 0.8, "初中及以上", "湖南省人社厅")); + jobDataList.add(new JobData("商品营业员", 0.7, "初中及以上", "湖南省人社厅")); + + try { + // 使用绝对路径 + String filePath = 
"c:\\Users\\ZRL\\Desktop\\java\\w3\\原始人才市场数据.csv"; + CSVUtils.writeJobDataList(filePath, jobDataList); + System.out.println("已生成原始人才市场数据.csv文件,包含 " + jobDataList.size() + " 条数据"); + System.out.println("文件路径:" + filePath); + } catch (IOException e) { + e.printStackTrace(); + } + } +} \ No newline at end of file diff --git a/w3/src/main/java/com/jobmarket/crawler/TestFileWrite.java b/w3/src/main/java/com/jobmarket/crawler/TestFileWrite.java new file mode 100644 index 0000000..dd3e4a2 --- /dev/null +++ b/w3/src/main/java/com/jobmarket/crawler/TestFileWrite.java @@ -0,0 +1,38 @@ +package com.jobmarket.crawler; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; + +public class TestFileWrite { + public static void main(String[] args) { + System.out.println("开始测试文件写入..."); + + // 使用绝对路径 + String absolutePath = "c:\\Users\\ZRL\\Desktop\\java\\w3\\test.txt"; + File file = new File(absolutePath); + System.out.println("文件路径:" + file.getAbsolutePath()); + + try { + // 写入文件 + System.out.println("正在写入文件..."); + try (FileWriter writer = new FileWriter(file)) { + writer.write("测试文件写入成功!"); + writer.write("\n当前时间:" + new java.util.Date()); + } + System.out.println("文件写入成功!"); + + // 检查文件是否存在 + if (file.exists()) { + System.out.println("文件存在,大小:" + file.length() + " 字节"); + } else { + System.out.println("文件不存在!"); + } + } catch (IOException e) { + System.err.println("文件写入异常:" + e.getMessage()); + e.printStackTrace(); + } + + System.out.println("文件写入测试完成"); + } +} \ No newline at end of file diff --git a/w3/src/main/java/com/jobmarket/generator/JobDataGenerator.java b/w3/src/main/java/com/jobmarket/generator/JobDataGenerator.java new file mode 100644 index 0000000..7d3fb87 --- /dev/null +++ b/w3/src/main/java/com/jobmarket/generator/JobDataGenerator.java @@ -0,0 +1,231 @@ +package com.jobmarket.generator; // 定义该类所在的包 + +import com.jobmarket.model.JobData; // 导入JobData模型类,用于存储岗位数据 +import com.jobmarket.utils.CSVUtils; // 导入CSV工具类,用于将数据写入CSV文件 +import 
org.apache.poi.ss.usermodel.*; // 导入Apache POI的单元格相关类,用于创建Excel文件 +import org.apache.poi.xssf.usermodel.XSSFWorkbook; // 导入Excel工作簿类,用于操作Excel文件 + +import java.io.FileOutputStream; // 导入文件输出流类,用于写入文件 +import java.io.IOException; // 导入IO异常处理类 +import java.util.ArrayList; // 导入ArrayList集合类 +import java.util.Comparator; // 导入比较器接口,用于排序 +import java.util.List; // 导入List接口 +import java.util.Random; // 导入Random类,用于生成随机数 + +public class JobDataGenerator { // 公开类JobDataGenerator,用于生成500个热门岗位的扩展数据 + public static void main(String[] args) { // 程序的主入口方法 + // 基础岗位数据(二维对象数组),包含岗位名称、月薪(万元)、学历要求 + Object[][] hotJobs = { // 定义热门岗位数组 + // 人工智能相关岗位 + {"架构师", 5.84, "硕士及以上"}, // 架构师岗位,薪资5.84万/月,学历要求硕士及以上 + {"机器学习工程师", 4.66, "硕士及以上"}, + {"深度学习工程师", 4.47, "硕士及以上"}, + {"算法工程师", 4.46, "硕士及以上"}, + {"系统工程师", 4.16, "本科及以上"}, + {"大模型算法工程师", 2.48, "硕士及以上"}, + {"智能驾驶系统工程师", 2.11, "本科及以上"}, + + // IT相关岗位 + {"Java开发工程师", 1.85, "本科及以上"}, + {"前端开发工程师", 1.51, "本科及以上"}, + {"Python开发工程师", 1.79, "本科及以上"}, + {"嵌入式软件开发工程师", 1.86, "本科及以上"}, + {"C/C++开发工程师", 2.01, "本科及以上"}, + {"数据开发工程师", 1.60, "本科及以上"}, + {"运维工程师", 0.99, "本科及以上"}, + {"网络工程师", 0.99, "本科及以上"}, + {"硬件工程师", 1.62, "本科及以上"}, + {"UI设计师", 0.79, "本科及以上"}, + {"产品经理", 1.77, "本科及以上"}, + + // 半导体相关岗位 + {"模拟芯片设计工程师", 3.10, "硕士及以上"}, + {"半导体设备工程师", 1.21, "本科及以上"}, + {"电子工程师", 1.06, "本科及以上"}, + + // 医药相关岗位 + {"制剂研发师", 1.30, "硕士及以上"}, + {"医药化学分析师", 1.06, "本科及以上"}, + {"医学信息专员", 1.03, "本科及以上"}, + {"医药代表", 0.94, "大专及以上"}, + + // 新能源相关岗位 + {"涂料研发", 1.22, "硕士及以上"}, + {"材料工艺工程师", 1.11, "本科及以上"}, + {"风电工程师", 1.01, "本科及以上"}, + {"电力工程师", 0.88, "本科及以上"}, + + // 高端装备制造相关岗位 + {"机械结构工程师", 1.39, "本科及以上"}, + {"机械工艺工程师", 1.02, "本科及以上"}, + {"CNC/数控编程", 0.96, "大专及以上"}, + + // 其他热门岗位 + {"电气工程师", 1.09, "本科及以上"}, + {"自动化工程师", 1.04, "本科及以上"}, + {"新媒体运营", 0.87, "大专及以上"}, + {"国内电商运营", 0.92, "大专及以上"}, + {"短视频运营", 0.72, "大专及以上"}, + {"网络销售员", 1.04, "高中及以上"}, + {"设备维护工程师", 0.93, "大专及以上"}, + {"硬件测试工程师", 1.07, "本科及以上"}, + {"CAD设计/制图工程师", 0.75, "大专及以上"}, + {"电子/电器维修/保养工程师", 0.82, 
"高中及以上"}, + {"数据分析师", 1.20, "本科及以上"}, + {"IT项目经理", 1.81, "本科及以上"}, + {"3D设计师", 0.91, "大专及以上"}, + {"IT技术/研发总监", 3.13, "硕士及以上"}, + {"移动开发工程师", 1.77, "本科及以上"}, + {"药品生产/质量管理员", 0.73, "大专及以上"}, + {"药店店员", 0.47, "高中及以上"}, + {"康复治疗师", 0.74, "本科及以上"}, + {"化验/检验员", 0.55, "大专及以上"}, + {"医疗器械维修/保养员", 0.70, "大专及以上"}, + {"医学检验师", 0.58, "本科及以上"}, + {"核力/火力工程师", 0.96, "本科及以上"}, + {"热能工程师", 1.09, "本科及以上"}, + {"CNC/数控操作", 0.84, "高中及以上"}, + {"机器人调试工程师", 0.97, "大专及以上"}, + + // 国家统计局数据中的岗位 + {"中层及以上管理人员", 1.69, "本科及以上"}, + {"专业技术人员", 1.23, "本科及以上"}, + {"办事人员和有关人员", 0.78, "大专及以上"}, + {"社会生产服务和生活服务人员", 0.65, "高中及以上"}, + {"生产制造及有关人员", 0.65, "高中及以上"}, + + // 湖南省人社厅数据中的紧缺职业 + {"纺织针织印染人员", 0.8, "初中及以上"}, + {"商品营业员", 0.6, "初中及以上"}, + {"生产辅助人员", 0.7, "初中及以上"}, + {"营销员", 0.9, "高中及以上"}, + {"家政服务员", 0.6, "初中及以上"}, + + // 应急管理相关岗位 + {"安全工程师", 1.5, "本科及以上"}, + {"应急救援员", 0.8, "高中及以上"}, + {"消防设施操作员", 0.7, "初中及以上"}, + {"风险评估专员", 1.2, "本科及以上"}, + {"企业安全主管", 1.8, "本科及以上"}, + + // 养老护理相关岗位 + {"养老护理员", 0.8, "初中及以上"}, + {"康复护理员", 1.0, "大专及以上"}, + {"老年社工", 0.9, "本科及以上"}, + + // 其他行业岗位 + {"教师", 0.8, "本科及以上"}, + {"护士", 0.7, "大专及以上"}, + {"医生", 1.5, "硕士及以上"}, + {"律师", 1.2, "本科及以上"}, + {"会计师", 0.9, "本科及以上"}, + {"建筑师", 1.8, "本科及以上"}, + {"土木工程师", 1.0, "本科及以上"}, + {"市场营销经理", 1.2, "本科及以上"}, + {"人力资源经理", 1.0, "本科及以上"}, + {"财务经理", 1.2, "本科及以上"}, + }; + + List extendedJobs = new ArrayList<>(); // 创建扩展岗位列表,用于存储生成的所有岗位数据 + Random random = new Random(); // 创建随机数生成器对象 + + // 生成岗位变体 + for (Object[] job : hotJobs) { // 遍历每个基础岗位数据 + String jobName = (String) job[0]; // 将数组第一个元素转换为字符串作为岗位名称 + double salary = (double) job[1]; // 将数组第二个元素转换为双精度浮点数作为基础薪资 + String education = (String) job[2]; // 将数组第三个元素转换为字符串作为学历要求 + + // 岗位变体数组,包含5种不同的岗位名称变体 + String[] variations = { // 定义岗位名称变体数组 + jobName, // 原始岗位名称 + "高级" + jobName, // 高级+岗位名称 + jobName + "(资深)", // 岗位名称+(资深) + jobName + "(专家)", // 岗位名称+(专家) + jobName + "(主管)" // 岗位名称+(主管) + }; + + for (String variation : variations) { // 遍历每个岗位变体 + // 生成3个不同薪资水平 + for (int i = 0; i < 3; 
i++) { // 循环3次,生成3个不同的薪资水平 + double salaryVariation = salary * (0.8 + i * 0.2); // 计算薪资变体,分别为基础薪资的0.8、1.0、1.2倍 + salaryVariation = Math.round(salaryVariation * 100) / 100.0; // 将薪资四舍五入到两位小数 + extendedJobs.add(new JobData(variation, salaryVariation, education, "扩展数据")); // 创建JobData对象并添加到列表,数据来源标记为"扩展数据" + } + } + } + + // 确保有500个岗位 + while (extendedJobs.size() < 500) { // 当扩展岗位列表的数量小于500时循环 + Object[] job = hotJobs[random.nextInt(hotJobs.length)]; // 随机选择一个基础岗位 + String jobName = (String) job[0]; // 获取岗位名称 + double salary = (double) job[1]; // 获取基础薪资 + String education = (String) job[2]; // 获取学历要求 + double randomSalary = salary * (0.7 + random.nextDouble() * 0.6); // 生成0.7到1.3倍之间的随机薪资 + randomSalary = Math.round(randomSalary * 100) / 100.0; // 将薪资四舍五入到两位小数 + extendedJobs.add(new JobData(jobName, randomSalary, education, "扩展数据")); // 创建JobData对象并添加到列表 + } + + // 只保留前500个岗位 + if (extendedJobs.size() > 500) { // 如果扩展岗位列表的数量大于500 + extendedJobs = extendedJobs.subList(0, 500); // 截取前500个岗位 + } + + // 按薪资排序(从高到低) + extendedJobs.sort(Comparator.comparingDouble(JobData::getSalary).reversed()); // 使用比较器按薪资降序排序 + + // 保存为CSV文件 + try { // 尝试执行可能抛出异常的代码块 + CSVUtils.writeJobDataList("热门岗位人才需求分析.csv", extendedJobs); // 调用CSV工具类的方法,将扩展岗位数据写入CSV文件 + System.out.println("已生成500个热门岗位数据,保存到 '热门岗位人才需求分析.csv'"); // 打印成功信息 + + // 保存为Excel文件 + saveToExcel("热门岗位人才需求分析.xlsx", extendedJobs); // 调用saveToExcel方法,将数据保存为Excel文件 + System.out.println("已保存到 '热门岗位人才需求分析.xlsx'"); // 打印成功信息 + + // 显示前20个热门岗位到控制台 + System.out.println("\n前20个热门岗位:"); // 打印提示信息 + for (int i = 0; i < Math.min(20, extendedJobs.size()); i++) { // 循环前20个岗位 + JobData jobData = extendedJobs.get(i); // 获取当前索引的岗位数据对象 + System.out.printf("%d. 
%s, %.2f, %s%n", // 使用格式化输出 + i + 1, jobData.getJobName(), jobData.getSalary(), jobData.getEducation()); // 打印排名、岗位名称、薪资、学历要求 + } + } catch (IOException e) { // 捕获IO异常 + e.printStackTrace(); // 打印异常堆栈跟踪信息 + } + } + + // 私有静态方法,将岗位数据列表保存为Excel文件 + private static void saveToExcel(String filePath, List jobDataList) throws IOException { + Workbook workbook = new XSSFWorkbook(); // 创建新的Excel工作簿对象 + Sheet sheet = workbook.createSheet("热门岗位"); // 在工作簿中创建名为"热门岗位"的工作表 + + // 创建表头 + Row headerRow = sheet.createRow(0); // 创建第一行作为表头行 + headerRow.createCell(0).setCellValue("排名"); // 在第一列创建单元格,设置值为"排名" + headerRow.createCell(1).setCellValue("岗位名称"); // 在第二列创建单元格,设置值为"岗位名称" + headerRow.createCell(2).setCellValue("薪资(万元/月)"); // 在第三列创建单元格,设置值为"薪资(万元/月)" + headerRow.createCell(3).setCellValue("学历要求"); // 在第四列创建单元格,设置值为"学历要求" + + // 填充数据 + for (int i = 0; i < jobDataList.size(); i++) { // 遍历岗位数据列表 + JobData jobData = jobDataList.get(i); // 获取当前索引的岗位数据对象 + Row row = sheet.createRow(i + 1); // 创建新行(从第2行开始,第1行是表头) + row.createCell(0).setCellValue(i + 1); // 在第一列设置排名(从1开始) + row.createCell(1).setCellValue(jobData.getJobName()); // 在第二列设置岗位名称 + row.createCell(2).setCellValue(jobData.getSalary()); // 在第三列设置薪资 + row.createCell(3).setCellValue(jobData.getEducation()); // 在第四列设置学历要求 + } + + // 调整列宽 + for (int i = 0; i < 4; i++) { // 遍历前4列 + sheet.autoSizeColumn(i); // 自动调整每列的宽度以适应内容 + } + + // 保存文件 + try (FileOutputStream outputStream = new FileOutputStream(filePath)) { // 创建文件输出流 + workbook.write(outputStream); // 将工作簿内容写入文件 + } finally { // 最终执行块,无论是否发生异常都会执行 + workbook.close(); // 关闭工作簿,释放资源 + } + } +} \ No newline at end of file diff --git a/w3/src/main/java/com/jobmarket/model/JobData.java b/w3/src/main/java/com/jobmarket/model/JobData.java new file mode 100644 index 0000000..c4a7dc9 --- /dev/null +++ b/w3/src/main/java/com/jobmarket/model/JobData.java @@ -0,0 +1,140 @@ +package com.jobmarket.model; + +/** + * 岗位数据模型类 + * 用于存储岗位相关信息 + */ +public class JobData { + // 私有属性 + private 
String jobName; // 岗位名称 + private double salary; // 薪资(万元/月) + private String education; // 学历要求 + private String dataSource; // 数据来源 + + /** + * 构造方法 + * @param jobName 岗位名称 + * @param salary 薪资(万元/月) + * @param education 学历要求 + * @param dataSource 数据来源 + */ + public JobData(String jobName, double salary, String education, String dataSource) { + // 参数验证 + this.jobName = jobName != null ? jobName.trim() : ""; + this.salary = Math.max(0, salary); // 确保薪资非负 + this.education = education != null ? education.trim() : ""; + this.dataSource = dataSource != null ? dataSource.trim() : ""; + } + + /** + * 获取岗位名称 + * @return 岗位名称 + */ + public String getJobName() { + return jobName; + } + + /** + * 设置岗位名称 + * @param jobName 岗位名称 + */ + public void setJobName(String jobName) { + this.jobName = jobName != null ? jobName.trim() : ""; + } + + /** + * 获取薪资 + * @return 薪资(万元/月) + */ + public double getSalary() { + return salary; + } + + /** + * 设置薪资 + * @param salary 薪资(万元/月) + */ + public void setSalary(double salary) { + this.salary = Math.max(0, salary); // 确保薪资非负 + } + + /** + * 获取学历要求 + * @return 学历要求 + */ + public String getEducation() { + return education; + } + + /** + * 设置学历要求 + * @param education 学历要求 + */ + public void setEducation(String education) { + this.education = education != null ? education.trim() : ""; + } + + /** + * 获取数据来源 + * @return 数据来源 + */ + public String getDataSource() { + return dataSource; + } + + /** + * 设置数据来源 + * @param dataSource 数据来源 + */ + public void setDataSource(String dataSource) { + this.dataSource = dataSource != null ? 
dataSource.trim() : ""; + } + + /** + * 重写toString()方法,方便调试和输出 + * @return 字符串表示 + */ + @Override + public String toString() { + return "JobData{" + + "jobName='" + jobName + '\'' + + ", salary=" + salary + + ", education='" + education + '\'' + + ", dataSource='" + dataSource + '\'' + + '}'; + } + + /** + * 重写equals()方法 + * @param o 比较对象 + * @return 是否相等 + */ + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + JobData jobData = (JobData) o; + + if (Double.compare(jobData.salary, salary) != 0) return false; + if (!jobName.equals(jobData.jobName)) return false; + if (!education.equals(jobData.education)) return false; + return dataSource.equals(jobData.dataSource); + } + + /** + * 重写hashCode()方法 + * @return 哈希值 + */ + @Override + public int hashCode() { + int result; + long temp; + result = jobName.hashCode(); + temp = Double.doubleToLongBits(salary); + result = 31 * result + (int) (temp ^ (temp >>> 32)); + result = 31 * result + education.hashCode(); + result = 31 * result + dataSource.hashCode(); + return result; + } +} \ No newline at end of file diff --git a/w3/src/main/java/com/jobmarket/utils/CSVUtils.java b/w3/src/main/java/com/jobmarket/utils/CSVUtils.java new file mode 100644 index 0000000..38b4c06 --- /dev/null +++ b/w3/src/main/java/com/jobmarket/utils/CSVUtils.java @@ -0,0 +1,127 @@ +package com.jobmarket.utils; + +import com.jobmarket.model.JobData; +import org.apache.commons.csv.CSVFormat; +import org.apache.commons.csv.CSVPrinter; +import org.apache.commons.csv.CSVRecord; + +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.Reader; +import java.util.ArrayList; +import java.util.List; + +/** + * CSV工具类 + * 提供CSV文件的读写功能 + */ +public class CSVUtils { + // 常量定义 + private static final String[] CSV_HEADERS = {"岗位名称", "薪资(万元/月)", "学历要求", "数据来源"}; + private static final String JOB_NAME_HEADER = "岗位名称"; + private static 
final String SALARY_HEADER = "薪资(万元/月)"; + private static final String EDUCATION_HEADER = "学历要求"; + private static final String DATA_SOURCE_HEADER = "数据来源"; + + /** + * 将岗位数据列表写入CSV文件 + * @param filePath 文件路径 + * @param jobDataList 岗位数据列表 + * @throws IOException IO异常 + */ + public static void writeJobDataList(String filePath, List jobDataList) throws IOException { + // 参数验证 + if (filePath == null || filePath.trim().isEmpty()) { + throw new IllegalArgumentException("文件路径不能为空"); + } + if (jobDataList == null) { + throw new IllegalArgumentException("岗位数据列表不能为空"); + } + + try (FileWriter out = new FileWriter(filePath); + CSVPrinter printer = new CSVPrinter(out, + CSVFormat.DEFAULT.withHeader(CSV_HEADERS))) { + + for (JobData jobData : jobDataList) { + if (jobData != null) { + printer.printRecord( + jobData.getJobName(), + jobData.getSalary(), + jobData.getEducation(), + jobData.getDataSource() + ); + } + } + } + } + + /** + * 从CSV文件读取岗位数据列表 + * @param filePath 文件路径 + * @return 岗位数据列表 + * @throws IOException IO异常 + */ + public static List readJobDataList(String filePath) throws IOException { + // 参数验证 + if (filePath == null || filePath.trim().isEmpty()) { + throw new IllegalArgumentException("文件路径不能为空"); + } + + List jobDataList = new ArrayList<>(); + + try (Reader in = new FileReader(filePath); + org.apache.commons.csv.CSVParser parser = CSVFormat.DEFAULT + .withHeader(CSV_HEADERS) + .withFirstRecordAsHeader() + .parse(in)) { + + for (CSVRecord record : parser) { + try { + String jobName = record.get(JOB_NAME_HEADER); + double salary = Double.parseDouble(record.get(SALARY_HEADER)); + String education = record.get(EDUCATION_HEADER); + String dataSource = record.get(DATA_SOURCE_HEADER); + + jobDataList.add(new JobData(jobName, salary, education, dataSource)); + } catch (NumberFormatException e) { + // 薪资解析失败,跳过该行 + System.err.println("解析薪资失败,跳过该行: " + record); + } catch (Exception e) { + // 其他解析失败,跳过该行 + System.err.println("解析数据失败,跳过该行: " + record); + } + } + } + + 
return jobDataList; + } + + /** + * 检查文件是否存在 + * @param filePath 文件路径 + * @return 是否存在 + */ + public static boolean fileExists(String filePath) { + if (filePath == null || filePath.trim().isEmpty()) { + return false; + } + return new java.io.File(filePath).exists(); + } + + /** + * 创建目录(如果不存在) + * @param directoryPath 目录路径 + * @return 是否成功 + */ + public static boolean createDirectory(String directoryPath) { + if (directoryPath == null || directoryPath.trim().isEmpty()) { + return false; + } + java.io.File directory = new java.io.File(directoryPath); + if (!directory.exists()) { + return directory.mkdirs(); + } + return directory.isDirectory(); + } +} \ No newline at end of file diff --git a/w3/target/classes/com/jobmarket/crawler/CalssCrawler.class b/w3/target/classes/com/jobmarket/crawler/CalssCrawler.class new file mode 100644 index 0000000..b53c3f4 Binary files /dev/null and b/w3/target/classes/com/jobmarket/crawler/CalssCrawler.class differ diff --git a/w3/target/classes/com/jobmarket/crawler/Crawler.class b/w3/target/classes/com/jobmarket/crawler/Crawler.class new file mode 100644 index 0000000..b1dc59b Binary files /dev/null and b/w3/target/classes/com/jobmarket/crawler/Crawler.class differ diff --git a/w3/target/classes/com/jobmarket/crawler/HunanRstCrawler.class b/w3/target/classes/com/jobmarket/crawler/HunanRstCrawler.class new file mode 100644 index 0000000..c83441e Binary files /dev/null and b/w3/target/classes/com/jobmarket/crawler/HunanRstCrawler.class differ diff --git a/w3/target/classes/com/jobmarket/crawler/JobMarketCrawler.class b/w3/target/classes/com/jobmarket/crawler/JobMarketCrawler.class new file mode 100644 index 0000000..9ab3bab Binary files /dev/null and b/w3/target/classes/com/jobmarket/crawler/JobMarketCrawler.class differ diff --git a/w3/target/classes/com/jobmarket/crawler/SimpleFileWrite.class b/w3/target/classes/com/jobmarket/crawler/SimpleFileWrite.class new file mode 100644 index 0000000..0c96408 Binary files /dev/null and 
b/w3/target/classes/com/jobmarket/crawler/SimpleFileWrite.class differ diff --git a/w3/target/classes/com/jobmarket/crawler/StatsGovCrawler.class b/w3/target/classes/com/jobmarket/crawler/StatsGovCrawler.class new file mode 100644 index 0000000..1b49bee Binary files /dev/null and b/w3/target/classes/com/jobmarket/crawler/StatsGovCrawler.class differ diff --git a/w3/target/classes/com/jobmarket/crawler/TestCSV.class b/w3/target/classes/com/jobmarket/crawler/TestCSV.class new file mode 100644 index 0000000..4cc0385 Binary files /dev/null and b/w3/target/classes/com/jobmarket/crawler/TestCSV.class differ diff --git a/w3/target/classes/com/jobmarket/crawler/TestCSVGenerator.class b/w3/target/classes/com/jobmarket/crawler/TestCSVGenerator.class new file mode 100644 index 0000000..6cc9176 Binary files /dev/null and b/w3/target/classes/com/jobmarket/crawler/TestCSVGenerator.class differ diff --git a/w3/target/classes/com/jobmarket/crawler/TestFileWrite.class b/w3/target/classes/com/jobmarket/crawler/TestFileWrite.class new file mode 100644 index 0000000..fc2b996 Binary files /dev/null and b/w3/target/classes/com/jobmarket/crawler/TestFileWrite.class differ diff --git a/w3/target/classes/com/jobmarket/generator/JobDataGenerator.class b/w3/target/classes/com/jobmarket/generator/JobDataGenerator.class new file mode 100644 index 0000000..f065228 Binary files /dev/null and b/w3/target/classes/com/jobmarket/generator/JobDataGenerator.class differ diff --git a/w3/target/classes/com/jobmarket/model/JobData.class b/w3/target/classes/com/jobmarket/model/JobData.class new file mode 100644 index 0000000..530d001 Binary files /dev/null and b/w3/target/classes/com/jobmarket/model/JobData.class differ diff --git a/w3/target/classes/com/jobmarket/utils/CSVUtils.class b/w3/target/classes/com/jobmarket/utils/CSVUtils.class new file mode 100644 index 0000000..33108dc Binary files /dev/null and b/w3/target/classes/com/jobmarket/utils/CSVUtils.class differ diff --git 
a/w3/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst b/w3/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst new file mode 100644 index 0000000..174ca5f --- /dev/null +++ b/w3/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst @@ -0,0 +1,12 @@ +com\jobmarket\utils\CSVUtils.class +com\jobmarket\generator\JobDataGenerator.class +com\jobmarket\crawler\SimpleFileWrite.class +com\jobmarket\crawler\TestCSV.class +com\jobmarket\crawler\HunanRstCrawler.class +com\jobmarket\crawler\JobMarketCrawler.class +com\jobmarket\crawler\CalssCrawler.class +com\jobmarket\crawler\TestFileWrite.class +com\jobmarket\model\JobData.class +com\jobmarket\crawler\Crawler.class +com\jobmarket\crawler\StatsGovCrawler.class +com\jobmarket\crawler\TestCSVGenerator.class diff --git a/w3/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst b/w3/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst new file mode 100644 index 0000000..d93560d --- /dev/null +++ b/w3/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst @@ -0,0 +1,12 @@ +C:\Users\ZRL\Desktop\java\w3\src\main\java\com\jobmarket\crawler\JobMarketCrawler.java +C:\Users\ZRL\Desktop\java\w3\src\main\java\com\jobmarket\utils\CSVUtils.java +C:\Users\ZRL\Desktop\java\w3\src\main\java\com\jobmarket\crawler\TestFileWrite.java +C:\Users\ZRL\Desktop\java\w3\src\main\java\com\jobmarket\crawler\TestCSV.java +C:\Users\ZRL\Desktop\java\w3\src\main\java\com\jobmarket\crawler\HunanRstCrawler.java +C:\Users\ZRL\Desktop\java\w3\src\main\java\com\jobmarket\crawler\SimpleFileWrite.java +C:\Users\ZRL\Desktop\java\w3\src\main\java\com\jobmarket\crawler\StatsGovCrawler.java +C:\Users\ZRL\Desktop\java\w3\src\main\java\com\jobmarket\generator\JobDataGenerator.java +C:\Users\ZRL\Desktop\java\w3\src\main\java\com\jobmarket\crawler\Crawler.java 
+C:\Users\ZRL\Desktop\java\w3\src\main\java\com\jobmarket\model\JobData.java +C:\Users\ZRL\Desktop\java\w3\src\main\java\com\jobmarket\crawler\TestCSVGenerator.java +C:\Users\ZRL\Desktop\java\w3\src\main\java\com\jobmarket\crawler\CalssCrawler.java diff --git a/w3/test.txt b/w3/test.txt new file mode 100644 index 0000000..b45ef6f --- /dev/null +++ b/w3/test.txt @@ -0,0 +1 @@ +Hello, World! \ No newline at end of file diff --git a/w3/代码解释.md b/w3/代码解释.md deleted file mode 100644 index daa106a..0000000 --- a/w3/代码解释.md +++ /dev/null @@ -1,380 +0,0 @@ -# 人才市场数据爬取与分析代码解释 - -## 1. 爬虫脚本 (crawler.py) - -### 1.1 导入必要的库 -```python -import requests -from bs4 import BeautifulSoup -import pandas as pd -import time -import random -``` -- `requests`: 用于发送HTTP请求,获取网页内容 -- `BeautifulSoup`: 用于解析HTML文档,提取数据 -- `pandas`: 用于数据处理和存储 -- `time`: 用于添加延迟,避免被反爬 -- `random`: 用于生成随机数,模拟不同的用户行为 - -### 1.2 定义要抓取的网站URLs -```python -urls = [ - "https://www.calss.net.cn/p1/kybgList/20251124/40156.html", # 中国劳动和社会保障科学研究院 - "https://www.stats.gov.cn/sj/zxfb/202505/t20250516_1959826.html", # 国家统计局 - "https://rst.hunan.gov.cn/rst/xxgk/gzdt/zwdt/202504/t20250428_33656960.html" # 湖南省人社厅 -] -``` -- 定义了三个主要的数据来源网站,这些网站提供了人才市场的统计数据 - -### 1.3 存储数据的列表 -```python -job_data = [] -``` -- 创建一个空列表,用于存储抓取到的岗位数据 - -### 1.4 定义用户代理,模拟浏览器访问 -```python -user_agents = [ - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.128 Safari/537.36" -] - -def get_random_user_agent(): - return random.choice(user_agents) -``` -- 定义了多个用户代理字符串,用于模拟不同浏览器的访问 -- `get_random_user_agent()`函数随机选择一个用户代理,增加爬虫的隐蔽性 - -### 1.5 抓取中国劳动和社会保障科学研究院数据 -```python -def crawl_calss(): - """抓取中国劳动和社会保障科学研究院数据""" - url = 
"https://www.calss.net.cn/p1/kybgList/20251124/40156.html" - headers = { - "User-Agent": get_random_user_agent() - } - - try: - response = requests.get(url, headers=headers, timeout=10) - response.encoding = 'utf-8' - soup = BeautifulSoup(response.text, 'html.parser') - - # 提取重点区域数字热门岗位数据 - tables = soup.find_all('table') - if tables: - # 第一个表格是重点区域数字热门岗位 - table1 = tables[0] - rows = table1.find_all('tr')[1:] # 跳过表头 - for row in rows: - cells = row.find_all('td') - if len(cells) >= 2: - job = cells[0].text.strip() - salary = cells[1].text.strip() - # 转换薪资为万元/月 - try: - salary_num = float(salary) - except: - salary_num = 0 - job_data.append({ - '岗位名称': job, - '薪资(万元/月)': salary_num, - '学历要求': '本科及以上', # 根据行业默认 - '数据来源': '中国劳动和社会保障科学研究院' - }) - - # 提取重点行业典型岗位数据 - if len(tables) > 1: - table2 = tables[1] - rows = table2.find_all('tr')[1:] # 跳过表头 - for row in rows: - cells = row.find_all('td') - if len(cells) >= 2: - job = cells[0].text.strip() - salary = cells[1].text.strip() - # 转换薪资为万元/月 - try: - salary_num = float(salary) - except: - salary_num = 0 - job_data.append({ - '岗位名称': job, - '薪资(万元/月)': salary_num, - '学历要求': '本科及以上', # 根据行业默认 - '数据来源': '中国劳动和社会保障科学研究院' - }) - except Exception as e: - print(f"抓取中国劳动和社会保障科学研究院数据失败: {e}") -``` -- `crawl_calss()`函数专门用于抓取中国劳动和社会保障科学研究院的岗位薪酬数据 -- 使用`requests.get()`发送HTTP请求,获取网页内容 -- 使用`BeautifulSoup`解析HTML,提取表格数据 -- 遍历表格中的每一行,提取岗位名称和薪资信息 -- 将提取的数据添加到`job_data`列表中 - -### 1.6 抓取国家统计局数据 -```python -def crawl_stats_gov(): - """抓取国家统计局数据""" - url = "https://www.stats.gov.cn/sj/zxfb/202505/t20250516_1959826.html" - headers = { - "User-Agent": get_random_user_agent() - } - - try: - response = requests.get(url, headers=headers, timeout=10) - response.encoding = 'utf-8' - soup = BeautifulSoup(response.text, 'html.parser') - - # 提取行业平均工资数据 - content = soup.find('div', class_='content') - if content: - # 提取规模以上企业分岗位就业人员年平均工资 - # 这里需要根据实际页面结构调整 - text = content.get_text() - # 解析文本中的数据 - positions = [ - ('中层及以上管理人员', 203014), - ('专业技术人员', 
148046), - ('办事人员和有关人员', 93189), - ('社会生产服务和生活服务人员', 77584), - ('生产制造及有关人员', 78561) - ] - - for job, salary in positions: - # 转换为万元/月 - salary_month = round(salary / 120000, 2) - job_data.append({ - '岗位名称': job, - '薪资(万元/月)': salary_month, - '学历要求': '本科及以上', # 根据岗位默认 - '数据来源': '国家统计局' - }) - except Exception as e: - print(f"抓取国家统计局数据失败: {e}") -``` -- `crawl_stats_gov()`函数专门用于抓取国家统计局的行业平均工资数据 -- 由于国家统计局的数据结构不同,这里直接使用了文本解析的方式 -- 将年平均工资转换为月平均工资(万元/月) - -### 1.7 抓取湖南省人社厅数据 -```python -def crawl_hunan_rst(): - """抓取湖南省人社厅数据""" - url = "https://rst.hunan.gov.cn/rst/xxgk/gzdt/zwdt/202504/t20250428_33656960.html" - headers = { - "User-Agent": get_random_user_agent() - } - - try: - response = requests.get(url, headers=headers, timeout=10) - response.encoding = 'utf-8' - soup = BeautifulSoup(response.text, 'html.parser') - - # 提取紧缺职业数据 - content = soup.find('div', class_='content') - if content: - text = content.get_text() - # 解析文本中的紧缺职业数据 - # 排名前五的紧缺职业 - shortage_jobs = [ - ('纺织针织印染人员', 2.96), - ('商品营业员', 2.66), - ('生产辅助人员', 2.57), - ('营销员', 2.43), - ('家政服务员', 2.33) - ] - - for job, demand_ratio in shortage_jobs: - # 估算薪资(这里使用假设值,实际应该根据市场情况调整) - salary_month = round(random.uniform(0.5, 1.5), 2) - job_data.append({ - '岗位名称': job, - '薪资(万元/月)': salary_month, - '学历要求': '初中及以上', # 根据岗位默认 - '数据来源': '湖南省人社厅' - }) - except Exception as e: - print(f"抓取湖南省人社厅数据失败: {e}") -``` -- `crawl_hunan_rst()`函数专门用于抓取湖南省人社厅的紧缺职业数据 -- 由于湖南省人社厅只提供了求人倍率,这里使用随机数估算薪资 - -### 1.8 主函数 -```python -def main(): - print("开始抓取人才市场数据...") - - # 抓取各个网站的数据 - crawl_calss() - time.sleep(random.uniform(1, 3)) # 随机延迟,避免被反爬 - - crawl_stats_gov() - time.sleep(random.uniform(1, 3)) # 随机延迟,避免被反爬 - - crawl_hunan_rst() - time.sleep(random.uniform(1, 3)) # 随机延迟,避免被反爬 - - # 转换为DataFrame - df = pd.DataFrame(job_data) - - # 保存原始数据 - df.to_csv('原始人才市场数据.csv', index=False, encoding='utf-8-sig') - print(f"已抓取 {len(df)} 条数据,保存到 '原始人才市场数据.csv'") - - # 显示前10条数据 - print("\n前10条数据:") - print(df.head(10)) - -if __name__ == 
"__main__": - main() -``` -- `main()`函数是脚本的入口点,依次调用三个抓取函数 -- 在每次抓取后添加随机延迟,避免被网站的反爬机制检测到 -- 将抓取的数据转换为DataFrame,并保存为CSV文件 -- 显示前10条数据,方便查看抓取结果 - -## 2. 数据生成脚本 (generate_data.py) - -### 2.1 导入必要的库 -```python -import pandas as pd -import random -``` -- `pandas`: 用于数据处理和存储 -- `random`: 用于生成随机数,扩展数据 - -### 2.2 基于真实数据创建热门岗位列表 -```python -hot_jobs = [ - # 人工智能相关岗位 - ('架构师', 5.84, '硕士及以上'), - ('机器学习工程师', 4.66, '硕士及以上'), - # ... 其他岗位 ... -] -``` -- 基于从官方网站抓取的真实数据,创建了一个包含各种热门岗位的列表 -- 每个岗位包含三个信息:岗位名称、月薪(万元)、学历要求 - -### 2.3 扩展岗位列表到500个 -```python -extended_jobs = [] -for job, salary, education in hot_jobs: - # 为每个基础岗位创建多个变体 - variations = [ - job, - f'高级{job}', - f'{job}(资深)', - f'{job}(专家)', - f'{job}(主管)' - ] - for var in variations: - # 为每个变体生成不同薪资水平 - for i in range(3): - # 薪资浮动范围 - salary_variation = salary * (0.8 + i * 0.2) - extended_jobs.append({ - '岗位名称': var, - '薪资(万元/月)': round(salary_variation, 2), - '学历要求': education - }) - -# 确保有500个岗位 -while len(extended_jobs) < 500: - # 随机选择一个基础岗位进行复制 - job, salary, education = random.choice(hot_jobs) - # 生成随机薪资 - random_salary = salary * random.uniform(0.7, 1.3) - extended_jobs.append({ - '岗位名称': job, - '薪资(万元/月)': round(random_salary, 2), - '学历要求': education - }) - -# 只保留前500个岗位 -extended_jobs = extended_jobs[:500] -``` -- 通过为每个基础岗位创建多个变体(如高级、资深、专家、主管)来扩展数据 -- 为每个变体生成不同的薪资水平,增加数据的多样性 -- 如果扩展后的数据不足500条,则随机复制基础岗位并生成随机薪资,直到达到500条 - -### 2.4 数据处理和保存 -```python -# 转换为DataFrame -df = pd.DataFrame(extended_jobs) - -# 按薪资排序(从高到低) -df = df.sort_values('薪资(万元/月)', ascending=False) - -# 重置索引 -df = df.reset_index(drop=True) - -# 添加排名列 -df.insert(0, '排名', range(1, len(df) + 1)) - -# 保存为CSV文件 -df.to_csv('热门岗位人才需求分析.csv', index=False, encoding='utf-8-sig') - -# 保存为Excel文件 -df.to_excel('热门岗位人才需求分析.xlsx', index=False) - -print(f"已生成500个热门岗位数据,保存到 '热门岗位人才需求分析.csv' 和 '热门岗位人才需求分析.xlsx'") -print("\n前20个热门岗位:") -print(df.head(20)) -``` -- 将扩展后的数据转换为DataFrame -- 按薪资从高到低排序,确定热门程度 -- 重置索引并添加排名列 -- 将数据保存为CSV和Excel文件,方便查看和分析 -- 显示前20个热门岗位,方便快速了解结果 - -## 
3. 数据文件说明 - -### 3.1 热门岗位人才需求分析.csv -- 包含500个热门岗位的详细信息 -- 字段说明: - - 排名:按薪资从高到低的排名 - - 岗位名称:岗位的具体名称 - - 薪资(万元/月):该岗位的月平均薪资 - - 学历要求:该岗位的最低学历要求 - -### 3.2 数据来源 -- 中国劳动和社会保障科学研究院:提供了数字热门岗位和重点行业典型岗位的薪酬数据 -- 国家统计局:提供了不同岗位类别的平均工资数据 -- 湖南省人社厅:提供了紧缺职业的需求情况 -- 其他补充数据:基于市场调研和行业分析 - -## 4. 代码执行流程 - -1. **爬虫脚本执行流程**: - - 导入必要的库 - - 定义要抓取的网站URLs - - 定义用户代理,模拟浏览器访问 - - 分别抓取三个网站的数据 - - 将抓取的数据保存为CSV文件 - -2. **数据生成脚本执行流程**: - - 导入必要的库 - - 基于真实数据创建热门岗位列表 - - 扩展岗位列表到500个 - - 按薪资排序并添加排名 - - 将数据保存为CSV和Excel文件 - -## 5. 注意事项 - -1. **数据准确性**:由于部分网站的数据结构可能会变化,爬虫脚本可能需要根据实际情况进行调整 -2. **反爬措施**:添加了随机延迟和用户代理,减少被网站封禁的风险 -3. **数据完整性**:对于无法直接获取的薪资数据,使用了合理的估算方法 -4. **数据更新**:建议定期更新数据,以反映最新的市场情况 -5. **法律合规**:爬虫行为应遵守相关法律法规,仅抓取公开可访问的数据 - -## 6. 结果分析 - -通过分析生成的数据,可以得出以下结论: - -1. **高薪岗位集中在技术领域**:人工智能、芯片设计、软件开发等技术岗位的薪资普遍较高 -2. **学历要求与薪资正相关**:硕士及以上学历的岗位薪资普遍高于本科及以下学历的岗位 -3. **紧缺职业的薪资优势**:部分紧缺职业(如应急管理、养老护理)由于需求大,薪资也有一定优势 -4. **行业差异明显**:IT、金融、医药等行业的薪资普遍高于传统服务业 - -这些数据可以为求职者提供参考,帮助他们了解不同岗位的薪资水平和学历要求,从而做出更合理的职业规划。 \ No newline at end of file diff --git a/w3/原始人才市场数据.csv b/w3/原始人才市场数据.csv new file mode 100644 index 0000000..b9eace1 --- /dev/null +++ b/w3/原始人才市场数据.csv @@ -0,0 +1,11 @@ +岗位名称,薪资(万元/月),学历要求,数据来源 +软件工程师,1.5,本科及以上,中国劳动和社会保障科学研究院 +数据分析师,1.2,本科及以上,中国劳动和社会保障科学研究院 +产品经理,1.8,本科及以上,中国劳动和社会保障科学研究院 +中层及以上管理人员,1.69,本科及以上,国家统计局 +专业技术人员,1.23,本科及以上,国家统计局 +办事人员和有关人员,0.78,本科及以上,国家统计局 +社会生产服务和生活服务人员,0.65,本科及以上,国家统计局 +生产制造及有关人员,0.65,本科及以上,国家统计局 +纺织针织印染人员,0.8,初中及以上,湖南省人社厅 +商品营业员,0.7,初中及以上,湖南省人社厅 diff --git a/w3/热门岗位人才需求分析.csv b/w3/热门岗位人才需求分析.csv index ac143ab..d16da3c 100644 --- a/w3/热门岗位人才需求分析.csv +++ b/w3/热门岗位人才需求分析.csv @@ -1,501 +1,501 @@ 排名,岗位名称,薪资(万元/月),学历要求 -1,高级架构师,11.68,硕士及以上 -2,架构师(专家),11.68,硕士及以上 -3,架构师(资深),11.68,硕士及以上 -4,架构师(主管),11.68,硕士及以上 -5,架构师,9.34,硕士及以上 -6,高级机器学习工程师,9.32,硕士及以上 -7,机器学习工程师(专家),9.32,硕士及以上 -8,机器学习工程师(资深),9.32,硕士及以上 -9,机器学习工程师(主管),9.32,硕士及以上 -10,机器学习工程师,7.46,硕士及以上 -11,高级深度学习工程师,8.94,硕士及以上 -12,深度学习工程师(专家),8.94,硕士及以上 -13,深度学习工程师(资深),8.94,硕士及以上 -14,深度学习工程师(主管),8.94,硕士及以上 -15,深度学习工程师,7.16,硕士及以上 
-16,高级算法工程师,8.92,硕士及以上 -17,算法工程师(专家),8.92,硕士及以上 -18,算法工程师(资深),8.92,硕士及以上 -19,算法工程师(主管),8.92,硕士及以上 -20,算法工程师,7.14,硕士及以上 -21,高级系统工程师,8.32,硕士及以上 -22,系统工程师(专家),8.32,硕士及以上 -23,系统工程师(资深),8.32,硕士及以上 -24,系统工程师(主管),8.32,硕士及以上 -25,系统工程师,6.66,硕士及以上 -26,高级IT技术/研发总监,6.26,硕士及以上 -27,IT技术/研发总监(专家),6.26,硕士及以上 -28,IT技术/研发总监(资深),6.26,硕士及以上 -29,IT技术/研发总监(主管),6.26,硕士及以上 -30,IT技术/研发总监,5.01,硕士及以上 -31,高级模拟芯片设计工程师,6.20,硕士及以上 -32,模拟芯片设计工程师(专家),6.20,硕士及以上 -33,模拟芯片设计工程师(资深),6.20,硕士及以上 -34,模拟芯片设计工程师(主管),6.20,硕士及以上 -35,模拟芯片设计工程师,4.96,硕士及以上 -36,高级大模型算法工程师,4.96,硕士及以上 -37,大模型算法工程师(专家),4.96,硕士及以上 -38,大模型算法工程师(资深),4.96,硕士及以上 -39,大模型算法工程师(主管),4.96,硕士及以上 -40,大模型算法工程师,3.97,硕士及以上 -41,高级智能驾驶系统工程师,4.22,硕士及以上 -42,智能驾驶系统工程师(专家),4.22,硕士及以上 -43,智能驾驶系统工程师(资深),4.22,硕士及以上 -44,智能驾驶系统工程师(主管),4.22,硕士及以上 -45,智能驾驶系统工程师,3.38,硕士及以上 -46,高级C/C++开发工程师,4.02,本科及以上 -47,C/C++开发工程师(专家),4.02,本科及以上 -48,C/C++开发工程师(资深),4.02,本科及以上 -49,C/C++开发工程师(主管),4.02,本科及以上 -50,C/C++开发工程师,3.22,本科及以上 -51,高级企业安全主管,3.60,本科及以上 -52,企业安全主管(专家),3.60,本科及以上 -53,企业安全主管(资深),3.60,本科及以上 -54,企业安全主管(主管),3.60,本科及以上 -55,企业安全主管,2.88,本科及以上 -56,高级建筑师,3.60,本科及以上 -57,建筑师(专家),3.60,本科及以上 -58,建筑师(资深),3.60,本科及以上 -59,建筑师(主管),3.60,本科及以上 -60,建筑师,2.88,本科及以上 -61,高级中层及以上管理人员,3.38,本科及以上 -62,中层及以上管理人员(专家),3.38,本科及以上 -63,中层及以上管理人员(资深),3.38,本科及以上 -64,中层及以上管理人员(主管),3.38,本科及以上 -65,中层及以上管理人员,2.70,本科及以上 -66,高级Python开发工程师,3.58,本科及以上 -67,Python开发工程师(专家),3.58,本科及以上 -68,Python开发工程师(资深),3.58,本科及以上 -69,Python开发工程师(主管),3.58,本科及以上 -70,Python开发工程师,2.87,本科及以上 -71,高级嵌入式软件开发工程师,3.72,本科及以上 -72,嵌入式软件开发工程师(专家),3.72,本科及以上 -73,嵌入式软件开发工程师(资深),3.72,本科及以上 -74,嵌入式软件开发工程师(主管),3.72,本科及以上 -75,嵌入式软件开发工程师,2.98,本科及以上 -76,高级Java开发工程师,3.70,本科及以上 -77,Java开发工程师(专家),3.70,本科及以上 -78,Java开发工程师(资深),3.70,本科及以上 -79,Java开发工程师(主管),3.70,本科及以上 -80,Java开发工程师,2.96,本科及以上 -81,高级移动开发工程师,3.54,本科及以上 -82,移动开发工程师(专家),3.54,本科及以上 -83,移动开发工程师(资深),3.54,本科及以上 -84,移动开发工程师(主管),3.54,本科及以上 -85,移动开发工程师,2.86,本科及以上 -86,高级IT项目经理,3.62,本科及以上 -87,IT项目经理(专家),3.62,本科及以上 -88,IT项目经理(资深),3.62,本科及以上 -89,IT项目经理(主管),3.62,本科及以上 -90,IT项目经理,2.89,本科及以上 
-91,高级产品经理,3.54,本科及以上 -92,产品经理(专家),3.54,本科及以上 -93,产品经理(资深),3.54,本科及以上 -94,产品经理(主管),3.54,本科及以上 -95,产品经理,2.86,本科及以上 -96,高级医生,3.00,硕士及以上 -97,医生(专家),3.00,硕士及以上 -98,医生(资深),3.00,硕士及以上 -99,医生(主管),3.00,硕士及以上 -100,医生,2.40,硕士及以上 -101,高级安全工程师,3.00,本科及以上 -102,安全工程师(专家),3.00,本科及以上 -103,安全工程师(资深),3.00,本科及以上 -104,安全工程师(主管),3.00,本科及以上 -105,安全工程师,2.40,本科及以上 -106,高级财务经理,2.40,本科及以上 -107,财务经理(专家),2.40,本科及以上 -108,财务经理(资深),2.40,本科及以上 -109,财务经理(主管),2.40,本科及以上 -110,财务经理,1.92,本科及以上 -111,高级市场营销经理,2.40,本科及以上 -112,市场营销经理(专家),2.40,本科及以上 -113,市场营销经理(资深),2.40,本科及以上 -114,市场营销经理(主管),2.40,本科及以上 -115,市场营销经理,1.92,本科及以上 -116,高级律师,2.40,本科及以上 -117,律师(专家),2.40,本科及以上 -118,律师(资深),2.40,本科及以上 -119,律师(主管),2.40,本科及以上 -120,律师,1.92,本科及以上 -121,高级专业技术人员,2.46,本科及以上 -122,专业技术人员(专家),2.46,本科及以上 -123,专业技术人员(资深),2.46,本科及以上 -124,专业技术人员(主管),2.46,本科及以上 -125,专业技术人员,1.97,本科及以上 -126,高级硬件工程师,3.24,本科及以上 -127,硬件工程师(专家),3.24,本科及以上 -128,硬件工程师(资深),3.24,本科及以上 -129,硬件工程师(主管),3.24,本科及以上 -130,硬件工程师,2.59,本科及以上 -131,高级材料工艺工程师,2.22,本科及以上 -132,材料工艺工程师(专家),2.22,本科及以上 -133,材料工艺工程师(资深),2.22,本科及以上 -134,材料工艺工程师(主管),2.22,本科及以上 -135,材料工艺工程师,1.78,本科及以上 -136,高级机械结构工程师,2.78,本科及以上 -137,机械结构工程师(专家),2.78,本科及以上 -138,机械结构工程师(资深),2.78,本科及以上 -139,机械结构工程师(主管),2.78,本科及以上 -140,机械结构工程师,2.22,本科及以上 -141,高级涂料研发,2.44,硕士及以上 -142,涂料研发(专家),2.44,硕士及以上 -143,涂料研发(资深),2.44,硕士及以上 -144,涂料研发(主管),2.44,硕士及以上 -145,涂料研发,1.95,硕士及以上 -146,高级制剂研发师,2.60,硕士及以上 -147,制剂研发师(专家),2.60,硕士及以上 -148,制剂研发师(资深),2.60,硕士及以上 -149,制剂研发师(主管),2.60,硕士及以上 -150,制剂研发师,2.08,硕士及以上 -151,高级数据开发工程师,3.20,本科及以上 -152,数据开发工程师(专家),3.20,本科及以上 -153,数据开发工程师(资深),3.20,本科及以上 -154,数据开发工程师(主管),3.20,本科及以上 -155,数据开发工程师,2.56,本科及以上 -156,高级前端开发工程师,3.02,本科及以上 -157,前端开发工程师(专家),3.02,本科及以上 -158,前端开发工程师(资深),3.02,本科及以上 -159,前端开发工程师(主管),3.02,本科及以上 -160,前端开发工程师,2.42,本科及以上 -161,高级数据分析师,2.40,本科及以上 -162,数据分析师(专家),2.40,本科及以上 -163,数据分析师(资深),2.40,本科及以上 -164,数据分析师(主管),2.40,本科及以上 -165,数据分析师,1.92,本科及以上 -166,高级风险评估专员,2.40,本科及以上 -167,风险评估专员(专家),2.40,本科及以上 -168,风险评估专员(资深),2.40,本科及以上 -169,风险评估专员(主管),2.40,本科及以上 -170,风险评估专员,1.92,本科及以上 
-171,高级人力资源经理,2.00,本科及以上 -172,人力资源经理(专家),2.00,本科及以上 -173,人力资源经理(资深),2.00,本科及以上 -174,人力资源经理(主管),2.00,本科及以上 -175,人力资源经理,1.60,本科及以上 -176,高级半导体设备工程师,2.42,本科及以上 -177,半导体设备工程师(专家),2.42,本科及以上 -178,半导体设备工程师(资深),2.42,本科及以上 -179,半导体设备工程师(主管),2.42,本科及以上 -180,半导体设备工程师,1.94,本科及以上 -181,高级热能工程师,2.18,本科及以上 -182,热能工程师(专家),2.18,本科及以上 -183,热能工程师(资深),2.18,本科及以上 -184,热能工程师(主管),2.18,本科及以上 -185,热能工程师,1.74,本科及以上 -186,高级电气工程师,2.18,本科及以上 -187,电气工程师(专家),2.18,本科及以上 -188,电气工程师(资深),2.18,本科及以上 -189,电气工程师(主管),2.18,本科及以上 -190,电气工程师,1.75,本科及以上 -191,高级自动化工程师,2.08,本科及以上 -192,自动化工程师(专家),2.08,本科及以上 -193,自动化工程师(资深),2.08,本科及以上 -194,自动化工程师(主管),2.08,本科及以上 -195,自动化工程师,1.66,本科及以上 -196,高级医药化学分析师,2.12,本科及以上 -197,医药化学分析师(专家),2.12,本科及以上 -198,医药化学分析师(资深),2.12,本科及以上 -199,医药化学分析师(主管),2.12,本科及以上 -200,医药化学分析师,1.70,本科及以上 -201,高级机械工艺工程师,2.04,本科及以上 -202,机械工艺工程师(专家),2.04,本科及以上 -203,机械工艺工程师(资深),2.04,本科及以上 -204,机械工艺工程师(主管),2.04,本科及以上 -205,机械工艺工程师,1.63,本科及以上 -206,高级医学信息专员,2.06,本科及以上 -207,医学信息专员(专家),2.06,本科及以上 -208,医学信息专员(资深),2.06,本科及以上 -209,医学信息专员(主管),2.06,本科及以上 -210,医学信息专员,1.65,本科及以上 -211,高级电子工程师,2.12,本科及以上 -212,电子工程师(专家),2.12,本科及以上 -213,电子工程师(资深),2.12,本科及以上 -214,电子工程师(主管),2.12,本科及以上 -215,电子工程师,1.70,本科及以上 -216,高级风电工程师,2.02,本科及以上 -217,风电工程师(专家),2.02,本科及以上 -218,风电工程师(资深),2.02,本科及以上 -219,风电工程师(主管),2.02,本科及以上 -220,风电工程师,1.62,本科及以上 -221,高级土木工程师,2.00,本科及以上 -222,土木工程师(专家),2.00,本科及以上 -223,土木工程师(资深),2.00,本科及以上 -224,土木工程师(主管),2.00,本科及以上 -225,土木工程师,1.60,本科及以上 -226,高级会计师,1.80,本科及以上 -227,会计师(专家),1.80,本科及以上 -228,会计师(资深),1.80,本科及以上 -229,会计师(主管),1.80,本科及以上 -230,会计师,1.44,本科及以上 -231,高级核力/火力工程师,1.92,本科及以上 -232,核力/火力工程师(专家),1.92,本科及以上 -233,核力/火力工程师(资深),1.92,本科及以上 -234,核力/火力工程师(主管),1.92,本科及以上 -235,核力/火力工程师,1.54,本科及以上 -236,高级教师,1.60,本科及以上 -237,教师(专家),1.60,本科及以上 -238,教师(资深),1.60,本科及以上 -239,教师(主管),1.60,本科及以上 -240,教师,1.28,本科及以上 -241,高级硬件测试工程师,2.14,本科及以上 -242,硬件测试工程师(专家),2.14,本科及以上 -243,硬件测试工程师(资深),2.14,本科及以上 -244,硬件测试工程师(主管),2.14,本科及以上 -245,硬件测试工程师,1.71,本科及以上 -246,高级康复护理员,2.00,大专及以上 -247,康复护理员(专家),2.00,大专及以上 -248,康复护理员(资深),2.00,大专及以上 
-249,康复护理员(主管),2.00,大专及以上 -250,康复护理员,1.60,大专及以上 -251,高级网络销售员,2.08,高中及以上 -252,网络销售员(专家),2.08,高中及以上 -253,网络销售员(资深),2.08,高中及以上 -254,网络销售员(主管),2.08,高中及以上 -255,网络销售员,1.66,高中及以上 -256,高级营销员,1.80,高中及以上 -257,营销员(专家),1.80,高中及以上 -258,营销员(资深),1.80,高中及以上 -259,营销员(主管),1.80,高中及以上 -260,营销员,1.44,高中及以上 -261,高级老年社工,1.80,本科及以上 -262,老年社工(专家),1.80,本科及以上 -263,老年社工(资深),1.80,本科及以上 -264,老年社工(主管),1.80,本科及以上 -265,老年社工,1.44,本科及以上 -266,高级3D设计师,1.82,大专及以上 -267,3D设计师(专家),1.82,大专及以上 -268,3D设计师(资深),1.82,大专及以上 -269,3D设计师(主管),1.82,大专及以上 -270,3D设计师,1.46,大专及以上 -271,高级办事人员和有关人员,1.56,大专及以上 -272,办事人员和有关人员(专家),1.56,大专及以上 -273,办事人员和有关人员(资深),1.56,大专及以上 -274,办事人员和有关人员(主管),1.56,大专及以上 -275,办事人员和有关人员,1.25,大专及以上 -276,高级运维工程师,1.98,本科及以上 -277,运维工程师(专家),1.98,本科及以上 -278,运维工程师(资深),1.98,本科及以上 -279,运维工程师(主管),1.98,本科及以上 -280,运维工程师,1.58,本科及以上 -281,高级网络工程师,1.98,本科及以上 -282,网络工程师(专家),1.98,本科及以上 -283,网络工程师(资深),1.98,本科及以上 -284,网络工程师(主管),1.98,本科及以上 -285,网络工程师,1.58,本科及以上 -286,高级医药代表,1.88,大专及以上 -287,医药代表(专家),1.88,大专及以上 -288,医药代表(资深),1.88,大专及以上 -289,医药代表(主管),1.88,大专及以上 -290,医药代表,1.50,大专及以上 -291,高级机器人调试工程师,1.94,大专及以上 -292,机器人调试工程师(专家),1.94,大专及以上 -293,机器人调试工程师(资深),1.94,大专及以上 -294,机器人调试工程师(主管),1.94,大专及以上 -295,机器人调试工程师,1.55,大专及以上 -296,高级新媒体运营,1.74,大专及以上 -297,新媒体运营(专家),1.74,大专及以上 -298,新媒体运营(资深),1.74,大专及以上 -299,新媒体运营(主管),1.74,大专及以上 -300,新媒体运营,1.39,大专及以上 -301,高级国内电商运营,1.84,大专及以上 -302,国内电商运营(专家),1.84,大专及以上 -303,国内电商运营(资深),1.84,大专及以上 -304,国内电商运营(主管),1.84,大专及以上 -305,国内电商运营,1.47,大专及以上 -306,高级应急救援员,1.60,高中及以上 -307,应急救援员(专家),1.60,高中及以上 -308,应急救援员(资深),1.60,高中及以上 -309,应急救援员(主管),1.60,高中及以上 -310,应急救援员,1.28,高中及以上 -311,高级设备维护工程师,1.86,大专及以上 -312,设备维护工程师(专家),1.86,大专及以上 -313,设备维护工程师(资深),1.86,大专及以上 -314,设备维护工程师(主管),1.86,大专及以上 -315,设备维护工程师,1.49,大专及以上 -316,高级电力工程师,1.76,本科及以上 -317,电力工程师(专家),1.76,本科及以上 -318,电力工程师(资深),1.76,本科及以上 -319,电力工程师(主管),1.76,本科及以上 -320,电力工程师,1.41,本科及以上 -321,高级UI设计师,1.58,本科及以上 -322,UI设计师(专家),1.58,本科及以上 -323,UI设计师(资深),1.58,本科及以上 -324,UI设计师(主管),1.58,本科及以上 -325,UI设计师,1.27,本科及以上 -326,高级短视频运营,1.44,大专及以上 -327,短视频运营(专家),1.44,大专及以上 
-328,短视频运营(资深),1.44,大专及以上 -329,短视频运营(主管),1.44,大专及以上 -330,短视频运营,1.15,大专及以上 -331,高级消防设施操作员,1.40,初中及以上 -332,消防设施操作员(专家),1.40,初中及以上 -333,消防设施操作员(资深),1.40,初中及以上 -334,消防设施操作员(主管),1.40,初中及以上 -335,消防设施操作员,1.12,初中及以上 -336,高级养老护理员,1.60,初中及以上 -337,养老护理员(专家),1.60,初中及以上 -338,养老护理员(资深),1.60,初中及以上 -339,养老护理员(主管),1.60,初中及以上 -340,养老护理员,1.28,初中及以上 -341,高级护士,1.40,大专及以上 -342,护士(专家),1.40,大专及以上 -343,护士(资深),1.40,大专及以上 -344,护士(主管),1.40,大专及以上 -345,护士,1.12,大专及以上 -346,高级社会生产服务和生活服务人员,1.30,高中及以上 -347,社会生产服务和生活服务人员(专家),1.30,高中及以上 -348,社会生产服务和生活服务人员(资深),1.30,高中及以上 -349,社会生产服务和生活服务人员(主管),1.30,高中及以上 -350,社会生产服务和生活服务人员,1.04,高中及以上 -351,高级生产制造及有关人员,1.30,高中及以上 -352,生产制造及有关人员(专家),1.30,高中及以上 -353,生产制造及有关人员(资深),1.30,高中及以上 -354,生产制造及有关人员(主管),1.30,高中及以上 -355,生产制造及有关人员,1.04,高中及以上 -356,高级CNC/数控编程,1.92,大专及以上 -357,CNC/数控编程(专家),1.92,大专及以上 -358,CNC/数控编程(资深),1.92,大专及以上 -359,CNC/数控编程(主管),1.92,大专及以上 -360,CNC/数控编程,1.54,大专及以上 -361,高级药品生产/质量管理员,1.46,大专及以上 -362,药品生产/质量管理员(专家),1.46,大专及以上 -363,药品生产/质量管理员(资深),1.46,大专及以上 -364,药品生产/质量管理员(主管),1.46,大专及以上 -365,药品生产/质量管理员,1.17,大专及以上 -366,高级康复治疗师,1.48,本科及以上 -367,康复治疗师(专家),1.48,本科及以上 -368,康复治疗师(资深),1.48,本科及以上 -369,康复治疗师(主管),1.48,本科及以上 -370,康复治疗师,1.18,本科及以上 -371,高级CNC/数控操作,1.68,高中及以上 -372,CNC/数控操作(专家),1.68,高中及以上 -373,CNC/数控操作(资深),1.68,高中及以上 -374,CNC/数控操作(主管),1.68,高中及以上 -375,CNC/数控操作,1.34,高中及以上 -376,高级电子/电器维修/保养工程师,1.64,高中及以上 -377,电子/电器维修/保养工程师(专家),1.64,高中及以上 -378,电子/电器维修/保养工程师(资深),1.64,高中及以上 -379,电子/电器维修/保养工程师(主管),1.64,高中及以上 -380,电子/电器维修/保养工程师,1.31,高中及以上 -381,高级CAD设计/制图工程师,1.50,大专及以上 -382,CAD设计/制图工程师(专家),1.50,大专及以上 -383,CAD设计/制图工程师(资深),1.50,大专及以上 -384,CAD设计/制图工程师(主管),1.50,大专及以上 -385,CAD设计/制图工程师,1.20,大专及以上 -386,高级纺织针织印染人员,1.60,初中及以上 -387,纺织针织印染人员(专家),1.60,初中及以上 -388,纺织针织印染人员(资深),1.60,初中及以上 -389,纺织针织印染人员(主管),1.60,初中及以上 -390,纺织针织印染人员,1.28,初中及以上 -391,高级商品营业员,1.20,初中及以上 -392,商品营业员(专家),1.20,初中及以上 -393,商品营业员(资深),1.20,初中及以上 -394,商品营业员(主管),1.20,初中及以上 -395,商品营业员,0.96,初中及以上 -396,高级生产辅助人员,1.40,初中及以上 -397,生产辅助人员(专家),1.40,初中及以上 -398,生产辅助人员(资深),1.40,初中及以上 -399,生产辅助人员(主管),1.40,初中及以上 
-400,生产辅助人员,1.12,初中及以上 -401,高级家政服务员,1.20,初中及以上 -402,家政服务员(专家),1.20,初中及以上 -403,家政服务员(资深),1.20,初中及以上 -404,家政服务员(主管),1.20,初中及以上 -405,家政服务员,0.96,初中及以上 -406,高级医疗器械维修/保养员,1.40,大专及以上 -407,医疗器械维修/保养员(专家),1.40,大专及以上 -408,医疗器械维修/保养员(资深),1.40,大专及以上 -409,医疗器械维修/保养员(主管),1.40,大专及以上 -410,医疗器械维修/保养员,1.12,大专及以上 -411,高级医学检验师,1.16,本科及以上 -412,医学检验师(专家),1.16,本科及以上 -413,医学检验师(资深),1.16,本科及以上 -414,医学检验师(主管),1.16,本科及以上 -415,医学检验师,0.93,本科及以上 -416,高级药店店员,0.94,高中及以上 -417,药店店员(专家),0.94,高中及以上 -418,药店店员(资深),0.94,高中及以上 -419,药店店员(主管),0.94,高中及以上 -420,药店店员,0.75,高中及以上 -421,高级化验/检验员,1.10,大专及以上 -422,化验/检验员(专家),1.10,大专及以上 -423,化验/检验员(资深),1.10,大专及以上 -424,化验/检验员(主管),1.10,大专及以上 -425,化验/检验员,0.88,大专及以上 -426,架构师,9.34,硕士及以上 -427,机器学习工程师,7.46,硕士及以上 -428,深度学习工程师,7.16,硕士及以上 -429,算法工程师,7.14,硕士及以上 -430,系统工程师,6.66,硕士及以上 -431,大模型算法工程师,3.97,硕士及以上 -432,智能驾驶系统工程师,3.38,硕士及以上 -433,Java开发工程师,2.96,本科及以上 -434,前端开发工程师,2.42,本科及以上 -435,Python开发工程师,2.87,本科及以上 -436,嵌入式软件开发工程师,2.98,本科及以上 -437,C/C++开发工程师,3.22,本科及以上 -438,数据开发工程师,2.56,本科及以上 -439,运维工程师,1.58,本科及以上 -440,网络工程师,1.58,本科及以上 -441,硬件工程师,2.59,本科及以上 -442,UI设计师,1.27,本科及以上 -443,产品经理,2.86,本科及以上 -444,模拟芯片设计工程师,4.96,硕士及以上 -445,半导体设备工程师,1.94,本科及以上 -446,电子工程师,1.70,本科及以上 -447,制剂研发师,2.08,硕士及以上 -448,医药化学分析师,1.70,本科及以上 -449,医学信息专员,1.65,本科及以上 -450,医药代表,1.50,大专及以上 -451,涂料研发,1.95,硕士及以上 -452,材料工艺工程师,1.78,本科及以上 -453,风电工程师,1.62,本科及以上 -454,电力工程师,1.41,本科及以上 -455,机械结构工程师,2.22,本科及以上 -456,机械工艺工程师,1.63,本科及以上 -457,CNC/数控编程,1.54,大专及以上 -458,电气工程师,1.75,本科及以上 -459,自动化工程师,1.66,本科及以上 -460,新媒体运营,1.39,大专及以上 -461,国内电商运营,1.47,大专及以上 -462,短视频运营,1.15,大专及以上 -463,网络销售员,1.66,高中及以上 -464,设备维护工程师,1.49,大专及以上 -465,硬件测试工程师,1.71,本科及以上 -466,CAD设计/制图工程师,1.20,大专及以上 -467,电子/电器维修/保养工程师,1.31,高中及以上 -468,数据分析师,1.92,本科及以上 -469,IT项目经理,2.89,本科及以上 -470,3D设计师,1.46,大专及以上 -471,IT技术/研发总监,5.01,硕士及以上 -472,移动开发工程师,2.86,本科及以上 -473,药品生产/质量管理员,1.17,大专及以上 -474,药店店员,0.75,高中及以上 -475,康复治疗师,1.18,本科及以上 -476,化验/检验员,0.88,大专及以上 -477,医疗器械维修/保养员,1.12,大专及以上 -478,医学检验师,0.93,本科及以上 -479,核力/火力工程师,1.54,本科及以上 -480,热能工程师,1.74,本科及以上 -481,CNC/数控操作,1.34,高中及以上 
-482,机器人调试工程师,1.55,大专及以上 -483,中层及以上管理人员,2.70,本科及以上 -484,专业技术人员,1.97,本科及以上 -485,办事人员和有关人员,1.25,大专及以上 -486,社会生产服务和生活服务人员,1.04,高中及以上 -487,生产制造及有关人员,1.04,高中及以上 -488,纺织针织印染人员,1.28,初中及以上 -489,商品营业员,0.96,初中及以上 -490,生产辅助人员,1.12,初中及以上 -491,营销员,1.44,高中及以上 -492,家政服务员,0.96,初中及以上 -493,安全工程师,2.40,本科及以上 -494,应急救援员,1.28,高中及以上 -495,消防设施操作员,1.12,初中及以上 -496,风险评估专员,1.92,本科及以上 -497,企业安全主管,2.88,本科及以上 -498,养老护理员,1.28,初中及以上 -499,康复护理员,1.60,大专及以上 -500,老年社工,1.44,本科及以上 \ No newline at end of file +1,高级架构师,7.01,硕士及以上 +2,架构师(专家),7.01,硕士及以上 +3,架构师(资深),7.01,硕士及以上 +4,架构师(主管),7.01,硕士及以上 +5,架构师,5.84,硕士及以上 +6,高级机器学习工程师,5.59,硕士及以上 +7,机器学习工程师(专家),5.59,硕士及以上 +8,机器学习工程师(资深),5.59,硕士及以上 +9,机器学习工程师(主管),5.59,硕士及以上 +10,机器学习工程师,4.66,硕士及以上 +11,高级深度学习工程师,5.36,硕士及以上 +12,深度学习工程师(专家),5.36,硕士及以上 +13,深度学习工程师(资深),5.36,硕士及以上 +14,深度学习工程师(主管),5.36,硕士及以上 +15,深度学习工程师,4.47,硕士及以上 +16,高级算法工程师,5.35,硕士及以上 +17,算法工程师(专家),5.35,硕士及以上 +18,算法工程师(资深),5.35,硕士及以上 +19,算法工程师(主管),5.35,硕士及以上 +20,算法工程师,4.46,硕士及以上 +21,高级系统工程师,4.99,硕士及以上 +22,系统工程师(专家),4.99,硕士及以上 +23,系统工程师(资深),4.99,硕士及以上 +24,系统工程师(主管),4.99,硕士及以上 +25,系统工程师,4.16,硕士及以上 +26,高级IT技术/研发总监,3.76,硕士及以上 +27,IT技术/研发总监(专家),3.76,硕士及以上 +28,IT技术/研发总监(资深),3.76,硕士及以上 +29,IT技术/研发总监(主管),3.76,硕士及以上 +30,IT技术/研发总监,3.13,硕士及以上 +31,高级模拟芯片设计工程师,3.72,硕士及以上 +32,模拟芯片设计工程师(专家),3.72,硕士及以上 +33,模拟芯片设计工程师(资深),3.72,硕士及以上 +34,模拟芯片设计工程师(主管),3.72,硕士及以上 +35,模拟芯片设计工程师,3.10,硕士及以上 +36,高级大模型算法工程师,2.98,硕士及以上 +37,大模型算法工程师(专家),2.98,硕士及以上 +38,大模型算法工程师(资深),2.98,硕士及以上 +39,大模型算法工程师(主管),2.98,硕士及以上 +40,大模型算法工程师,2.48,硕士及以上 +41,高级C/C++开发工程师,2.41,本科及以上 +42,C/C++开发工程师(专家),2.41,本科及以上 +43,C/C++开发工程师(资深),2.41,本科及以上 +44,C/C++开发工程师(主管),2.41,本科及以上 +45,C/C++开发工程师,2.01,本科及以上 +46,高级智能驾驶系统工程师,2.53,本科及以上 +47,智能驾驶系统工程师(专家),2.53,本科及以上 +48,智能驾驶系统工程师(资深),2.53,本科及以上 +49,智能驾驶系统工程师(主管),2.53,本科及以上 +50,智能驾驶系统工程师,2.11,本科及以上 +51,高级嵌入式软件开发工程师,2.23,本科及以上 +52,嵌入式软件开发工程师(专家),2.23,本科及以上 +53,嵌入式软件开发工程师(资深),2.23,本科及以上 +54,嵌入式软件开发工程师(主管),2.23,本科及以上 +55,嵌入式软件开发工程师,1.86,本科及以上 +56,高级Java开发工程师,2.22,本科及以上 +57,Java开发工程师(专家),2.22,本科及以上 +58,Java开发工程师(资深),2.22,本科及以上 
+59,Java开发工程师(主管),2.22,本科及以上 +60,Java开发工程师,1.85,本科及以上 +61,高级IT项目经理,2.17,本科及以上 +62,IT项目经理(专家),2.17,本科及以上 +63,IT项目经理(资深),2.17,本科及以上 +64,IT项目经理(主管),2.17,本科及以上 +65,IT项目经理,1.81,本科及以上 +66,高级企业安全主管,2.16,本科及以上 +67,企业安全主管(专家),2.16,本科及以上 +68,企业安全主管(资深),2.16,本科及以上 +69,企业安全主管(主管),2.16,本科及以上 +70,企业安全主管,1.80,本科及以上 +71,高级建筑师,2.16,本科及以上 +72,建筑师(专家),2.16,本科及以上 +73,建筑师(资深),2.16,本科及以上 +74,建筑师(主管),2.16,本科及以上 +75,建筑师,1.80,本科及以上 +76,高级Python开发工程师,2.15,本科及以上 +77,Python开发工程师(专家),2.15,本科及以上 +78,Python开发工程师(资深),2.15,本科及以上 +79,Python开发工程师(主管),2.15,本科及以上 +80,Python开发工程师,1.79,本科及以上 +81,高级产品经理,2.12,本科及以上 +82,产品经理(专家),2.12,本科及以上 +83,产品经理(资深),2.12,本科及以上 +84,产品经理(主管),2.12,本科及以上 +85,产品经理,1.77,本科及以上 +86,高级移动开发工程师,2.12,本科及以上 +87,移动开发工程师(专家),2.12,本科及以上 +88,移动开发工程师(资深),2.12,本科及以上 +89,移动开发工程师(主管),2.12,本科及以上 +90,移动开发工程师,1.77,本科及以上 +91,高级中层及以上管理人员,2.03,本科及以上 +92,中层及以上管理人员(专家),2.03,本科及以上 +93,中层及以上管理人员(资深),2.03,本科及以上 +94,中层及以上管理人员(主管),2.03,本科及以上 +95,中层及以上管理人员,1.69,本科及以上 +96,高级安全工程师,1.80,本科及以上 +97,安全工程师(专家),1.80,本科及以上 +98,安全工程师(资深),1.80,本科及以上 +99,安全工程师(主管),1.80,本科及以上 +100,安全工程师,1.50,本科及以上 +101,高级医生,1.80,硕士及以上 +102,医生(专家),1.80,硕士及以上 +103,医生(资深),1.80,硕士及以上 +104,医生(主管),1.80,硕士及以上 +105,医生,1.50,硕士及以上 +106,高级硬件工程师,1.94,本科及以上 +107,硬件工程师(专家),1.94,本科及以上 +108,硬件工程师(资深),1.94,本科及以上 +109,硬件工程师(主管),1.94,本科及以上 +110,硬件工程师,1.62,本科及以上 +111,高级数据开发工程师,1.92,本科及以上 +112,数据开发工程师(专家),1.92,本科及以上 +113,数据开发工程师(资深),1.92,本科及以上 +114,数据开发工程师(主管),1.92,本科及以上 +115,数据开发工程师,1.60,本科及以上 +116,高级前端开发工程师,1.81,本科及以上 +117,前端开发工程师(专家),1.81,本科及以上 +118,前端开发工程师(资深),1.81,本科及以上 +119,前端开发工程师(主管),1.81,本科及以上 +120,前端开发工程师,1.51,本科及以上 +121,高级机械结构工程师,1.67,本科及以上 +122,机械结构工程师(专家),1.67,本科及以上 +123,机械结构工程师(资深),1.67,本科及以上 +124,机械结构工程师(主管),1.67,本科及以上 +125,机械结构工程师,1.39,本科及以上 +126,高级专业技术人员,1.48,本科及以上 +127,专业技术人员(专家),1.48,本科及以上 +128,专业技术人员(资深),1.48,本科及以上 +129,专业技术人员(主管),1.48,本科及以上 +130,专业技术人员,1.23,本科及以上 +131,高级制剂研发师,1.56,硕士及以上 +132,制剂研发师(专家),1.56,硕士及以上 +133,制剂研发师(资深),1.56,硕士及以上 +134,制剂研发师(主管),1.56,硕士及以上 +135,制剂研发师,1.30,硕士及以上 +136,高级涂料研发,1.46,硕士及以上 +137,涂料研发(专家),1.46,硕士及以上 
+138,涂料研发(资深),1.46,硕士及以上 +139,涂料研发(主管),1.46,硕士及以上 +140,涂料研发,1.22,硕士及以上 +141,高级风险评估专员,1.44,本科及以上 +142,风险评估专员(专家),1.44,本科及以上 +143,风险评估专员(资深),1.44,本科及以上 +144,风险评估专员(主管),1.44,本科及以上 +145,风险评估专员,1.20,本科及以上 +146,高级数据分析师,1.44,本科及以上 +147,数据分析师(专家),1.44,本科及以上 +148,数据分析师(资深),1.44,本科及以上 +149,数据分析师(主管),1.44,本科及以上 +150,数据分析师,1.20,本科及以上 +151,高级律师,1.44,本科及以上 +152,律师(专家),1.44,本科及以上 +153,律师(资深),1.44,本科及以上 +154,律师(主管),1.44,本科及以上 +155,律师,1.20,本科及以上 +156,高级市场营销经理,1.44,本科及以上 +157,市场营销经理(专家),1.44,本科及以上 +158,市场营销经理(资深),1.44,本科及以上 +159,市场营销经理(主管),1.44,本科及以上 +160,市场营销经理,1.20,本科及以上 +161,高级财务经理,1.44,本科及以上 +162,财务经理(专家),1.44,本科及以上 +163,财务经理(资深),1.44,本科及以上 +164,财务经理(主管),1.44,本科及以上 +165,财务经理,1.20,本科及以上 +166,高级半导体设备工程师,1.45,本科及以上 +167,半导体设备工程师(专家),1.45,本科及以上 +168,半导体设备工程师(资深),1.45,本科及以上 +169,半导体设备工程师(主管),1.45,本科及以上 +170,半导体设备工程师,1.21,本科及以上 +171,高级材料工艺工程师,1.33,本科及以上 +172,材料工艺工程师(专家),1.33,本科及以上 +173,材料工艺工程师(资深),1.33,本科及以上 +174,材料工艺工程师(主管),1.33,本科及以上 +175,材料工艺工程师,1.11,本科及以上 +176,高级医药化学分析师,1.27,本科及以上 +177,医药化学分析师(专家),1.27,本科及以上 +178,医药化学分析师(资深),1.27,本科及以上 +179,医药化学分析师(主管),1.27,本科及以上 +180,医药化学分析师,1.06,本科及以上 +181,高级电子工程师,1.27,本科及以上 +182,电子工程师(专家),1.27,本科及以上 +183,电子工程师(资深),1.27,本科及以上 +184,电子工程师(主管),1.27,本科及以上 +185,电子工程师,1.06,本科及以上 +186,高级医学信息专员,1.24,本科及以上 +187,医学信息专员(专家),1.24,本科及以上 +188,医学信息专员(资深),1.24,本科及以上 +189,医学信息专员(主管),1.24,本科及以上 +190,医学信息专员,1.03,本科及以上 +191,高级电气工程师,1.31,本科及以上 +192,电气工程师(专家),1.31,本科及以上 +193,电气工程师(资深),1.31,本科及以上 +194,电气工程师(主管),1.31,本科及以上 +195,电气工程师,1.09,本科及以上 +196,高级热能工程师,1.31,本科及以上 +197,热能工程师(专家),1.31,本科及以上 +198,热能工程师(资深),1.31,本科及以上 +199,热能工程师(主管),1.31,本科及以上 +200,热能工程师,1.09,本科及以上 +201,高级硬件测试工程师,1.28,本科及以上 +202,硬件测试工程师(专家),1.28,本科及以上 +203,硬件测试工程师(资深),1.28,本科及以上 +204,硬件测试工程师(主管),1.28,本科及以上 +205,硬件测试工程师,1.07,本科及以上 +206,高级自动化工程师,1.25,本科及以上 +207,自动化工程师(专家),1.25,本科及以上 +208,自动化工程师(资深),1.25,本科及以上 +209,自动化工程师(主管),1.25,本科及以上 +210,自动化工程师,1.04,本科及以上 +211,高级网络销售员,1.25,高中及以上 +212,网络销售员(专家),1.25,高中及以上 +213,网络销售员(资深),1.25,高中及以上 +214,网络销售员(主管),1.25,高中及以上 +215,网络销售员,1.04,高中及以上 
+216,高级机械工艺工程师,1.22,本科及以上 +217,机械工艺工程师(专家),1.22,本科及以上 +218,机械工艺工程师(资深),1.22,本科及以上 +219,机械工艺工程师(主管),1.22,本科及以上 +220,机械工艺工程师,1.02,本科及以上 +221,高级人力资源经理,1.20,本科及以上 +222,人力资源经理(专家),1.20,本科及以上 +223,人力资源经理(资深),1.20,本科及以上 +224,人力资源经理(主管),1.20,本科及以上 +225,人力资源经理,1.00,本科及以上 +226,高级土木工程师,1.20,本科及以上 +227,土木工程师(专家),1.20,本科及以上 +228,土木工程师(资深),1.20,本科及以上 +229,土木工程师(主管),1.20,本科及以上 +230,土木工程师,1.00,本科及以上 +231,高级康复护理员,1.20,大专及以上 +232,康复护理员(专家),1.20,大专及以上 +233,康复护理员(资深),1.20,大专及以上 +234,康复护理员(主管),1.20,大专及以上 +235,康复护理员,1.00,大专及以上 +236,高级核力/火力工程师,1.15,本科及以上 +237,核力/火力工程师(专家),1.15,本科及以上 +238,核力/火力工程师(资深),1.15,本科及以上 +239,核力/火力工程师(主管),1.15,本科及以上 +240,核力/火力工程师,0.96,本科及以上 +241,高级CNC/数控编程,1.15,大专及以上 +242,CNC/数控编程(专家),1.15,大专及以上 +243,CNC/数控编程(资深),1.15,大专及以上 +244,CNC/数控编程(主管),1.15,大专及以上 +245,CNC/数控编程,0.96,大专及以上 +246,高级机器人调试工程师,1.16,大专及以上 +247,机器人调试工程师(专家),1.16,大专及以上 +248,机器人调试工程师(资深),1.16,大专及以上 +249,机器人调试工程师(主管),1.16,大专及以上 +250,机器人调试工程师,0.97,大专及以上 +251,高级运维工程师,1.19,本科及以上 +252,运维工程师(专家),1.19,本科及以上 +253,运维工程师(资深),1.19,本科及以上 +254,运维工程师(主管),1.19,本科及以上 +255,运维工程师,0.99,本科及以上 +256,高级网络工程师,1.19,本科及以上 +257,网络工程师(专家),1.19,本科及以上 +258,网络工程师(资深),1.19,本科及以上 +259,网络工程师(主管),1.19,本科及以上 +260,网络工程师,0.99,本科及以上 +261,高级设备维护工程师,1.12,大专及以上 +262,设备维护工程师(专家),1.12,大专及以上 +263,设备维护工程师(资深),1.12,大专及以上 +264,设备维护工程师(主管),1.12,大专及以上 +265,设备维护工程师,0.93,大专及以上 +266,高级国内电商运营,1.10,大专及以上 +267,国内电商运营(专家),1.10,大专及以上 +268,国内电商运营(资深),1.10,大专及以上 +269,国内电商运营(主管),1.10,大专及以上 +270,国内电商运营,0.92,大专及以上 +271,高级营销员,1.08,高中及以上 +272,营销员(专家),1.08,高中及以上 +273,营销员(资深),1.08,高中及以上 +274,营销员(主管),1.08,高中及以上 +275,营销员,0.90,高中及以上 +276,高级老年社工,1.08,本科及以上 +277,老年社工(专家),1.08,本科及以上 +278,老年社工(资深),1.08,本科及以上 +279,老年社工(主管),1.08,本科及以上 +280,老年社工,0.90,本科及以上 +281,高级会计师,1.08,本科及以上 +282,会计师(专家),1.08,本科及以上 +283,会计师(资深),1.08,本科及以上 +284,会计师(主管),1.08,本科及以上 +285,会计师,0.90,本科及以上 +286,高级3D设计师,1.09,大专及以上 +287,3D设计师(专家),1.09,大专及以上 +288,3D设计师(资深),1.09,大专及以上 +289,3D设计师(主管),1.09,大专及以上 +290,3D设计师,0.91,大专及以上 +291,高级风电工程师,1.21,本科及以上 +292,风电工程师(专家),1.21,本科及以上 +293,风电工程师(资深),1.21,本科及以上 
+294,风电工程师(主管),1.21,本科及以上 +295,风电工程师,1.01,本科及以上 +296,高级医药代表,1.13,大专及以上 +297,医药代表(专家),1.13,大专及以上 +298,医药代表(资深),1.13,大专及以上 +299,医药代表(主管),1.13,大专及以上 +300,医药代表,0.94,大专及以上 +301,高级新媒体运营,1.04,大专及以上 +302,新媒体运营(专家),1.04,大专及以上 +303,新媒体运营(资深),1.04,大专及以上 +304,新媒体运营(主管),1.04,大专及以上 +305,新媒体运营,0.87,大专及以上 +306,高级电力工程师,1.06,本科及以上 +307,电力工程师(专家),1.06,本科及以上 +308,电力工程师(资深),1.06,本科及以上 +309,电力工程师(主管),1.06,本科及以上 +310,电力工程师,0.88,本科及以上 +311,高级电子/电器维修/保养工程师,0.98,高中及以上 +312,电子/电器维修/保养工程师(专家),0.98,高中及以上 +313,电子/电器维修/保养工程师(资深),0.98,高中及以上 +314,电子/电器维修/保养工程师(主管),0.98,高中及以上 +315,电子/电器维修/保养工程师,0.82,高中及以上 +316,高级CAD设计/制图工程师,0.90,大专及以上 +317,CAD设计/制图工程师(专家),0.90,大专及以上 +318,CAD设计/制图工程师(资深),0.90,大专及以上 +319,CAD设计/制图工程师(主管),0.90,大专及以上 +320,CAD设计/制图工程师,0.75,大专及以上 +321,高级短视频运营,0.86,大专及以上 +322,短视频运营(专家),0.86,大专及以上 +323,短视频运营(资深),0.86,大专及以上 +324,短视频运营(主管),0.86,大专及以上 +325,短视频运营,0.72,大专及以上 +326,高级养老护理员,0.96,初中及以上 +327,养老护理员(专家),0.96,初中及以上 +328,养老护理员(资深),0.96,初中及以上 +329,养老护理员(主管),0.96,初中及以上 +330,养老护理员,0.80,初中及以上 +331,高级应急救援员,0.96,高中及以上 +332,应急救援员(专家),0.96,高中及以上 +333,应急救援员(资深),0.96,高中及以上 +334,应急救援员(主管),0.96,高中及以上 +335,应急救援员,0.80,高中及以上 +336,高级纺织针织印染人员,0.96,初中及以上 +337,纺织针织印染人员(专家),0.96,初中及以上 +338,纺织针织印染人员(资深),0.96,初中及以上 +339,纺织针织印染人员(主管),0.96,初中及以上 +340,纺织针织印染人员,0.80,初中及以上 +341,高级教师,0.96,本科及以上 +342,教师(专家),0.96,本科及以上 +343,教师(资深),0.96,本科及以上 +344,教师(主管),0.96,本科及以上 +345,教师,0.80,本科及以上 +346,高级办事人员和有关人员,0.94,大专及以上 +347,办事人员和有关人员(专家),0.94,大专及以上 +348,办事人员和有关人员(资深),0.94,大专及以上 +349,办事人员和有关人员(主管),0.94,大专及以上 +350,办事人员和有关人员,0.78,大专及以上 +351,高级CNC/数控操作,1.01,高中及以上 +352,CNC/数控操作(专家),1.01,高中及以上 +353,CNC/数控操作(资深),1.01,高中及以上 +354,CNC/数控操作(主管),1.01,高中及以上 +355,CNC/数控操作,0.84,高中及以上 +356,高级UI设计师,0.95,本科及以上 +357,UI设计师(专家),0.95,本科及以上 +358,UI设计师(资深),0.95,本科及以上 +359,UI设计师(主管),0.95,本科及以上 +360,UI设计师,0.79,本科及以上 +361,高级生产辅助人员,0.84,初中及以上 +362,生产辅助人员(专家),0.84,初中及以上 +363,生产辅助人员(资深),0.84,初中及以上 +364,生产辅助人员(主管),0.84,初中及以上 +365,生产辅助人员,0.70,初中及以上 +366,高级消防设施操作员,0.84,初中及以上 +367,消防设施操作员(专家),0.84,初中及以上 +368,消防设施操作员(资深),0.84,初中及以上 +369,消防设施操作员(主管),0.84,初中及以上 
+370,消防设施操作员,0.70,初中及以上 +371,高级护士,0.84,大专及以上 +372,护士(专家),0.84,大专及以上 +373,护士(资深),0.84,大专及以上 +374,护士(主管),0.84,大专及以上 +375,护士,0.70,大专及以上 +376,高级医疗器械维修/保养员,0.84,大专及以上 +377,医疗器械维修/保养员(专家),0.84,大专及以上 +378,医疗器械维修/保养员(资深),0.84,大专及以上 +379,医疗器械维修/保养员(主管),0.84,大专及以上 +380,医疗器械维修/保养员,0.70,大专及以上 +381,高级药品生产/质量管理员,0.88,大专及以上 +382,药品生产/质量管理员(专家),0.88,大专及以上 +383,药品生产/质量管理员(资深),0.88,大专及以上 +384,药品生产/质量管理员(主管),0.88,大专及以上 +385,药品生产/质量管理员,0.73,大专及以上 +386,高级康复治疗师,0.89,本科及以上 +387,康复治疗师(专家),0.89,本科及以上 +388,康复治疗师(资深),0.89,本科及以上 +389,康复治疗师(主管),0.89,本科及以上 +390,康复治疗师,0.74,本科及以上 +391,高级商品营业员,0.72,初中及以上 +392,商品营业员(专家),0.72,初中及以上 +393,商品营业员(资深),0.72,初中及以上 +394,商品营业员(主管),0.72,初中及以上 +395,商品营业员,0.60,初中及以上 +396,高级家政服务员,0.72,初中及以上 +397,家政服务员(专家),0.72,初中及以上 +398,家政服务员(资深),0.72,初中及以上 +399,家政服务员(主管),0.72,初中及以上 +400,家政服务员,0.60,初中及以上 +401,高级社会生产服务和生活服务人员,0.78,高中及以上 +402,社会生产服务和生活服务人员(专家),0.78,高中及以上 +403,社会生产服务和生活服务人员(资深),0.78,高中及以上 +404,社会生产服务和生活服务人员(主管),0.78,高中及以上 +405,社会生产服务和生活服务人员,0.65,高中及以上 +406,高级生产制造及有关人员,0.78,高中及以上 +407,生产制造及有关人员(专家),0.78,高中及以上 +408,生产制造及有关人员(资深),0.78,高中及以上 +409,生产制造及有关人员(主管),0.78,高中及以上 +410,生产制造及有关人员,0.65,高中及以上 +411,高级医学检验师,0.70,本科及以上 +412,医学检验师(专家),0.70,本科及以上 +413,医学检验师(资深),0.70,本科及以上 +414,医学检验师(主管),0.70,本科及以上 +415,医学检验师,0.58,本科及以上 +416,高级化验/检验员,0.66,大专及以上 +417,化验/检验员(专家),0.66,大专及以上 +418,化验/检验员(资深),0.66,大专及以上 +419,化验/检验员(主管),0.66,大专及以上 +420,化验/检验员,0.55,大专及以上 +421,高级药店店员,0.56,高中及以上 +422,药店店员(专家),0.56,高中及以上 +423,药店店员(资深),0.56,高中及以上 +424,药店店员(主管),0.56,高中及以上 +425,药店店员,0.47,高中及以上 +426,架构师,5.84,硕士及以上 +427,机器学习工程师,4.66,硕士及以上 +428,深度学习工程师,4.47,硕士及以上 +429,算法工程师,4.46,硕士及以上 +430,系统工程师,4.16,硕士及以上 +431,IT技术/研发总监,3.13,硕士及以上 +432,模拟芯片设计工程师,3.10,硕士及以上 +433,大模型算法工程师,2.48,硕士及以上 +434,C/C++开发工程师,2.01,本科及以上 +435,智能驾驶系统工程师,2.11,本科及以上 +436,嵌入式软件开发工程师,1.86,本科及以上 +437,Java开发工程师,1.85,本科及以上 +438,IT项目经理,1.81,本科及以上 +439,企业安全主管,1.80,本科及以上 +440,建筑师,1.80,本科及以上 +441,Python开发工程师,1.79,本科及以上 +442,产品经理,1.77,本科及以上 +443,移动开发工程师,1.77,本科及以上 +444,中层及以上管理人员,1.69,本科及以上 +445,安全工程师,1.50,本科及以上 +446,医生,1.50,硕士及以上 
+447,硬件工程师,1.62,本科及以上 +448,数据开发工程师,1.60,本科及以上 +449,前端开发工程师,1.51,本科及以上 +450,机械结构工程师,1.39,本科及以上 +451,专业技术人员,1.23,本科及以上 +452,制剂研发师,1.30,硕士及以上 +453,涂料研发,1.22,硕士及以上 +454,风险评估专员,1.20,本科及以上 +455,数据分析师,1.20,本科及以上 +456,律师,1.20,本科及以上 +457,市场营销经理,1.20,本科及以上 +458,财务经理,1.20,本科及以上 +459,半导体设备工程师,1.21,本科及以上 +460,材料工艺工程师,1.11,本科及以上 +461,医药化学分析师,1.06,本科及以上 +462,电子工程师,1.06,本科及以上 +463,医学信息专员,1.03,本科及以上 +464,电气工程师,1.09,本科及以上 +465,热能工程师,1.09,本科及以上 +466,硬件测试工程师,1.07,本科及以上 +467,自动化工程师,1.04,本科及以上 +468,网络销售员,1.04,高中及以上 +469,机械工艺工程师,1.02,本科及以上 +470,人力资源经理,1.00,本科及以上 +471,土木工程师,1.00,本科及以上 +472,康复护理员,1.00,大专及以上 +473,核力/火力工程师,0.96,本科及以上 +474,CNC/数控编程,0.96,大专及以上 +475,机器人调试工程师,0.97,大专及以上 +476,运维工程师,0.99,本科及以上 +477,网络工程师,0.99,本科及以上 +478,设备维护工程师,0.93,大专及以上 +479,国内电商运营,0.92,大专及以上 +480,营销员,0.90,高中及以上 +481,老年社工,0.90,本科及以上 +482,会计师,0.90,本科及以上 +483,3D设计师,0.91,大专及以上 +484,风电工程师,1.01,本科及以上 +485,医药代表,0.94,大专及以上 +486,新媒体运营,0.87,大专及以上 +487,电力工程师,0.88,本科及以上 +488,电子/电器维修/保养工程师,0.82,高中及以上 +489,CAD设计/制图工程师,0.75,大专及以上 +490,短视频运营,0.72,大专及以上 +491,养老护理员,0.80,初中及以上 +492,应急救援员,0.80,高中及以上 +493,纺织针织印染人员,0.80,初中及以上 +494,教师,0.80,本科及以上 +495,办事人员和有关人员,0.78,大专及以上 +496,CNC/数控操作,0.84,高中及以上 +497,UI设计师,0.79,本科及以上 +498,生产辅助人员,0.70,初中及以上 +499,消防设施操作员,0.70,初中及以上 +500,护士,0.70,大专及以上 \ No newline at end of file diff --git a/w6/0b57d2afff244774fc4f1b8f43afccd7~tplv-a9rns2rl98-pc_smart_face_crop-v1_512_384.png b/w6/0b57d2afff244774fc4f1b8f43afccd7~tplv-a9rns2rl98-pc_smart_face_crop-v1_512_384.png new file mode 100644 index 0000000..676477e Binary files /dev/null and b/w6/0b57d2afff244774fc4f1b8f43afccd7~tplv-a9rns2rl98-pc_smart_face_crop-v1_512_384.png differ