package java01;

import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;

/**
 * Aggregates a list of {@link Job} postings into simple statistics:
 * skill word frequency, average salary grouped by experience / education /
 * location, and a salary-range histogram.
 *
 * <p>Salary strings are parsed by {@link DataCleaner}. Its exact contract is
 * not visible from this class; the code below presumably relies on it
 * returning a non-positive value when a salary cannot be parsed, and on the
 * returned numbers being in the same unit as the 5000/10000/... thresholds
 * used by {@link #analyzeSalaryDistribution(List)} — TODO confirm.
 */
public class DataAnalyzer {

    private final DataCleaner cleaner;

    public DataAnalyzer() {
        this.cleaner = new DataCleaner();
    }

    /**
     * Counts how often each skill word occurs across all jobs.
     *
     * <p>Each job's skills string is split on single spaces; empty tokens
     * (produced by consecutive spaces) are ignored. Jobs whose skills string
     * is {@code null} or empty contribute nothing.
     *
     * @param jobs the jobs to analyze; {@code null} is treated as empty and
     *             {@code null} elements are skipped
     * @return a map from skill to occurrence count whose iteration order is
     *         descending by count, with ties ordered by skill name
     */
    public Map<String, Integer> analyzeSkillFrequency(List<Job> jobs) {
        Map<String, Integer> skillMap = new HashMap<>();
        if (jobs != null) {
            for (Job job : jobs) {
                if (job == null) {
                    continue;
                }
                String skills = job.getSkills();
                if (skills == null || skills.isEmpty()) {
                    continue;
                }
                for (String skill : skills.split(" ")) {
                    if (!skill.isEmpty()) {
                        skillMap.merge(skill, 1, Integer::sum);
                    }
                }
            }
        }

        // Sort by frequency, highest first. The explicit type witness is
        // required: without it the compiler cannot infer the entry type for
        // comparingByValue() once reversed() is chained. The key tie-break
        // makes the order independent of HashMap iteration order.
        return skillMap.entrySet().stream()
                .sorted(Map.Entry.<String, Integer>comparingByValue().reversed()
                        .thenComparing(Map.Entry.comparingByKey()))
                .collect(Collectors.toMap(
                        Map.Entry::getKey,
                        Map.Entry::getValue,
                        (first, second) -> first,
                        LinkedHashMap::new));
    }

    /**
     * Computes the average salary for each experience level.
     *
     * @param jobs the jobs to analyze; {@code null} is treated as empty
     * @return a map from experience label to the mean of the per-job salary
     *         midpoints; jobs with a {@code null}/empty label or a
     *         non-positive midpoint are excluded
     */
    public Map<String, Double> analyzeSalaryByExperience(List<Job> jobs) {
        return averageSalaryBy(jobs, Job::getExperience);
    }

    /**
     * Computes the average salary for each education level.
     *
     * @param jobs the jobs to analyze; {@code null} is treated as empty
     * @return a map from education label to the mean of the per-job salary
     *         midpoints; jobs with a {@code null}/empty label or a
     *         non-positive midpoint are excluded
     */
    public Map<String, Double> analyzeSalaryByEducation(List<Job> jobs) {
        return averageSalaryBy(jobs, Job::getEducation);
    }

    /**
     * Computes the average salary for each location.
     *
     * @param jobs the jobs to analyze; {@code null} is treated as empty
     * @return a map from location to the mean of the per-job salary
     *         midpoints; jobs with a {@code null}/empty location or a
     *         non-positive midpoint are excluded
     */
    public Map<String, Double> analyzeSalaryByLocation(List<Job> jobs) {
        return averageSalaryBy(jobs, Job::getLocation);
    }

    /**
     * Builds a histogram of jobs per salary range.
     *
     * <p>Only ranges that contain at least one job appear in the result.
     *
     * <p>NOTE(review): unlike the salary-by-X analyses, a job whose salary
     * midpoint is zero or negative is NOT skipped here; it is counted in the
     * lowest bucket. That matches the previous behavior and is kept on
     * purpose — confirm whether such jobs should be excluded instead.
     *
     * @param jobs the jobs to analyze; {@code null} is treated as empty and
     *             {@code null} elements are skipped
     * @return a map from salary-range label to the number of jobs in it
     */
    public Map<String, Integer> analyzeSalaryDistribution(List<Job> jobs) {
        Map<String, Integer> salaryDistribution = new HashMap<>();
        if (jobs == null) {
            return salaryDistribution;
        }
        for (Job job : jobs) {
            if (job == null) {
                continue;
            }
            salaryDistribution.merge(salaryRangeOf(midpointSalary(job)), 1, Integer::sum);
        }
        return salaryDistribution;
    }

    /** Groups jobs by the extracted label and averages the salary midpoints of each group. */
    private Map<String, Double> averageSalaryBy(List<Job> jobs, Function<Job, String> labelOf) {
        Map<String, List<Integer>> salariesByLabel = new HashMap<>();
        if (jobs != null) {
            for (Job job : jobs) {
                if (job == null) {
                    continue;
                }
                // Salary is parsed before the label is inspected, as before.
                int midpoint = midpointSalary(job);
                String label = labelOf.apply(job);
                if (label != null && !label.isEmpty() && midpoint > 0) {
                    salariesByLabel.computeIfAbsent(label, k -> new ArrayList<>()).add(midpoint);
                }
            }
        }

        Map<String, Double> result = new HashMap<>();
        for (Map.Entry<String, List<Integer>> entry : salariesByLabel.entrySet()) {
            double average = entry.getValue().stream()
                    .mapToInt(Integer::intValue)
                    .average()
                    .orElse(0);
            result.put(entry.getKey(), average);
        }
        return result;
    }

    /** Returns the integer midpoint of the job's parsed minimum and maximum salary. */
    private int midpointSalary(Job job) {
        int minSalary = cleaner.extractMinSalary(job.getSalary());
        int maxSalary = cleaner.extractMaxSalary(job.getSalary());
        // Sum in long so that two large int values cannot overflow.
        return (int) (((long) minSalary + maxSalary) / 2);
    }

    /** Maps a salary midpoint to its histogram bucket label. */
    private static String salaryRangeOf(int avgSalary) {
        if (avgSalary < 5000) {
            return "5K以下";
        }
        if (avgSalary < 10000) {
            return "5K-10K";
        }
        if (avgSalary < 15000) {
            return "10K-15K";
        }
        if (avgSalary < 20000) {
            return "15K-20K";
        }
        if (avgSalary < 30000) {
            return "20K-30K";
        }
        return "30K以上";
    }
}