You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

142 lines
5.4 KiB

package java01;
import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;
/**
 * Computes aggregate statistics over a list of {@link Job} postings: skill frequency,
 * average salary grouped by experience / education / location, and a salary distribution.
 *
 * <p>Salary figures are obtained through {@link DataCleaner#extractMinSalary} and
 * {@link DataCleaner#extractMaxSalary}; a job's salary is taken as the midpoint of the two.
 * NOTE(review): a non-positive midpoint is treated as "salary unknown" and such jobs are
 * excluded from every salary statistic. This presumes {@code DataCleaner} reports an
 * unparseable salary as a non-positive value -- confirm against {@code DataCleaner}.
 */
public class DataAnalyzer {

    /** Exclusive upper bounds of every salary bucket except the last, open-ended one. */
    private static final int[] SALARY_BUCKET_UPPER_BOUNDS = {5000, 10000, 15000, 20000, 30000};

    /**
     * Bucket labels, index-aligned with {@link #SALARY_BUCKET_UPPER_BOUNDS}; the extra final
     * label covers every salary at or above the last bound.
     */
    private static final String[] SALARY_BUCKET_LABELS = {
        "5K以下", "5K-10K", "10K-15K", "15K-20K", "20K-30K", "30K以上"
    };

    private final DataCleaner cleaner;

    /** Creates an analyzer backed by a fresh {@link DataCleaner}. */
    public DataAnalyzer() {
        this.cleaner = new DataCleaner();
    }

    /**
     * Counts how often each skill token occurs across all jobs.
     *
     * <p>Each job's skills string is split on single spaces; empty tokens (produced by
     * consecutive spaces) are ignored, as are jobs whose skills string is null or empty.
     *
     * @param jobs the jobs to analyze
     * @return a map from skill to occurrence count whose iteration order is by descending
     *         count, with ties broken by skill name so the order is deterministic
     */
    public Map<String, Integer> analyzeSkillFrequency(List<Job> jobs) {
        Map<String, Integer> counts = new HashMap<>();
        for (Job job : jobs) {
            String skills = job.getSkills();
            if (skills == null || skills.isEmpty()) {
                continue;
            }
            for (String skill : skills.split(" ")) {
                if (!skill.isEmpty()) {
                    counts.merge(skill, 1, Integer::sum);
                }
            }
        }

        // Most frequent first; equal counts fall back to the skill name so the result
        // does not depend on HashMap iteration order.
        Comparator<Map.Entry<String, Integer>> byCountDescThenSkill =
            Map.Entry.<String, Integer>comparingByValue().reversed()
                .thenComparing(Map.Entry.<String, Integer>comparingByKey());

        return counts.entrySet().stream()
            .sorted(byCountDescThenSkill)
            .collect(Collectors.toMap(
                Map.Entry::getKey,
                Map.Entry::getValue,
                (first, second) -> first,
                LinkedHashMap::new));
    }

    /**
     * Computes the mean salary for each experience level.
     *
     * @param jobs the jobs to analyze
     * @return a map from experience level to the mean of the salary midpoints of the jobs at
     *         that level; jobs with a null/empty experience or a non-positive salary midpoint
     *         are skipped
     */
    public Map<String, Double> analyzeSalaryByExperience(List<Job> jobs) {
        return averageSalaryBy(jobs, Job::getExperience);
    }

    /**
     * Computes the mean salary for each education level.
     *
     * @param jobs the jobs to analyze
     * @return a map from education level to the mean of the salary midpoints of the jobs at
     *         that level; jobs with a null/empty education or a non-positive salary midpoint
     *         are skipped
     */
    public Map<String, Double> analyzeSalaryByEducation(List<Job> jobs) {
        return averageSalaryBy(jobs, Job::getEducation);
    }

    /**
     * Computes the mean salary for each location.
     *
     * @param jobs the jobs to analyze
     * @return a map from location to the mean of the salary midpoints of the jobs there;
     *         jobs with a null/empty location or a non-positive salary midpoint are skipped
     */
    public Map<String, Double> analyzeSalaryByLocation(List<Job> jobs) {
        return averageSalaryBy(jobs, Job::getLocation);
    }

    /**
     * Counts jobs per salary bucket.
     *
     * <p>Jobs whose salary midpoint is not positive are skipped, consistent with the
     * salary-by-dimension methods, so that postings without a usable salary are not
     * reported as belonging to the lowest bucket.
     *
     * @param jobs the jobs to analyze
     * @return a map from bucket label to job count, iterating from the lowest bucket to the
     *         highest; buckets that contain no job are omitted
     */
    public Map<String, Integer> analyzeSalaryDistribution(List<Job> jobs) {
        int[] bucketCounts = new int[SALARY_BUCKET_LABELS.length];
        for (Job job : jobs) {
            int salary = averageSalary(job);
            if (salary > 0) {
                bucketCounts[salaryBucketIndex(salary)]++;
            }
        }

        Map<String, Integer> distribution = new LinkedHashMap<>();
        for (int i = 0; i < bucketCounts.length; i++) {
            if (bucketCounts[i] > 0) {
                distribution.put(SALARY_BUCKET_LABELS[i], bucketCounts[i]);
            }
        }
        return distribution;
    }

    /** Groups jobs by the extracted key and returns the mean salary midpoint of each group. */
    private Map<String, Double> averageSalaryBy(List<Job> jobs, Function<Job, String> keyExtractor) {
        Map<String, IntSummaryStatistics> statsByKey = new HashMap<>();
        for (Job job : jobs) {
            String key = keyExtractor.apply(job);
            int salary = averageSalary(job);
            // A null key used to raise NullPointerException; treat it like an empty one.
            if (key != null && !key.isEmpty() && salary > 0) {
                statsByKey.computeIfAbsent(key, k -> new IntSummaryStatistics()).accept(salary);
            }
        }

        Map<String, Double> averages = new HashMap<>();
        for (Map.Entry<String, IntSummaryStatistics> entry : statsByKey.entrySet()) {
            averages.put(entry.getKey(), entry.getValue().getAverage());
        }
        return averages;
    }

    /** Returns the midpoint of the job's minimum and maximum salary as reported by the cleaner. */
    private int averageSalary(Job job) {
        int minSalary = cleaner.extractMinSalary(job.getSalary());
        int maxSalary = cleaner.extractMaxSalary(job.getSalary());
        // Sum in long so two large int values cannot overflow before the division.
        return (int) (((long) minSalary + maxSalary) / 2);
    }

    /** Returns the index into {@link #SALARY_BUCKET_LABELS} of the bucket containing the salary. */
    private static int salaryBucketIndex(int salary) {
        int index = 0;
        while (index < SALARY_BUCKET_UPPER_BOUNDS.length
                && salary >= SALARY_BUCKET_UPPER_BOUNDS[index]) {
            index++;
        }
        return index;
    }
}