package java01;

import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Normalizes the text fields of {@link Job} records and extracts numeric values
 * (salary bounds, years of experience) from their free-text representations.
 *
 * <p>The extraction methods never throw on malformed input: anything that cannot
 * be parsed yields {@code 0}.
 */
public class DataCleaner {

    /** Salary range such as {@code "10K-20K"}; group 1 is the lower bound, group 2 the upper. */
    private static final Pattern SALARY_RANGE = Pattern.compile("(\\d+)K-(\\d+)K");

    /** Single salary figure such as {@code "15K"}. */
    private static final Pattern SALARY_SINGLE = Pattern.compile("(\\d+)K");

    /** Experience range such as {@code "3-5年"}; group 1 is the lower bound. */
    private static final Pattern EXPERIENCE_RANGE = Pattern.compile("(\\d+)-(\\d+)年");

    /** Single experience figure such as {@code "3年以上"}. */
    private static final Pattern EXPERIENCE_SINGLE = Pattern.compile("(\\d+)年");

    /** Salary figures are written in thousands ("K"). */
    private static final int SALARY_UNIT = 1000;

    /**
     * Trims the text fields of every job in the given list, in place.
     *
     * @param jobs the jobs to clean; may be {@code null}, and {@code null} elements are skipped
     * @return the same list instance that was passed in ({@code null} if {@code jobs} was null)
     */
    public List<Job> cleanJobs(List<Job> jobs) {
        if (jobs == null) {
            return jobs;
        }
        for (Job job : jobs) {
            if (job != null) {
                cleanJob(job);
            }
        }
        return jobs;
    }

    /**
     * Trims leading and trailing whitespace from each non-null text field of a job.
     * Fields that are {@code null} are left untouched (their setters are not called).
     */
    private void cleanJob(Job job) {
        // Job title
        if (job.getTitle() != null) {
            job.setTitle(job.getTitle().trim());
        }
        // Company name
        if (job.getCompany() != null) {
            job.setCompany(job.getCompany().trim());
        }
        // Salary
        if (job.getSalary() != null) {
            job.setSalary(job.getSalary().trim());
        }
        // Location
        if (job.getLocation() != null) {
            job.setLocation(job.getLocation().trim());
        }
        // Experience
        if (job.getExperience() != null) {
            job.setExperience(job.getExperience().trim());
        }
        // Education
        if (job.getEducation() != null) {
            job.setEducation(job.getEducation().trim());
        }
        // Skills
        if (job.getSkills() != null) {
            job.setSkills(job.getSkills().trim());
        }
    }

    /**
     * Extracts the lower bound of a salary string.
     *
     * @param salary text such as {@code "10K-20K"} or {@code "15K"}; may be {@code null}
     * @return the lower bound multiplied by 1000 (e.g. 10000 for {@code "10K-20K"}),
     *         the single figure multiplied by 1000 when no range is present,
     *         or {@code 0} if nothing matches or the value does not fit in an {@code int}
     */
    public int extractMinSalary(String salary) {
        return extractSalaryBound(salary, 1);
    }

    /**
     * Extracts the upper bound of a salary string.
     *
     * @param salary text such as {@code "10K-20K"} or {@code "15K"}; may be {@code null}
     * @return the upper bound multiplied by 1000 (e.g. 20000 for {@code "10K-20K"}),
     *         the single figure multiplied by 1000 when no range is present,
     *         or {@code 0} if nothing matches or the value does not fit in an {@code int}
     */
    public int extractMaxSalary(String salary) {
        return extractSalaryBound(salary, 2);
    }

    /**
     * Extracts the minimum number of years from an experience string.
     *
     * @param experience text such as {@code "3-5年"} or {@code "3年以上"}; may be {@code null}
     * @return the lower bound of a range, or the single figure when no range is present,
     *         or {@code 0} if nothing matches or the value does not fit in an {@code int}
     */
    public int extractExperienceYears(String experience) {
        if (experience == null || experience.isEmpty()) {
            return 0;
        }
        // Try the range form first so "3-5年" yields 3 rather than the 5 that the
        // single-figure pattern would find.
        Matcher range = EXPERIENCE_RANGE.matcher(experience);
        if (range.find()) {
            return parseScaled(range.group(1), 1);
        }
        Matcher single = EXPERIENCE_SINGLE.matcher(experience);
        if (single.find()) {
            return parseScaled(single.group(1), 1);
        }
        return 0;
    }

    /**
     * Shared implementation of the salary extractors.
     *
     * @param salary     the raw salary text; may be {@code null}
     * @param rangeGroup which capture group of the range pattern to return
     *                   (1 = lower bound, 2 = upper bound)
     */
    private static int extractSalaryBound(String salary, int rangeGroup) {
        if (salary == null || salary.isEmpty()) {
            return 0;
        }
        // Try the range form first; the single-figure pattern would otherwise
        // always match the first number of a range.
        Matcher range = SALARY_RANGE.matcher(salary);
        if (range.find()) {
            return parseScaled(range.group(rangeGroup), SALARY_UNIT);
        }
        Matcher single = SALARY_SINGLE.matcher(salary);
        if (single.find()) {
            return parseScaled(single.group(1), SALARY_UNIT);
        }
        return 0;
    }

    /**
     * Parses a run of decimal digits and multiplies it by {@code factor}.
     * Returns {@code 0} instead of throwing or wrapping around when the digits
     * or the product do not fit in an {@code int}.
     */
    private static int parseScaled(String digits, int factor) {
        try {
            return Math.multiplyExact(Integer.parseInt(digits), factor);
        } catch (NumberFormatException | ArithmeticException e) {
            // Out-of-range values are treated like any other unparseable input.
            return 0;
        }
    }
}