You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
124 lines
3.4 KiB
124 lines
3.4 KiB
package java01;
|
|
|
|
import java.util.List;
|
|
import java.util.regex.Matcher;
|
|
import java.util.regex.Pattern;
|
|
|
|
public class DataCleaner {
|
|
|
|
public List<Job> cleanJobs(List<Job> jobs) {
|
|
for (Job job : jobs) {
|
|
cleanJob(job);
|
|
}
|
|
return jobs;
|
|
}
|
|
|
|
private void cleanJob(Job job) {
|
|
// 清理职位标题
|
|
if (job.getTitle() != null) {
|
|
job.setTitle(job.getTitle().trim());
|
|
}
|
|
|
|
// 清理公司名称
|
|
if (job.getCompany() != null) {
|
|
job.setCompany(job.getCompany().trim());
|
|
}
|
|
|
|
// 清理薪资
|
|
if (job.getSalary() != null) {
|
|
job.setSalary(job.getSalary().trim());
|
|
}
|
|
|
|
// 清理地点
|
|
if (job.getLocation() != null) {
|
|
job.setLocation(job.getLocation().trim());
|
|
}
|
|
|
|
// 清理经验
|
|
if (job.getExperience() != null) {
|
|
job.setExperience(job.getExperience().trim());
|
|
}
|
|
|
|
// 清理学历
|
|
if (job.getEducation() != null) {
|
|
job.setEducation(job.getEducation().trim());
|
|
}
|
|
|
|
// 清理技能
|
|
if (job.getSkills() != null) {
|
|
job.setSkills(job.getSkills().trim());
|
|
}
|
|
}
|
|
|
|
// 提取薪资范围的最小值
|
|
public int extractMinSalary(String salary) {
|
|
if (salary == null || salary.isEmpty()) {
|
|
return 0;
|
|
}
|
|
|
|
// 匹配薪资范围,如"10K-20K"
|
|
Pattern pattern = Pattern.compile("(\\d+)K-(\\d+)K");
|
|
Matcher matcher = pattern.matcher(salary);
|
|
|
|
if (matcher.find()) {
|
|
return Integer.parseInt(matcher.group(1)) * 1000;
|
|
}
|
|
|
|
// 匹配固定薪资,如"15K"
|
|
pattern = Pattern.compile("(\\d+)K");
|
|
matcher = pattern.matcher(salary);
|
|
if (matcher.find()) {
|
|
return Integer.parseInt(matcher.group(1)) * 1000;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
// 提取薪资范围的最大值
|
|
public int extractMaxSalary(String salary) {
|
|
if (salary == null || salary.isEmpty()) {
|
|
return 0;
|
|
}
|
|
|
|
// 匹配薪资范围,如"10K-20K"
|
|
Pattern pattern = Pattern.compile("(\\d+)K-(\\d+)K");
|
|
Matcher matcher = pattern.matcher(salary);
|
|
|
|
if (matcher.find()) {
|
|
return Integer.parseInt(matcher.group(2)) * 1000;
|
|
}
|
|
|
|
// 匹配固定薪资,如"15K"
|
|
pattern = Pattern.compile("(\\d+)K");
|
|
matcher = pattern.matcher(salary);
|
|
if (matcher.find()) {
|
|
return Integer.parseInt(matcher.group(1)) * 1000;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
// 提取经验年限
|
|
public int extractExperienceYears(String experience) {
|
|
if (experience == null || experience.isEmpty()) {
|
|
return 0;
|
|
}
|
|
|
|
// 匹配经验年限,如"3-5年"
|
|
Pattern pattern = Pattern.compile("(\\d+)-(\\d+)年");
|
|
Matcher matcher = pattern.matcher(experience);
|
|
|
|
if (matcher.find()) {
|
|
return Integer.parseInt(matcher.group(1));
|
|
}
|
|
|
|
// 匹配固定经验,如"3年以上"
|
|
pattern = Pattern.compile("(\\d+)年");
|
|
matcher = pattern.matcher(experience);
|
|
if (matcher.find()) {
|
|
return Integer.parseInt(matcher.group(1));
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
}
|