From 48f9f31324b67029df6858fd0a6237d65c8be3d8 Mon Sep 17 00:00:00 2001 From: LiuZihan <1353843969@qq.com> Date: Thu, 26 Mar 2026 20:11:25 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=A0=E9=99=A4=20'project/JobCrawler.java'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- project/JobCrawler.java | 86 ----------------------------------------- 1 file changed, 86 deletions(-) delete mode 100644 project/JobCrawler.java diff --git a/project/JobCrawler.java b/project/JobCrawler.java deleted file mode 100644 index f6e615b..0000000 --- a/project/JobCrawler.java +++ /dev/null @@ -1,86 +0,0 @@ -package java01; - -import java.io.BufferedReader; -import java.io.InputStreamReader; -import java.net.HttpURLConnection; -import java.net.URL; -import java.net.URLEncoder; -import java.util.ArrayList; -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -public class JobCrawler { - private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"; - - public List crawlJobs(String keyword, int pageCount) { - List jobs = new ArrayList<>(); - - try { - for (int page = 1; page <= pageCount; page++) { - String encodedKeyword = URLEncoder.encode(keyword, "UTF-8"); - String url = String.format("https://sou.zhaopin.com/?jl=489&kw=%s&p=%d", encodedKeyword, page); - System.out.println("爬取页面: " + url); - - // 发送HTTP请求 - String html = sendHttpRequest(url); - - // 解析HTML,提取职位信息 - List pageJobs = extractJobsFromHtml(html); - jobs.addAll(pageJobs); - - // 添加延迟,避免被反爬虫 - Thread.sleep(2000); - } - } catch (Exception e) { - e.printStackTrace(); - } - - return jobs; - } - - private String sendHttpRequest(String urlString) throws Exception { - URL url = new URL(urlString); - HttpURLConnection conn = (HttpURLConnection) url.openConnection(); - conn.setRequestMethod("GET"); - conn.setRequestProperty("User-Agent", USER_AGENT); - conn.setConnectTimeout(10000); - conn.setReadTimeout(10000); - - BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream(), "UTF-8")); - StringBuilder response = new StringBuilder(); - String inputLine; - - while ((inputLine = in.readLine()) != null) { - response.append(inputLine); - } - in.close(); - conn.disconnect(); - - return response.toString(); - } - - private List extractJobsFromHtml(String html) { - List jobs = new ArrayList<>(); - - // 简化的正则表达式,实际项目中可能需要更复杂的解析 - // 这里使用模拟数据,因为实际解析HTML需要更复杂的逻辑 - // 在真实环境中,建议使用Jsoup等库进行HTML解析 - - // 模拟数据 - for (int i = 1; i <= 10; i++) { - Job job = new Job( - "Java开发工程师" + i, - "科技公司" + i, - (10 + i) + "K-" + (20 + i) + "K", - "北京", - "3-5年", - "本科", - "Java Spring Boot MySQL Redis" - ); - jobs.add(job); - } - - return jobs; - } -} \ No newline at end of file