2 changed files with 101 additions and 0 deletions
@ -0,0 +1,101 @@ |
|||
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.csv.*;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
|||
|
|||
public class JobSpider { |
|||
|
|||
// ⚠️ 注意:这个 URL 可能会随时间变化,请务必按上面的步骤在 F12 中确认最新的 URL
|
|||
// 这里的参数 keyword=Java, page=1 是示例,实际需要根据网站调整
|
|||
private static final String API_URL = "https://www.iguopin.com/api/job/search?keyword=&page=1&pageSize=20"; |
|||
|
|||
public static void main(String[] args) { |
|||
List<String[]> jobList = new ArrayList<>(); |
|||
|
|||
try { |
|||
System.out.println("🚀 开始连接国聘网数据接口..."); |
|||
|
|||
// 1. 构造请求,必须伪装 Header,否则会被拒绝
|
|||
String jsonResponse = Jsoup.connect(API_URL) |
|||
.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36") |
|||
.header("Accept", "application/json, text/plain, */*") |
|||
.header("Referer", "https://www.iguopin.com/") // 假装是从首页跳过来的
|
|||
.timeout(5000) |
|||
.ignoreContentType(true) // 重要!允许接收非 HTML 内容 (即 JSON)
|
|||
.execute() |
|||
.body(); |
|||
|
|||
// 2. 解析 JSON 数据
|
|||
JSONObject root = new JSONObject(jsonResponse); |
|||
|
|||
// ⚠️ 关键:你需要根据 F12 看到的实际 JSON 结构调整这里的键名 (key)
|
|||
// 假设数据结构是 { "data": { "list": [...] } } 或者 { "result": [...] }
|
|||
// 下面是一个通用的猜测逻辑,请根据实际打印结果修改!
|
|||
|
|||
JSONArray jobsArray = null; |
|||
|
|||
// 尝试几种常见的结构 (你需要打印 root.toString() 来确认到底是哪一层)
|
|||
if (root.has("data")) { |
|||
JSONObject dataObj = root.getJSONObject("data"); |
|||
if (dataObj.has("list")) jobsArray = dataObj.getJSONArray("list"); |
|||
else if (dataObj.has("jobs")) jobsArray = dataObj.getJSONArray("jobs"); |
|||
} else if (root.has("result")) { |
|||
jobsArray = root.getJSONArray("result"); |
|||
} else if (root.has("jobs")) { |
|||
jobsArray = root.getJSONArray("jobs"); |
|||
} |
|||
|
|||
if (jobsArray == null) { |
|||
System.err.println("❌ 未找到职位列表数据。JSON 结构可能已变更,请打印查看:\n" + jsonResponse); |
|||
return; |
|||
} |
|||
|
|||
System.out.println("✅ 解析成功,共发现 " + jobsArray.length() + " 个职位。"); |
|||
|
|||
// 3. 提取具体字段
|
|||
for (int i = 0; i < jobsArray.length(); i++) { |
|||
JSONObject job = jobsArray.getJSONObject(i); |
|||
|
|||
// ⚠️ 再次强调:这里的 "jobName", "companyName" 必须和你 F12 里看到的一模一样!
|
|||
String title = job.optString("jobName", "未知职位"); |
|||
String company = job.optString("companyName", "未知公司"); |
|||
String salary = job.optString("salary", "面议"); |
|||
String location = job.optString("workLocation", "未知地点"); |
|||
String link = "https://www.iguopin.com/job/detail/" + job.optString("id"); // 拼接详情页链接
|
|||
|
|||
jobList.add(new String[]{title, company, salary, location, link}); |
|||
System.out.println("[" + (i+1) + "] " + title + " | " + company); |
|||
} |
|||
|
|||
// 4. 保存到 CSV
|
|||
saveToCsv(jobList, "guopin_jobs.csv"); |
|||
System.out.println("💾 数据已保存至 guopin_jobs.csv"); |
|||
|
|||
} catch (IOException e) { |
|||
e.printStackTrace(); |
|||
System.err.println("❌ 网络请求失败:可能是接口地址变了,或者被反爬拦截。"); |
|||
} catch (Exception e) { |
|||
e.printStackTrace(); |
|||
System.err.println("❌ JSON 解析失败:请检查代码中的 key 名称是否与网页返回的一致。"); |
|||
} |
|||
} |
|||
|
|||
private static void saveToCsv(List<String[]> data, String fileName) throws IOException { |
|||
FileWriter out = new FileWriter(fileName); |
|||
// 定义表头
|
|||
CSVFormat format = CSVFormat.DEFAULT.withHeader("职位名称", "公司名称", "薪资", "地点", "链接"); |
|||
CSVPrinter printer = new CSVPrinter(out, format); |
|||
|
|||
for (String[] row : data) { |
|||
printer.printRecord(row); |
|||
} |
|||
printer.close(); |
|||
out.close(); |
|||
} |
|||
} |
|||
Binary file not shown.
Loading…
Reference in new issue