10 changed files with 0 additions and 810 deletions
|
@ -1,30 +0,0 @@ |
|||||
import project.bean.Movie; |
|
||||
import project.crawler.MovieCrawler; |
|
||||
import project.utils.DataStorage; |
|
||||
import project.display.ResultDisplay; |
|
||||
|
|
||||
import java.util.List; |
|
||||
|
|
||||
public class Main { |
|
||||
public static void main(String[] args) { |
|
||||
try { |
|
||||
System.out.println("Starting to crawl movie data..."); |
|
||||
List<Movie> movies = MovieCrawler.crawlMovies(10); // Crawl 10 pages of data
|
|
||||
System.out.println("Crawling completed, obtained " + movies.size() + " movies data"); |
|
||||
|
|
||||
System.out.println("Saving data to CSV file..."); |
|
||||
DataStorage.saveToCsv(movies, "project/movies.csv"); |
|
||||
System.out.println("Data saved successfully"); |
|
||||
|
|
||||
System.out.println("Analyzing data..."); |
|
||||
ResultDisplay.displayResults(movies); |
|
||||
|
|
||||
System.out.println("Generating charts..."); |
|
||||
ResultDisplay.generateCharts(movies); |
|
||||
System.out.println("Chart generation completed, saved to project directory"); |
|
||||
|
|
||||
} catch (Exception e) { |
|
||||
e.printStackTrace(); |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
@ -1,101 +0,0 @@ |
|||||
import org.jsoup.Jsoup; |
|
||||
import org.jsoup.Connection; |
|
||||
import org.json.JSONArray; |
|
||||
import org.json.JSONObject; |
|
||||
import org.apache.commons.csv.*; |
|
||||
|
|
||||
import java.io.FileWriter; |
|
||||
import java.io.IOException; |
|
||||
import java.util.ArrayList; |
|
||||
import java.util.List; |
|
||||
|
|
||||
public class JobSpider { |
|
||||
|
|
||||
// ⚠️ 注意:这个 URL 可能会随时间变化,请务必按上面的步骤在 F12 中确认最新的 URL
|
|
||||
// 这里的参数 keyword=Java, page=1 是示例,实际需要根据网站调整
|
|
||||
private static final String API_URL = "https://www.iguopin.com/api/job/search?keyword=&page=1&pageSize=20"; |
|
||||
|
|
||||
public static void main(String[] args) { |
|
||||
List<String[]> jobList = new ArrayList<>(); |
|
||||
|
|
||||
try { |
|
||||
System.out.println("🚀 开始连接国聘网数据接口..."); |
|
||||
|
|
||||
// 1. 构造请求,必须伪装 Header,否则会被拒绝
|
|
||||
String jsonResponse = Jsoup.connect(API_URL) |
|
||||
.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36") |
|
||||
.header("Accept", "application/json, text/plain, */*") |
|
||||
.header("Referer", "https://www.iguopin.com/") // 假装是从首页跳过来的
|
|
||||
.timeout(5000) |
|
||||
.ignoreContentType(true) // 重要!允许接收非 HTML 内容 (即 JSON)
|
|
||||
.execute() |
|
||||
.body(); |
|
||||
|
|
||||
// 2. 解析 JSON 数据
|
|
||||
JSONObject root = new JSONObject(jsonResponse); |
|
||||
|
|
||||
// ⚠️ 关键:你需要根据 F12 看到的实际 JSON 结构调整这里的键名 (key)
|
|
||||
// 假设数据结构是 { "data": { "list": [...] } } 或者 { "result": [...] }
|
|
||||
// 下面是一个通用的猜测逻辑,请根据实际打印结果修改!
|
|
||||
|
|
||||
JSONArray jobsArray = null; |
|
||||
|
|
||||
// 尝试几种常见的结构 (你需要打印 root.toString() 来确认到底是哪一层)
|
|
||||
if (root.has("data")) { |
|
||||
JSONObject dataObj = root.getJSONObject("data"); |
|
||||
if (dataObj.has("list")) jobsArray = dataObj.getJSONArray("list"); |
|
||||
else if (dataObj.has("jobs")) jobsArray = dataObj.getJSONArray("jobs"); |
|
||||
} else if (root.has("result")) { |
|
||||
jobsArray = root.getJSONArray("result"); |
|
||||
} else if (root.has("jobs")) { |
|
||||
jobsArray = root.getJSONArray("jobs"); |
|
||||
} |
|
||||
|
|
||||
if (jobsArray == null) { |
|
||||
System.err.println("❌ 未找到职位列表数据。JSON 结构可能已变更,请打印查看:\n" + jsonResponse); |
|
||||
return; |
|
||||
} |
|
||||
|
|
||||
System.out.println("✅ 解析成功,共发现 " + jobsArray.length() + " 个职位。"); |
|
||||
|
|
||||
// 3. 提取具体字段
|
|
||||
for (int i = 0; i < jobsArray.length(); i++) { |
|
||||
JSONObject job = jobsArray.getJSONObject(i); |
|
||||
|
|
||||
// ⚠️ 再次强调:这里的 "jobName", "companyName" 必须和你 F12 里看到的一模一样!
|
|
||||
String title = job.optString("jobName", "未知职位"); |
|
||||
String company = job.optString("companyName", "未知公司"); |
|
||||
String salary = job.optString("salary", "面议"); |
|
||||
String location = job.optString("workLocation", "未知地点"); |
|
||||
String link = "https://www.iguopin.com/job/detail/" + job.optString("id"); // 拼接详情页链接
|
|
||||
|
|
||||
jobList.add(new String[]{title, company, salary, location, link}); |
|
||||
System.out.println("[" + (i+1) + "] " + title + " | " + company); |
|
||||
} |
|
||||
|
|
||||
// 4. 保存到 CSV
|
|
||||
saveToCsv(jobList, "guopin_jobs.csv"); |
|
||||
System.out.println("💾 数据已保存至 guopin_jobs.csv"); |
|
||||
|
|
||||
} catch (IOException e) { |
|
||||
e.printStackTrace(); |
|
||||
System.err.println("❌ 网络请求失败:可能是接口地址变了,或者被反爬拦截。"); |
|
||||
} catch (Exception e) { |
|
||||
e.printStackTrace(); |
|
||||
System.err.println("❌ JSON 解析失败:请检查代码中的 key 名称是否与网页返回的一致。"); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
private static void saveToCsv(List<String[]> data, String fileName) throws IOException { |
|
||||
FileWriter out = new FileWriter(fileName); |
|
||||
// 定义表头
|
|
||||
CSVFormat format = CSVFormat.DEFAULT.withHeader("职位名称", "公司名称", "薪资", "地点", "链接"); |
|
||||
CSVPrinter printer = new CSVPrinter(out, format); |
|
||||
|
|
||||
for (String[] row : data) { |
|
||||
printer.printRecord(row); |
|
||||
} |
|
||||
printer.close(); |
|
||||
out.close(); |
|
||||
} |
|
||||
} |
|
||||
@ -1,42 +0,0 @@ |
|||||
package project.analysis; |
|
||||
|
|
||||
import project.bean.Movie; |
|
||||
|
|
||||
import java.util.*; |
|
||||
import java.util.stream.Collectors; |
|
||||
|
|
||||
public class MovieAnalyzer { |
|
||||
public static Map<Double, Long> getRatingDistribution(List<Movie> movies) { |
|
||||
return movies.stream() |
|
||||
.collect(Collectors.groupingBy(Movie::getRating, Collectors.counting())); |
|
||||
} |
|
||||
|
|
||||
public static Map<Integer, Double> getYearRatingCorrelation(List<Movie> movies) { |
|
||||
return movies.stream() |
|
||||
.collect(Collectors.groupingBy(Movie::getYear, |
|
||||
Collectors.averagingDouble(Movie::getRating))); |
|
||||
} |
|
||||
|
|
||||
public static Map<String, Long> getDirectorMovieCount(List<Movie> movies) { |
|
||||
return movies.stream() |
|
||||
.collect(Collectors.groupingBy(Movie::getDirector, Collectors.counting())) |
|
||||
.entrySet().stream() |
|
||||
.filter(entry -> entry.getValue() > 1) |
|
||||
.sorted(Map.Entry.<String, Long>comparingByValue().reversed()) |
|
||||
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (e1, e2) -> e1, LinkedHashMap::new)); |
|
||||
} |
|
||||
|
|
||||
public static double getAverageRating(List<Movie> movies) { |
|
||||
return movies.stream() |
|
||||
.mapToDouble(Movie::getRating) |
|
||||
.average() |
|
||||
.orElse(0.0); |
|
||||
} |
|
||||
|
|
||||
public static List<Movie> getTopRatedMovies(List<Movie> movies, int count) { |
|
||||
return movies.stream() |
|
||||
.sorted(Comparator.comparingDouble(Movie::getRating).reversed()) |
|
||||
.limit(count) |
|
||||
.collect(Collectors.toList()); |
|
||||
} |
|
||||
} |
|
||||
@ -1,60 +0,0 @@ |
|||||
package project.bean; |
|
||||
|
|
||||
/**
 * Mutable bean describing one movie: its title, rating, release year and director.
 */
public class Movie {

    private String title;
    private double rating;
    private int year;
    private String director;

    /** Creates a movie with every field left at its Java default value. */
    public Movie() {
    }

    /**
     * Creates a fully populated movie.
     *
     * @param title movie title
     * @param rating numeric rating
     * @param year release year
     * @param director director name
     */
    public Movie(String title, double rating, int year, String director) {
        this.title = title;
        this.rating = rating;
        this.year = year;
        this.director = director;
    }

    // ----- title -----

    /** @return the movie title */
    public String getTitle() {
        return this.title;
    }

    /** @param title the new movie title */
    public void setTitle(String title) {
        this.title = title;
    }

    // ----- rating -----

    /** @return the numeric rating */
    public double getRating() {
        return this.rating;
    }

    /** @param rating the new numeric rating */
    public void setRating(double rating) {
        this.rating = rating;
    }

    // ----- year -----

    /** @return the release year */
    public int getYear() {
        return this.year;
    }

    /** @param year the new release year */
    public void setYear(int year) {
        this.year = year;
    }

    // ----- director -----

    /** @return the director name */
    public String getDirector() {
        return this.director;
    }

    /** @param director the new director name */
    public void setDirector(String director) {
        this.director = director;
    }

    /**
     * Renders all four fields in the form
     * {@code Movie{title='..', rating=.., year=.., director='..'}}.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Movie{");
        text.append("title='").append(title).append('\'');
        text.append(", rating=").append(rating);
        text.append(", year=").append(year);
        text.append(", director='").append(director).append('\'');
        text.append('}');
        return text.toString();
    }
}
|
||||
@ -1,194 +0,0 @@ |
|||||
package project.crawler; |
|
||||
|
|
||||
import project.bean.Movie; |
|
||||
import project.utils.DataCleaner; |
|
||||
import project.utils.HttpUtils; |
|
||||
|
|
||||
import java.util.ArrayList; |
|
||||
import java.util.List; |
|
||||
import java.util.regex.Matcher; |
|
||||
import java.util.regex.Pattern; |
|
||||
|
|
||||
public class MovieCrawler { |
|
||||
public static List<Movie> crawlMovies(int pageCount) throws Exception { |
|
||||
List<Movie> movies = new ArrayList<>(); |
|
||||
|
|
||||
for (int page = 1; page <= pageCount; page++) { |
|
||||
String url = "https://movie.douban.com/top250?start=" + (page - 1) * 25; |
|
||||
System.out.println("Crawling page " + page + " from " + url); |
|
||||
try { |
|
||||
String html = HttpUtils.getHtml(url); |
|
||||
System.out.println("Got HTML content, length: " + html.length()); |
|
||||
|
|
||||
// 打印 HTML 内容的前 500 个字符,了解实际结构
|
|
||||
if (html.length() > 500) { |
|
||||
System.out.println("HTML preview: " + html.substring(0, 500) + "..."); |
|
||||
} |
|
||||
|
|
||||
List<Movie> pageMovies = parseMovies(html); |
|
||||
System.out.println("Parsed " + pageMovies.size() + " movies from page " + page); |
|
||||
movies.addAll(pageMovies); |
|
||||
} catch (Exception e) { |
|
||||
System.out.println("Error crawling page " + page + ": " + e.getMessage()); |
|
||||
} |
|
||||
Thread.sleep(1000); // 控制请求频率
|
|
||||
} |
|
||||
|
|
||||
System.out.println("Total movies crawled: " + movies.size()); |
|
||||
return movies; |
|
||||
} |
|
||||
|
|
||||
private static List<Movie> parseMovies(String html) { |
|
||||
List<Movie> movies = new ArrayList<>(); |
|
||||
|
|
||||
// Find all movie items by looking for <div class="item"> and matching until </div> at the same nesting level
|
|
||||
int startIndex = 0; |
|
||||
int count = 0; |
|
||||
|
|
||||
while (true) { |
|
||||
int itemStart = html.indexOf("<div class=\"item\">", startIndex); |
|
||||
if (itemStart < 0) break; |
|
||||
|
|
||||
// Find the matching </div> by counting nested divs
|
|
||||
int pos = itemStart + "<div class=\"item\">".length(); |
|
||||
int depth = 1; |
|
||||
int itemEnd = -1; |
|
||||
|
|
||||
while (pos < html.length() && depth > 0) { |
|
||||
int nextOpen = html.indexOf("<div", pos); |
|
||||
int nextClose = html.indexOf("</div>", pos); |
|
||||
|
|
||||
if (nextClose < 0) break; // No closing tag found
|
|
||||
|
|
||||
if (nextOpen >= 0 && nextOpen < nextClose) { |
|
||||
// Found an opening div before closing
|
|
||||
depth++; |
|
||||
pos = nextOpen + 4; |
|
||||
} else { |
|
||||
// Found a closing div
|
|
||||
depth--; |
|
||||
if (depth == 0) { |
|
||||
itemEnd = nextClose + 6; |
|
||||
} |
|
||||
pos = nextClose + 6; |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
if (itemEnd > itemStart) { |
|
||||
count++; |
|
||||
String movieHtml = html.substring(itemStart, itemEnd); |
|
||||
// Don't print movie HTML to avoid excessive output
|
|
||||
Movie movie = parseMovie(movieHtml); |
|
||||
if (movie != null) { |
|
||||
movies.add(movie); |
|
||||
} |
|
||||
startIndex = itemEnd; |
|
||||
} else { |
|
||||
break; |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
System.out.println("Found " + count + " movie items, parsed " + movies.size() + " valid movies"); |
|
||||
return movies; |
|
||||
} |
|
||||
|
|
||||
private static Movie parseMovie(String movieHtml) { |
|
||||
try { |
|
||||
// Extract title from img alt attribute
|
|
||||
String title = ""; |
|
||||
int altIndex = movieHtml.indexOf("alt="); |
|
||||
if (altIndex > 0) { |
|
||||
int start = movieHtml.indexOf('"', altIndex); |
|
||||
int end = movieHtml.indexOf('"', start + 1); |
|
||||
if (start > 0 && end > 0) { |
|
||||
title = movieHtml.substring(start + 1, end).trim(); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
// Extract rating
|
|
||||
double rating = 0.0; |
|
||||
int ratingIndex = movieHtml.indexOf("rating_num"); |
|
||||
if (ratingIndex > 0) { |
|
||||
int start = movieHtml.indexOf('>', ratingIndex); |
|
||||
int end = movieHtml.indexOf("</span>", start); |
|
||||
if (start > 0 && end > 0) { |
|
||||
String ratingStr = movieHtml.substring(start + 1, end).trim(); |
|
||||
try { |
|
||||
rating = Double.parseDouble(ratingStr); |
|
||||
} catch (NumberFormatException e) { |
|
||||
rating = 0.0; |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
// Extract year and director from movie info
|
|
||||
int year = 0; |
|
||||
String director = "Unknown"; |
|
||||
|
|
||||
// Find the info section which contains year and director
|
|
||||
// Look for <p> tag without class or with specific class
|
|
||||
int infoStart = -1; |
|
||||
int pStart = movieHtml.indexOf("<p>"); |
|
||||
int pClassStart = movieHtml.indexOf("<p class=\"\">"); |
|
||||
|
|
||||
if (pStart >= 0) { |
|
||||
infoStart = pStart; |
|
||||
} |
|
||||
if (pClassStart >= 0 && (pStart < 0 || pClassStart < pStart)) { |
|
||||
infoStart = pClassStart; |
|
||||
} |
|
||||
|
|
||||
if (infoStart > 0) { |
|
||||
int infoEnd = movieHtml.indexOf("</p>", infoStart); |
|
||||
if (infoEnd > infoStart) { |
|
||||
String infoSection = movieHtml.substring(infoStart, infoEnd); |
|
||||
|
|
||||
// Extract year - look for 4-digit year after <br> tag
|
|
||||
int brIndex = infoSection.indexOf("<br>"); |
|
||||
if (brIndex > 0) { |
|
||||
String afterBr = infoSection.substring(brIndex + 4).trim(); |
|
||||
// Find first 4-digit number
|
|
||||
for (int i = 0; i <= afterBr.length() - 4; i++) { |
|
||||
String possibleYear = afterBr.substring(i, i + 4); |
|
||||
if (possibleYear.matches("\\d{4}")) { |
|
||||
try { |
|
||||
year = Integer.parseInt(possibleYear); |
|
||||
break; |
|
||||
} catch (NumberFormatException e) { |
|
||||
// Continue
|
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
// Extract director - director info is between "导演:" and " "
|
|
||||
// Look for the pattern: 导演: [director name]
|
|
||||
int directorLabelIdx = infoSection.indexOf("\u5bfc\u6f14:"); // Unicode for "导演:"
|
|
||||
if (directorLabelIdx >= 0) { |
|
||||
int directorStart = directorLabelIdx + 3; // Skip "导演:"
|
|
||||
int directorEnd = infoSection.indexOf(" ", directorStart); |
|
||||
if (directorEnd > directorStart) { |
|
||||
director = infoSection.substring(directorStart, directorEnd).trim(); |
|
||||
// Clean up any remaining HTML
|
|
||||
director = director.replaceAll("<[^>]*>", "").trim(); |
|
||||
// Extract only Chinese name (before space)
|
|
||||
int spaceIdx = director.indexOf(" "); |
|
||||
if (spaceIdx > 0) { |
|
||||
director = director.substring(0, spaceIdx).trim(); |
|
||||
} |
|
||||
if (director.isEmpty()) director = "Unknown"; |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
// If title and rating are valid, create movie object
|
|
||||
if (!title.isEmpty() && rating > 0) { |
|
||||
return new Movie(title, rating, year, director); |
|
||||
} |
|
||||
} catch (Exception e) { |
|
||||
// Silently handle exceptions
|
|
||||
} |
|
||||
return null; |
|
||||
} |
|
||||
} |
|
||||
@ -1,47 +0,0 @@ |
|||||
package project.display; |
|
||||
|
|
||||
import project.bean.Movie; |
|
||||
import project.analysis.MovieAnalyzer; |
|
||||
|
|
||||
import java.util.List; |
|
||||
import java.util.Map; |
|
||||
|
|
||||
public class ResultDisplay { |
|
||||
public static void displayResults(List<Movie> movies) { |
|
||||
System.out.println("===== Movie Data Analysis Results ====="); |
|
||||
System.out.println("Total movies: " + movies.size()); |
|
||||
System.out.printf("Average rating: %.2f\n\n", MovieAnalyzer.getAverageRating(movies)); |
|
||||
|
|
||||
System.out.println("===== Rating Distribution ====="); |
|
||||
Map<Double, Long> ratingDistribution = MovieAnalyzer.getRatingDistribution(movies); |
|
||||
ratingDistribution.entrySet().stream() |
|
||||
.sorted(Map.Entry.comparingByKey()) |
|
||||
.forEach(entry -> System.out.printf("Rating %.1f: %d movies\n", entry.getKey(), entry.getValue())); |
|
||||
|
|
||||
System.out.println("\n===== Year-Rating Correlation ====="); |
|
||||
Map<Integer, Double> yearRating = MovieAnalyzer.getYearRatingCorrelation(movies); |
|
||||
yearRating.entrySet().stream() |
|
||||
.sorted(Map.Entry.comparingByKey()) |
|
||||
.forEach(entry -> System.out.printf("%d: %.2f\n", entry.getKey(), entry.getValue())); |
|
||||
|
|
||||
System.out.println("\n===== Director Movie Count Ranking ====="); |
|
||||
Map<String, Long> directorCount = MovieAnalyzer.getDirectorMovieCount(movies); |
|
||||
directorCount.entrySet().stream() |
|
||||
.limit(10) |
|
||||
.forEach(entry -> System.out.printf("%s: %d movies\n", entry.getKey(), entry.getValue())); |
|
||||
|
|
||||
System.out.println("\n===== Top 10 Highest Rated Movies ====="); |
|
||||
List<Movie> topRated = MovieAnalyzer.getTopRatedMovies(movies, 10); |
|
||||
for (int i = 0; i < topRated.size(); i++) { |
|
||||
Movie movie = topRated.get(i); |
|
||||
System.out.printf("%d. %s (%.1f) - %d - Director: %s\n", |
|
||||
i + 1, movie.getTitle(), movie.getRating(), movie.getYear(), movie.getDirector()); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
public static void generateCharts(List<Movie> movies) throws Exception { |
|
||||
System.out.println("\n===== Chart Generation ====="); |
|
||||
System.out.println("Due to environment limitations, chart generation is not implemented"); |
|
||||
System.out.println("Suggest using JFreeChart or other chart libraries for visualization"); |
|
||||
} |
|
||||
} |
|
||||
@ -1,29 +0,0 @@ |
|||||
package project.utils; |
|
||||
|
|
||||
/**
 * Null-safe helpers that normalise scraped text and convert it to numbers.
 */
public class DataCleaner {

    /** Anything that looks like an HTML tag. */
    private static final java.util.regex.Pattern HTML_TAG =
            java.util.regex.Pattern.compile("<[^>]*>");

    /** A run of whitespace, including line breaks. */
    private static final java.util.regex.Pattern WHITESPACE_RUN =
            java.util.regex.Pattern.compile("\\s+");

    /** Four consecutive ASCII digits, used to pick a year out of free text. */
    private static final java.util.regex.Pattern FOUR_DIGIT_YEAR =
            java.util.regex.Pattern.compile("\\d{4}");

    /**
     * Strips HTML tags, collapses every whitespace run (line breaks included) into a single
     * space and trims the result.
     *
     * <p>Trimming happens last so that spaces exposed by removing a leading or trailing tag
     * do not survive in the output.
     *
     * @param text raw text, may be {@code null}
     * @return the cleaned text; an empty string for {@code null}
     */
    public static String cleanText(String text) {
        if (text == null) {
            return "";
        }
        String withoutTags = HTML_TAG.matcher(text).replaceAll("");
        return WHITESPACE_RUN.matcher(withoutTags).replaceAll(" ").trim();
    }

    /**
     * Parses a rating.
     *
     * @param ratingStr text holding a decimal number, may be {@code null}
     * @return the parsed value, or {@code 0.0} when the text is {@code null}, empty, not a
     *         number, or not finite ({@code NaN} / infinity)
     */
    public static double parseRating(String ratingStr) {
        if (ratingStr == null || ratingStr.isEmpty()) {
            return 0.0;
        }
        try {
            double rating = Double.parseDouble(ratingStr.trim());
            // Double.parseDouble accepts "NaN" and "Infinity"; neither is a usable rating.
            return (Double.isNaN(rating) || Double.isInfinite(rating)) ? 0.0 : rating;
        } catch (NumberFormatException e) {
            return 0.0;
        }
    }

    /**
     * Extracts a year from free text.
     *
     * <p>The first run of four digits wins, so text listing several years (for example
     * {@code "1994 / 2018"}) yields the first one instead of a concatenation of all digits.
     * When the text has no four-digit run, all digits it contains are joined and parsed,
     * which keeps short inputs such as {@code "'94"} working as before.
     *
     * @param yearStr text containing a year, may be {@code null}
     * @return the year, or {@code 0} when the text is {@code null}, empty or has no digits
     */
    public static int parseYear(String yearStr) {
        if (yearStr == null || yearStr.isEmpty()) {
            return 0;
        }
        java.util.regex.Matcher year = FOUR_DIGIT_YEAR.matcher(yearStr);
        if (year.find()) {
            return Integer.parseInt(year.group());
        }
        try {
            return Integer.parseInt(yearStr.replaceAll("[^0-9]", ""));
        } catch (NumberFormatException e) {
            return 0;
        }
    }
}
|
||||
@ -1,26 +0,0 @@ |
|||||
package project.utils; |
|
||||
|
|
||||
import project.bean.Movie; |
|
||||
|
|
||||
import java.io.OutputStreamWriter; |
|
||||
import java.io.FileOutputStream; |
|
||||
import java.io.IOException; |
|
||||
import java.util.List; |
|
||||
|
|
||||
public class DataStorage { |
|
||||
public static void saveToCsv(List<Movie> movies, String filePath) throws IOException { |
|
||||
// Use UTF-8 encoding to properly handle Chinese characters
|
|
||||
OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(filePath), "UTF-8"); |
|
||||
writer.write("Title,Rating,Year,Director\n"); |
|
||||
|
|
||||
for (Movie movie : movies) { |
|
||||
writer.write(String.format("%s,%.1f,%d,%s\n", |
|
||||
movie.getTitle(), |
|
||||
movie.getRating(), |
|
||||
movie.getYear(), |
|
||||
movie.getDirector())); |
|
||||
} |
|
||||
|
|
||||
writer.close(); |
|
||||
} |
|
||||
} |
|
||||
@ -1,30 +0,0 @@ |
|||||
package project.utils; |
|
||||
|
|
||||
import java.io.BufferedReader; |
|
||||
import java.io.InputStreamReader; |
|
||||
import java.net.HttpURLConnection; |
|
||||
import java.net.URL; |
|
||||
|
|
||||
/**
 * Minimal HTTP helper built on {@link HttpURLConnection}.
 */
public class HttpUtils {

    /** Maximum time to wait for the connection to be established, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MS = 10000;

    /** Maximum time to wait for data once connected, in milliseconds. */
    private static final int READ_TIMEOUT_MS = 15000;

    /**
     * Downloads the body of a URL with a GET request and decodes it as UTF-8.
     *
     * <p>The body is read line by line and the lines are joined without separators, so the
     * returned text contains no line terminators. Connect and read timeouts are set so a stalled
     * server cannot block the caller forever, and the reader and connection are released on
     * every exit path.
     *
     * @param url address to fetch
     * @return the response body with line terminators removed
     * @throws Exception if the URL is malformed, the request fails or times out, or the server
     *         answers with a status other than 200 (reported as an {@link java.io.IOException})
     */
    public static String getHtml(String url) throws Exception {
        HttpURLConnection con = (HttpURLConnection) new URL(url).openConnection();
        try {
            con.setRequestMethod("GET");
            con.setRequestProperty("User-Agent", "Mozilla/5.0");
            con.setConnectTimeout(CONNECT_TIMEOUT_MS);
            con.setReadTimeout(READ_TIMEOUT_MS);

            int responseCode = con.getResponseCode();
            if (responseCode != HttpURLConnection.HTTP_OK) {
                throw new java.io.IOException("HTTP error code: " + responseCode);
            }

            StringBuilder html = new StringBuilder();
            try (BufferedReader in = new BufferedReader(
                    new InputStreamReader(con.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {
                String inputLine;
                while ((inputLine = in.readLine()) != null) {
                    html.append(inputLine);
                }
            }
            return html.toString();
        } finally {
            con.disconnect();
        }
    }
}
|
||||
Loading…
Reference in new issue