java/HttpUtil.java


								package util;


								import java.io.*;

								import java.net.*;

								import java.util.zip.GZIPInputStream;

								import exception.*;


								/**
								 * Static helpers for downloading a page over HTTP and for cutting a piece of text
								 * out of the returned markup by its surrounding delimiters.
								 */
								public class HttpUtil {

								    /** Connect and read timeout, in milliseconds. */
								    private static final int TIMEOUT = 10000;

								    /** Browser-like User-Agent header sent with every request. */
								    private static final String USER_AGENT =
								        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";

								    /** Charset used when the caller supplies no encoding. */
								    private static final String DEFAULT_ENCODING = "UTF-8";

								    /** Placeholder returned by {@link #extractTagSafe} when nothing could be extracted. */
								    private static final String NOT_FOUND = "未找到";

								    /**
								     * Performs an HTTP GET and returns the response body as text.
								     *
								     * <p>Only a {@code 200 OK} response is accepted. A gzip-compressed body is
								     * decompressed before decoding. Line terminators in the body are normalized:
								     * every line of the result, including the last one, ends with {@code "\n"}.
								     *
								     * @param urlStr   URL to fetch
								     * @param encoding charset name used to decode the body; {@code null} or empty
								     *                 falls back to UTF-8
								     * @return the decoded response body
								     * @throws SpiderException a {@code NetworkException} when the URL is malformed, the
								     *                         host cannot be resolved, the request times out, the
								     *                         status is not 200, or any other I/O error occurs
								     */
								    public static String get(String urlStr, String encoding) throws SpiderException {
								        String charsetName =
								            (encoding == null || encoding.isEmpty()) ? DEFAULT_ENCODING : encoding;

								        HttpURLConnection connection = null;
								        InputStream body = null;

								        try {
								            URL url = new URL(urlStr);
								            connection = (HttpURLConnection) url.openConnection();

								            connection.setRequestMethod("GET");
								            connection.setConnectTimeout(TIMEOUT);
								            connection.setReadTimeout(TIMEOUT);
								            connection.setRequestProperty("User-Agent", USER_AGENT);
								            // Advertise only the coding this method actually decodes below; offering
								            // "deflate" as well would let a server send a body we cannot read.
								            connection.setRequestProperty("Accept-Encoding", "gzip");

								            int responseCode = connection.getResponseCode();
								            if (responseCode != HttpURLConnection.HTTP_OK) {
								                throw new NetworkException("HTTP响应错误: " + responseCode,
								                    NetworkException.ErrorType.RESPONSE_ERROR);
								            }

								            String contentEncoding = connection.getContentEncoding();
								            body = connection.getInputStream();
								            if (isGzip(contentEncoding)) {
								                // Reassign so that the finally block closes the outermost stream.
								                body = new GZIPInputStream(body);
								            }

								            return readAll(body, charsetName);

								        } catch (MalformedURLException e) {
								            throw new NetworkException("URL格式错误: " + urlStr,
								                NetworkException.ErrorType.HOST_NOT_FOUND, e);
								        } catch (UnknownHostException e) {
								            // DNS failure: report it as such instead of as a refused connection.
								            throw new NetworkException("无法解析主机: " + urlStr,
								                NetworkException.ErrorType.HOST_NOT_FOUND, e);
								        } catch (SocketTimeoutException e) {
								            throw new NetworkException("连接超时: " + urlStr,
								                NetworkException.ErrorType.CONNECTION_TIMEOUT, e);
								        } catch (IOException e) {
								            throw new NetworkException("网络IO错误: " + e.getMessage(),
								                NetworkException.ErrorType.CONNECTION_REFUSED, e);
								        } finally {
								            closeQuietly(body);
								            if (connection != null) {
								                connection.disconnect();
								            }
								        }
								    }

								    /**
								     * Returns the trimmed text located between the first occurrence of
								     * {@code startTag} and the first occurrence of {@code endTag} that follows it.
								     *
								     * @param html     text to search; {@code null} is treated as containing no tags
								     * @param startTag delimiter that precedes the wanted text
								     * @param endTag   delimiter that follows the wanted text
								     * @return the text between the two delimiters, with surrounding whitespace removed
								     * @throws ParseException with {@code TAG_NOT_FOUND} when either delimiter is missing
								     */
								    public static String extractTag(String html, String startTag, String endTag)
								            throws ParseException {
								        int startIndex = (html == null) ? -1 : html.indexOf(startTag);
								        if (startIndex == -1) {
								            throw new ParseException("未找到开始标签: " + startTag,
								                ParseException.ErrorType.TAG_NOT_FOUND);
								        }

								        // The end delimiter is searched only after the start delimiter, so an
								        // earlier occurrence of endTag is ignored.
								        int contentStart = startIndex + startTag.length();
								        int endIndex = html.indexOf(endTag, contentStart);
								        if (endIndex == -1) {
								            throw new ParseException("未找到结束标签: " + endTag,
								                ParseException.ErrorType.TAG_NOT_FOUND);
								        }

								        return html.substring(contentStart, endIndex).trim();
								    }

								    /**
								     * Same as {@link #extractTag}, but returns the placeholder {@code "未找到"}
								     * instead of throwing when the text cannot be extracted.
								     *
								     * @param html     text to search; may be {@code null}
								     * @param startTag delimiter that precedes the wanted text
								     * @param endTag   delimiter that follows the wanted text
								     * @return the extracted text, or {@code "未找到"} when a delimiter is missing
								     */
								    public static String extractTagSafe(String html, String startTag, String endTag) {
								        try {
								            return extractTag(html, startTag, endTag);
								        } catch (ParseException e) {
								            return NOT_FOUND;
								        }
								    }

								    /** Tells whether a Content-Encoding header value names gzip (case-insensitive). */
								    private static boolean isGzip(String contentEncoding) {
								        // Locale.ROOT keeps the comparison independent of the default locale
								        // (e.g. the Turkish dotless-i lower-casing of "GZIP").
								        return contentEncoding != null
								            && contentEncoding.toLowerCase(java.util.Locale.ROOT).contains("gzip");
								    }

								    /** Decodes the stream line by line, terminating every line with "\n". */
								    private static String readAll(InputStream in, String charsetName) throws IOException {
								        BufferedReader reader = new BufferedReader(new InputStreamReader(in, charsetName));
								        StringBuilder result = new StringBuilder();
								        String line;
								        while ((line = reader.readLine()) != null) {
								            result.append(line).append("\n");
								        }
								        return result.toString();
								    }

								    /** Closes the resource if present; a failure here cannot be acted upon by the caller. */
								    private static void closeQuietly(Closeable resource) {
								        if (resource == null) {
								            return;
								        }
								        try {
								            resource.close();
								        } catch (IOException ignored) {
								            // Best effort: the response has already been read or has already
								            // failed, and the connection is disconnected right afterwards.
								        }
								    }
								}