diff --git a/project/202506050327-侯鸿魁-期末实验报告 (1).docx b/project/202506050327-侯鸿魁-期末实验报告 (1).docx
new file mode 100644
index 0000000..22b9abe
Binary files /dev/null and b/project/202506050327-侯鸿魁-期末实验报告 (1).docx differ
diff --git a/project/ChangshaHouseCrawler.java b/project/ChangshaHouseCrawler.java
new file mode 100644
index 0000000..ee25d25
--- /dev/null
+++ b/project/ChangshaHouseCrawler.java
@@ -0,0 +1,338 @@
+import java.util.*;
+import java.io.*;
+
+// ==================== Exception hierarchy ====================
+/** Base checked exception for every failure raised while crawling. */
+class CrawlerException extends Exception {
+    public CrawlerException(String msg) { super(msg); }
+    public CrawlerException(String msg, Throwable t) { super(msg, t); }
+}
+
+/** Crawler failure whose message is prefixed with "网络异常: ". */
+class NetworkException extends CrawlerException {
+    public NetworkException(String msg) { super("网络异常: " + msg); }
+}
+
+/** Crawler failure whose message is prefixed with "解析异常: ". */
+class ParseException extends CrawlerException {
+    public ParseException(String msg) { super("解析异常: " + msg); }
+}
+
+/** Crawler failure whose message is prefixed with "保存异常: ". */
+class SaveException extends CrawlerException {
+    public SaveException(String msg) { super("保存异常: " + msg); }
+}
+
+// ==================== Data model (Model) ====================
+/** One listing: title, area, total price, district and originating site. */
+class HouseData {
+    private final String title;
+    private final double area;
+    private final double price;
+    private final String district;
+    private final String source;
+
+    public HouseData(String t, double a, double p, String d, String s) {
+        title = t; area = a; price = p; district = d; source = s;
+    }
+
+    /** Formats this listing as one CSV row in the column order of {@link #getCSVHeader()}. */
+    public String toCSV() {
+        // Locale.ROOT keeps '.' as the decimal separator; a decimal comma would split the column.
+        return String.format(Locale.ROOT, "\"%s\",%.1f,%.1f,\"%s\",\"%s\"",
+                title, area, price, district, source);
+    }
+
+    /** Returns the CSV header row matching {@link #toCSV()}. */
+    public static String getCSVHeader() {
+        return "标题,面积(㎡),总价(万),区域,来源";
+    }
+
+    public String getSource() { return source; }
+}
+
+// ==================== Strategy pattern (Strategy) ====================
+/** A source of listings; each implementation stands for one site. */
+interface CrawlerStrategy {
+    String getSiteName();
+    List<HouseData> crawl() throws CrawlerException;
+}
+
+/** Produces 10 randomly generated "链家" listings after a 300 ms pause (no network access). */
+class LianJiaCrawler implements CrawlerStrategy {
+    private final Random r = new Random();
+    private final String[] dists = {"岳麓区", "雨花区", "天心区", "开福区", "芙蓉区"};
+
+    @Override
+    public String getSiteName() { return "链家"; }
+
+    @Override
+    public List<HouseData> crawl() throws CrawlerException {
+        List<HouseData> list = new ArrayList<>();
+        try {
+            Thread.sleep(300);
+            for (int i = 0; i < 10; i++) {
+                list.add(new HouseData("链家房源" + (i+1),
+                    60 + r.nextDouble() * 100,
+                    80 + r.nextDouble() * 150,
+                    dists[r.nextInt(dists.length)], "链家"));
+            }
+            return list;
+        } catch (InterruptedException e) {
+            // Restore the interrupt flag so callers further up can still observe it.
+            Thread.currentThread().interrupt();
+            throw new CrawlerException("被中断", e);
+        }
+    }
+}
+
+/** Produces 8 randomly generated "安居客" listings after a 250 ms pause (no network access). */
+class AnJuKeCrawler implements CrawlerStrategy {
+    private final Random r = new Random();
+    private final String[] dists = {"岳麓区", "雨花区", "天心区", "开福区", "芙蓉区"};
+
+    @Override
+    public String getSiteName() { return "安居客"; }
+
+    @Override
+    public List<HouseData> crawl() throws CrawlerException {
+        List<HouseData> list = new ArrayList<>();
+        try {
+            Thread.sleep(250);
+            for (int i = 0; i < 8; i++) {
+                list.add(new HouseData("安居客房源" + (i+1),
+                    70 + r.nextDouble() * 80,
+                    70 + r.nextDouble() * 120,
+                    dists[r.nextInt(dists.length)], "安居客"));
+            }
+            return list;
+        } catch (InterruptedException e) {
+            // Restore the interrupt flag so callers further up can still observe it.
+            Thread.currentThread().interrupt();
+            throw new CrawlerException("被中断", e);
+        }
+    }
+}
+
+/** Produces 12 randomly generated "贝壳" listings after a 350 ms pause (no network access). */
+class BeiKeCrawler implements CrawlerStrategy {
+    private final Random r = new Random();
+    private final String[] dists = {"岳麓区", "雨花区", "天心区", "开福区", "芙蓉区"};
+
+    @Override
+    public String getSiteName() { return "贝壳"; }
+
+    @Override
+    public List<HouseData> crawl() throws CrawlerException {
+        List<HouseData> list = new ArrayList<>();
+        try {
+            Thread.sleep(350);
+            for (int i = 0; i < 12; i++) {
+                list.add(new HouseData("贝壳房源" + (i+1),
+                    80 + r.nextDouble() * 90,
+                    90 + r.nextDouble() * 130,
+                    dists[r.nextInt(dists.length)], "贝壳"));
+            }
+            return list;
+        } catch (InterruptedException e) {
+            // Restore the interrupt flag so callers further up can still observe it.
+            Thread.currentThread().interrupt();
+            throw new CrawlerException("被中断", e);
+        }
+    }
+}
+
+// ==================== Command pattern ====================
+/** A single menu action. */
+interface Command {
+    void execute();
+}
+
+/** Runs one strategy and appends whatever it returns to the shared list. */
+class CrawlCommand implements Command {
+    private final CrawlerStrategy strategy;
+    private final List<HouseData> dataList;
+    private final View view;
+
+    public CrawlCommand(CrawlerStrategy s, List<HouseData> d, View v) {
+        strategy = s; dataList = d; view = v;
+    }
+
+    @Override
+    public void execute() {
+        view.showMessage("正在爬取 " + strategy.getSiteName() + "...");
+        try {
+            List<HouseData> result = strategy.crawl();
+            dataList.addAll(result);
+            view.showSuccess(strategy.getSiteName() + ": " + result.size() + " 条");
+        } catch (CrawlerException e) {
+            view.showError(strategy.getSiteName() + " 失败: " + e.getMessage());
+        }
+    }
+}
+
+/** Writes the header plus every collected listing to changsha_house.csv. */
+class SaveCommand implements Command {
+    private final List<HouseData> dataList;
+    private final View view;
+
+    public SaveCommand(List<HouseData> d, View v) { dataList = d; view = v; }
+
+    @Override
+    public void execute() {
+        view.showMessage("正在保存数据...");
+        // The rows contain Chinese text, so the charset is pinned instead of using the platform default.
+        try (PrintWriter w = new PrintWriter("changsha_house.csv", "UTF-8")) {
+            w.println(HouseData.getCSVHeader());
+            for (HouseData d : dataList) w.println(d.toCSV());
+            // PrintWriter never throws on write errors; checkError() flushes and reports them.
+            if (w.checkError()) {
+                view.showError("保存失败: 写入 changsha_house.csv 时出错");
+            } else {
+                view.showSuccess("已保存到 changsha_house.csv");
+            }
+        } catch (IOException e) {
+            view.showError("保存失败: " + e.getMessage());
+        }
+    }
+}
+
+/** Shows a preview of the collected listings through the view. */
+class DisplayCommand implements Command {
+    private final List<HouseData> dataList;
+    private final View view;
+
+    public DisplayCommand(List<HouseData> d, View v) { dataList = d; view = v; }
+
+    @Override
+    public void execute() {
+        view.displayData(dataList);
+    }
+}
+
+// ==================== View ====================
+/** Console output: banner, menu, status lines and the data preview. */
+class View {
+    public void showWelcome() {
+        System.out.println("╔════════════════════════════════════╗");
+        System.out.println("║ 长沙房价爬虫系统 v1.0 ║");
+        System.out.println("╚════════════════════════════════════╝");
+        System.out.println();
+    }
+
+    public void showMenu() {
+        System.out.println("请选择操作:");
+        System.out.println("1. 爬取所有网站");
+        System.out.println("2. 爬取链家");
+        System.out.println("3. 爬取安居客");
+        System.out.println("4. 爬取贝壳");
+        System.out.println("5. 显示数据");
+        System.out.println("6. 保存数据");
+        System.out.println("0. 退出");
+        System.out.print("输入选项: ");
+    }
+
+    public void showMessage(String msg) {
+        System.out.println("[*] " + msg);
+    }
+
+    public void showSuccess(String msg) {
+        System.out.println("[✓] " + msg);
+    }
+
+    public void showError(String msg) {
+        System.err.println("[✗] " + msg);
+    }
+
+    /** Prints at most the first five rows, then how many were left out. */
+    public void displayData(List<HouseData> data) {
+        if (data.isEmpty()) {
+            System.out.println("暂无数据");
+            return;
+        }
+        System.out.println("\n数据预览 (" + data.size() + "条):");
+        System.out.println(HouseData.getCSVHeader());
+        int count = Math.min(5, data.size());
+        for (int i = 0; i < count; i++) {
+            System.out.println(data.get(i).toCSV());
+        }
+        if (data.size() > 5) System.out.println("... 还有 " + (data.size()-5) + " 条");
+    }
+}
+
+// ==================== Controller ====================
+/** Reads menu choices from standard input and runs the matching command. */
+class Controller {
+    private final View view;
+    private final List<HouseData> dataList;
+    private final List<CrawlerStrategy> strategies;
+
+    public Controller(View v) {
+        view = v;
+        dataList = new ArrayList<>();
+        strategies = Arrays.<CrawlerStrategy>asList(
+                new LianJiaCrawler(), new AnJuKeCrawler(), new BeiKeCrawler());
+    }
+
+    /** Menu loop; returns on option 0 or when standard input is exhausted. */
+    public void run() {
+        view.showWelcome();
+        Scanner scanner = new Scanner(System.in);
+        while (true) {
+            view.showMenu();
+            // End of input (Ctrl+D / Ctrl+Z, closed pipe): leave quietly instead of letting
+            // nextLine() throw NoSuchElementException.
+            if (!scanner.hasNextLine()) {
+                return;
+            }
+            try {
+                int choice = Integer.parseInt(scanner.nextLine().trim());
+                Command command = null;
+
+                switch (choice) {
+                    case 1:
+                        for (CrawlerStrategy s : strategies) {
+                            new CrawlCommand(s, dataList, view).execute();
+                        }
+                        break;
+                    case 2:
+                        command = new CrawlCommand(new LianJiaCrawler(), dataList, view);
+                        break;
+                    case 3:
+                        command = new CrawlCommand(new AnJuKeCrawler(), dataList, view);
+                        break;
+                    case 4:
+                        command = new CrawlCommand(new BeiKeCrawler(), dataList, view);
+                        break;
+                    case 5:
+                        command = new DisplayCommand(dataList, view);
+                        break;
+                    case 6:
+                        command = new SaveCommand(dataList, view);
+                        break;
+                    case 0:
+                        System.out.println("再见!");
+                        return;
+                    default:
+                        view.showError("无效选项");
+                }
+
+                if (command != null) {
+                    command.execute();
+                }
+                System.out.println();
+            } catch (NumberFormatException e) {
+                view.showError("请输入数字");
+            }
+        }
+    }
+}
+
+// ==================== Main program (CLI entry point) ====================
+public class ChangshaHouseCrawler {
+    public static void main(String[] args) {
+        View view = new View();
+        Controller controller = new Controller(view);
+        controller.run();
+    }
+}
\ No newline at end of file
diff --git a/project/CrawlerTest.java b/project/CrawlerTest.java
new file mode 100644
index 0000000..b775545
--- /dev/null
+++ b/project/CrawlerTest.java
@@ -0,0 +1,105 @@
+import java.util.*;
+import java.io.*;
+
+/** Standalone smoke run: generates sample listings, prints them and writes house_data.csv. */
+public class CrawlerTest {
+    public static void main(String[] args) {
+        System.out.println("长沙房价爬虫系统 - 自动测试");
+        System.out.println("============================\n");
+
+        List<House> allData = new ArrayList<>();
+
+        // Crawl Lianjia
+        System.out.println("1. 爬取链家数据...");
+        allData.addAll(crawl("链家", 10));
+        System.out.println(" ✓ 完成: " + count(allData, "链家") + "条\n");
+
+        // Crawl Anjuke
+        System.out.println("2. 爬取安居客数据...");
+        allData.addAll(crawl("安居客", 8));
+        System.out.println(" ✓ 完成: " + count(allData, "安居客") + "条\n");
+
+        // Crawl Beike
+        System.out.println("3. 爬取贝壳数据...");
+        allData.addAll(crawl("贝壳", 12));
+        System.out.println(" ✓ 完成: " + count(allData, "贝壳") + "条\n");
+
+        // Show the data
+        System.out.println("4. 数据预览:");
+        displayData(allData);
+
+        // Save the data
+        System.out.println("\n5. 保存数据到文件...");
+        saveData(allData);
+
+        System.out.println("\n总数据: " + allData.size() + "条");
+        System.out.println("运行完成!");
+    }
+
+    /** One generated listing. */
+    static class House {
+        String title, district, source;
+        double area, price;
+        House(String t, double a, double p, String d, String s) {
+            title = t; area = a; price = p; district = d; source = s;
+        }
+        String toCSV() {
+            // Locale.ROOT keeps '.' as the decimal separator; a decimal comma would split the column.
+            return String.format(Locale.ROOT, "\"%s\",%.1f,%.1f,\"%s\",\"%s\"",
+                    title, area, price, district, source);
+        }
+        String toStringLine() {
+            return String.format(" [%s] %s - %.1f㎡ - %.1f万 - %s",
+                source, title, area, price, district);
+        }
+    }
+
+    /** Builds {@code count} random listings attributed to {@code source}. */
+    static List<House> crawl(String source, int count) {
+        List<House> list = new ArrayList<>();
+        Random r = new Random();
+        String[] dists = {"岳麓区", "雨花区", "天心区", "开福区", "芙蓉区"};
+        for (int i = 0; i < count; i++) {
+            list.add(new House(source + "精品房源" + (i + 1),
+                60 + r.nextDouble() * 100,
+                80 + r.nextDouble() * 150,
+                dists[r.nextInt(dists.length)], source));
+        }
+        return list;
+    }
+
+    /** Counts the listings whose source equals {@code source}. */
+    static int count(List<House> data, String source) {
+        int cnt = 0;
+        for (House h : data) if (h.source.equals(source)) cnt++;
+        return cnt;
+    }
+
+    static void displayData(List<House> data) {
+        System.out.println(" ┌──────────────────────────────────────────────────────────────┐");
+        System.out.println(" │ 爬取的全部房价数据 │");
+        System.out.println(" ├──────────────────────────────────────────────────────────────┤");
+
+        for (int i = 0; i < data.size(); i++) {
+            System.out.println(" " + (i+1) + ". " + data.get(i).toStringLine());
+        }
+
+        System.out.println(" └──────────────────────────────────────────────────────────────┘");
+    }
+
+    /** Writes the header and every listing to house_data.csv as UTF-8. */
+    static void saveData(List<House> data) {
+        try (PrintWriter w = new PrintWriter("house_data.csv", "UTF-8")) {
+            w.println("标题,面积(㎡),总价(万),区域,来源");
+            for (House h : data) w.println(h.toCSV());
+            // PrintWriter swallows write errors; checkError() flushes and reports them.
+            if (w.checkError()) {
+                System.out.println(" ✗ 保存失败: 写入 house_data.csv 时出错");
+            } else {
+                System.out.println(" ✓ 已保存到: house_data.csv");
+            }
+        } catch (IOException e) {
+            System.out.println(" ✗ 保存失败: " + e.getMessage());
+        }
+    }
+}
\ No newline at end of file
diff --git a/project/house_data.csv b/project/house_data.csv
new file mode 100644
index 0000000..9eb96d7
--- /dev/null
+++ b/project/house_data.csv
@@ -0,0 +1,31 @@
+标题,面积(㎡),总价(万),区域,来源
+"链家精品房源1",137.8,130.1,"岳麓区","链家"
+"链家精品房源2",82.1,150.1,"岳麓区","链家"
+"链家精品房源3",74.0,85.7,"岳麓区","链家"
+"链家精品房源4",88.4,161.9,"雨花区","链家"
+"链家精品房源5",68.0,105.8,"雨花区","链家"
+"链家精品房源6",84.5,222.3,"天心区","链家"
+"链家精品房源7",88.2,222.1,"雨花区","链家"
+"链家精品房源8",159.3,200.4,"开福区","链家"
+"链家精品房源9",131.7,118.0,"雨花区","链家"
+"链家精品房源10",104.5,93.7,"芙蓉区","链家"
+"安居客精品房源1",158.6,121.9,"岳麓区","安居客"
+"安居客精品房源2",154.9,116.9,"雨花区","安居客"
+"安居客精品房源3",113.4,128.5,"天心区","安居客"
+"安居客精品房源4",75.2,124.7,"岳麓区","安居客"
+"安居客精品房源5",101.0,153.2,"雨花区","安居客"
+"安居客精品房源6",64.9,218.6,"雨花区","安居客"
+"安居客精品房源7",116.3,99.8,"天心区","安居客"
+"安居客精品房源8",133.7,179.8,"雨花区","安居客"
+"贝壳精品房源1",83.5,215.4,"岳麓区","贝壳"
+"贝壳精品房源2",122.9,191.5,"天心区","贝壳"
+"贝壳精品房源3",91.8,212.2,"开福区","贝壳"
+"贝壳精品房源4",81.3,224.5,"雨花区","贝壳"
+"贝壳精品房源5",88.2,144.5,"开福区","贝壳"
+"贝壳精品房源6",119.3,226.0,"雨花区","贝壳"
+"贝壳精品房源7",125.8,174.2,"开福区","贝壳"
+"贝壳精品房源8",110.8,158.4,"芙蓉区","贝壳"
+"贝壳精品房源9",77.5,140.6,"岳麓区","贝壳"
+"贝壳精品房源10",129.8,117.0,"开福区","贝壳"
+"贝壳精品房源11",142.4,155.4,"开福区","贝壳"
+"贝壳精品房源12",109.5,227.5,"雨花区","贝壳"
diff --git a/project/启动爬虫.bat b/project/启动爬虫.bat
new file mode 100644
index 0000000..174b084
--- /dev/null
+++ b/project/启动爬虫.bat
@@ -0,0 +1,5 @@
+@echo off
+cd /d "%~dp0"
+javac -encoding UTF-8 ChangshaHouseCrawler.java
+java ChangshaHouseCrawler
+pause
\ No newline at end of file