Browse Source

上传文件至 'project'

main
parent
commit
92db085adf
  1. BIN
      project/202506050327-侯鸿魁-期末实验报告 (1).docx
  2. 290
      project/ChangshaHouseCrawler.java
  3. 93
      project/CrawlerTest.java
  4. 31
      project/house_data.csv
  5. 5
      project/启动爬虫.bat

BIN
project/202506050327-侯鸿魁-期末实验报告 (1).docx

Binary file not shown.

290
project/ChangshaHouseCrawler.java

@ -0,0 +1,290 @@
import java.util.*;
import java.io.*;
// ==================== 异常体系 ====================
/**
 * Base checked exception for every failure reported by the crawler.
 * The more specific exception types in this file extend it.
 */
class CrawlerException extends Exception {

    /**
     * Creates an exception that wraps an underlying cause.
     *
     * @param msg description of the failure
     * @param t   the throwable that triggered this failure
     */
    public CrawlerException(String msg, Throwable t) {
        super(msg, t);
    }

    /**
     * Creates an exception that carries only a description.
     *
     * @param msg description of the failure
     */
    public CrawlerException(String msg) {
        super(msg);
    }
}
/**
 * Crawler failure attributed to the network. The supplied message is
 * prefixed with a fixed "network error" label.
 */
class NetworkException extends CrawlerException {

    /** Label placed in front of every message. */
    private static final String PREFIX = "网络异常: ";

    /**
     * @param msg detail text appended after the label
     */
    public NetworkException(String msg) {
        super(PREFIX + msg);
    }
}
/**
 * Crawler failure attributed to parsing. The supplied message is
 * prefixed with a fixed "parse error" label.
 */
class ParseException extends CrawlerException {

    /** Label placed in front of every message. */
    private static final String PREFIX = "解析异常: ";

    /**
     * @param msg detail text appended after the label
     */
    public ParseException(String msg) {
        super(PREFIX + msg);
    }
}
/**
 * Crawler failure attributed to saving data. The supplied message is
 * prefixed with a fixed "save error" label.
 */
class SaveException extends CrawlerException {

    /** Label placed in front of every message. */
    private static final String PREFIX = "保存异常: ";

    /**
     * @param msg detail text appended after the label
     */
    public SaveException(String msg) {
        super(PREFIX + msg);
    }
}
// ==================== 数据模型 (Model) ====================
/**
 * Immutable record of one house listing: title, area, total price,
 * district and the site it came from.
 *
 * <p>Per the CSV header, area is in square metres and price is in units of
 * ten thousand (万).
 */
class HouseData {
    private final String title;
    private final double area;
    private final double price;
    private final String district;
    private final String source;

    /**
     * @param t listing title
     * @param a area
     * @param p total price
     * @param d district name
     * @param s name of the site the listing came from
     */
    public HouseData(String t, double a, double p, String d, String s) {
        title = t;
        area = a;
        price = p;
        district = d;
        source = s;
    }

    /**
     * Renders this listing as one CSV row in the column order of
     * {@link #getCSVHeader()}.
     *
     * <p>Numbers are formatted with {@link Locale#ROOT} so the decimal
     * separator is always '.', whatever the JVM default locale is; a locale
     * that uses a decimal comma would otherwise add extra columns. Text
     * fields are wrapped in double quotes and embedded quotes are doubled.
     *
     * @return the CSV row, without a line terminator
     */
    public String toCSV() {
        return String.format(Locale.ROOT, "%s,%.1f,%.1f,%s,%s",
                quote(title), area, price, quote(district), quote(source));
    }

    /** Wraps a text field in double quotes, doubling any quotes it contains. */
    private static String quote(String field) {
        // String.valueOf keeps the previous rendering of a null field as "null".
        return "\"" + String.valueOf(field).replace("\"", "\"\"") + "\"";
    }

    /**
     * @return the CSV header line matching {@link #toCSV()}
     */
    public static String getCSVHeader() {
        return "标题,面积(㎡),总价(万),区域,来源";
    }

    /**
     * @return the name of the site this listing came from
     */
    public String getSource() {
        return source;
    }
}
// ==================== 策略模式 (Strategy) ====================
/**
 * Strategy for collecting listings from one site. Each implementation
 * represents a single source.
 */
interface CrawlerStrategy {

    /**
     * @return the display name of the site this strategy covers
     */
    String getSiteName();

    /**
     * Collects the listings for this site.
     *
     * @return the collected listings
     * @throws CrawlerException if the crawl cannot be completed
     */
    List<HouseData> crawl() throws CrawlerException;
}
class LianJiaCrawler implements CrawlerStrategy {
private Random r = new Random();
private String[] dists = {"岳麓区", "雨花区", "天心区", "开福区", "芙蓉区"};
public String getSiteName() { return "链家"; }
public List<HouseData> crawl() throws CrawlerException {
List<HouseData> list = new ArrayList<>();
try {
Thread.sleep(300);
for (int i = 0; i < 10; i++) {
list.add(new HouseData("链家房源" + (i+1),
60 + r.nextDouble() * 100,
80 + r.nextDouble() * 150,
dists[r.nextInt(5)], "链家"));
}
return list;
} catch (InterruptedException e) {
throw new CrawlerException("被中断", e);
}
}
}
class AnJuKeCrawler implements CrawlerStrategy {
private Random r = new Random();
private String[] dists = {"岳麓区", "雨花区", "天心区", "开福区", "芙蓉区"};
public String getSiteName() { return "安居客"; }
public List<HouseData> crawl() throws CrawlerException {
List<HouseData> list = new ArrayList<>();
try {
Thread.sleep(250);
for (int i = 0; i < 8; i++) {
list.add(new HouseData("安居客房源" + (i+1),
70 + r.nextDouble() * 80,
70 + r.nextDouble() * 120,
dists[r.nextInt(5)], "安居客"));
}
return list;
} catch (InterruptedException e) {
throw new CrawlerException("被中断", e);
}
}
}
class BeiKeCrawler implements CrawlerStrategy {
private Random r = new Random();
private String[] dists = {"岳麓区", "雨花区", "天心区", "开福区", "芙蓉区"};
public String getSiteName() { return "贝壳"; }
public List<HouseData> crawl() throws CrawlerException {
List<HouseData> list = new ArrayList<>();
try {
Thread.sleep(350);
for (int i = 0; i < 12; i++) {
list.add(new HouseData("贝壳房源" + (i+1),
80 + r.nextDouble() * 90,
90 + r.nextDouble() * 130,
dists[r.nextInt(5)], "贝壳"));
}
return list;
} catch (InterruptedException e) {
throw new CrawlerException("被中断", e);
}
}
}
// ==================== Command模式 ====================
/**
 * A single user-triggered action that can be run without arguments.
 */
@FunctionalInterface
interface Command {

    /** Runs the action. */
    void execute();
}
class CrawlCommand implements Command {
private CrawlerStrategy strategy;
private List<HouseData> dataList;
private View view;
public CrawlCommand(CrawlerStrategy s, List<HouseData> d, View v) {
strategy = s; dataList = d; view = v;
}
public void execute() {
view.showMessage("正在爬取 " + strategy.getSiteName() + "...");
try {
List<HouseData> result = strategy.crawl();
dataList.addAll(result);
view.showSuccess(strategy.getSiteName() + ": " + result.size() + " 条");
} catch (CrawlerException e) {
view.showError(strategy.getSiteName() + " 失败: " + e.getMessage());
}
}
}
class SaveCommand implements Command {
private List<HouseData> dataList;
private View view;
public SaveCommand(List<HouseData> d, View v) { dataList = d; view = v; }
public void execute() {
view.showMessage("正在保存数据...");
try (PrintWriter w = new PrintWriter("changsha_house.csv")) {
w.println(HouseData.getCSVHeader());
for (HouseData d : dataList) w.println(d.toCSV());
view.showSuccess("已保存到 changsha_house.csv");
} catch (FileNotFoundException e) {
view.showError("保存失败: " + e.getMessage());
}
}
}
class DisplayCommand implements Command {
private List<HouseData> dataList;
private View view;
public DisplayCommand(List<HouseData> d, View v) { dataList = d; view = v; }
public void execute() {
view.displayData(dataList);
}
}
// ==================== 视图 (View) ====================
/**
 * Console view: prints the banner, the menu, status messages and a short
 * preview of the collected listings.
 */
class View {
    /** Maximum number of rows shown by {@link #displayData(List)}. */
    private static final int PREVIEW_LIMIT = 5;

    /** Prints the banner followed by a blank line. */
    public void showWelcome() {
        String[] banner = {
            "╔════════════════════════════════════╗",
            "║ 长沙房价爬虫系统 v1.0 ║",
            "╚════════════════════════════════════╝",
            ""
        };
        for (String line : banner) {
            System.out.println(line);
        }
    }

    /** Prints the menu options and leaves the cursor after the input prompt. */
    public void showMenu() {
        String[] entries = {
            "请选择操作:",
            "1. 爬取所有网站",
            "2. 爬取链家",
            "3. 爬取安居客",
            "4. 爬取贝壳",
            "5. 显示数据",
            "6. 保存数据",
            "0. 退出"
        };
        for (String entry : entries) {
            System.out.println(entry);
        }
        System.out.print("输入选项: ");
    }

    /** Prints an informational message to standard output. */
    public void showMessage(String msg) {
        System.out.println("[*] " + msg);
    }

    /** Prints a success message to standard output. */
    public void showSuccess(String msg) {
        System.out.println("[✓] " + msg);
    }

    /** Prints an error message to standard error. */
    public void showError(String msg) {
        System.err.println("[✗] " + msg);
    }

    /**
     * Prints the CSV header and at most {@value #PREVIEW_LIMIT} rows,
     * followed by a count of the rows not shown. Prints a "no data" notice
     * when the list is empty.
     *
     * @param data listings to preview
     */
    public void displayData(List<HouseData> data) {
        if (data.isEmpty()) {
            System.out.println("暂无数据");
            return;
        }
        final int total = data.size();
        System.out.println("\n数据预览 (" + total + "条):");
        System.out.println(HouseData.getCSVHeader());

        final int shown = Math.min(PREVIEW_LIMIT, total);
        int index = 0;
        while (index < shown) {
            System.out.println(data.get(index).toCSV());
            index++;
        }

        final int hidden = total - PREVIEW_LIMIT;
        if (hidden > 0) {
            System.out.println("... 还有 " + hidden + " 条");
        }
    }
}
// ==================== 控制器 (Controller) ====================
/**
 * Interactive controller: shows the menu, reads a numeric choice from
 * standard input and runs the matching command until the user quits.
 */
class Controller {
    private final View view;
    private final List<HouseData> dataList;
    private final List<CrawlerStrategy> strategies;

    /**
     * @param v view used for all console output
     */
    public Controller(View v) {
        view = v;
        dataList = new ArrayList<>();
        strategies = Arrays.asList(new LianJiaCrawler(), new AnJuKeCrawler(), new BeiKeCrawler());
    }

    /**
     * Runs the menu loop.
     *
     * <p>Choices: 1 crawls every registered strategy, 2 to 4 crawl a single
     * site, 5 previews the data, 6 saves it, 0 exits. Non-numeric input and
     * unknown numbers are reported as errors and the menu is shown again.
     * The loop also ends when standard input reaches end-of-file, so a
     * closed or exhausted stdin does not crash the program.
     */
    public void run() {
        view.showWelcome();
        // Deliberately not closed: closing the Scanner would close System.in.
        Scanner scanner = new Scanner(System.in);
        while (true) {
            view.showMenu();
            if (!scanner.hasNextLine()) {
                // EOF: nextLine() would throw NoSuchElementException here.
                // End the prompt line and leave.
                System.out.println();
                return;
            }
            try {
                int choice = Integer.parseInt(scanner.nextLine().trim());
                Command command = null;
                switch (choice) {
                    case 1:
                        // Run every registered strategy in order.
                        for (CrawlerStrategy s : strategies) {
                            new CrawlCommand(s, dataList, view).execute();
                        }
                        break;
                    case 2:
                        command = new CrawlCommand(new LianJiaCrawler(), dataList, view);
                        break;
                    case 3:
                        command = new CrawlCommand(new AnJuKeCrawler(), dataList, view);
                        break;
                    case 4:
                        command = new CrawlCommand(new BeiKeCrawler(), dataList, view);
                        break;
                    case 5:
                        command = new DisplayCommand(dataList, view);
                        break;
                    case 6:
                        command = new SaveCommand(dataList, view);
                        break;
                    case 0:
                        System.out.println("再见!");
                        return;
                    default:
                        view.showError("无效选项");
                }
                if (command != null) {
                    command.execute();
                }
                System.out.println();
            } catch (NumberFormatException e) {
                view.showError("请输入数字");
            }
        }
    }
}
// ==================== 主程序 (CLI入口) ====================
/**
 * Command-line entry point: wires a console view to a controller and
 * starts the interactive loop.
 */
public class ChangshaHouseCrawler {

    /**
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        new Controller(new View()).run();
    }
}

93
project/CrawlerTest.java

@ -0,0 +1,93 @@
import java.util.*;
import java.io.*;
/**
 * Non-interactive demo run: generates sample listings for three sources,
 * prints them and saves them to a CSV file.
 */
public class CrawlerTest {
    /** Districts a generated listing can be assigned to. */
    private static final String[] DISTRICTS = {"岳麓区", "雨花区", "天心区", "开福区", "芙蓉区"};
    /** Output file, relative to the working directory. */
    private static final String CSV_FILE = "house_data.csv";

    /**
     * Runs the whole flow: generate per source, display, save, summarise.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        System.out.println("长沙房价爬虫系统 - 自动测试");
        System.out.println("============================\n");
        List<House> allData = new ArrayList<>();
        System.out.println("1. 爬取链家数据...");
        allData.addAll(crawl("链家", 10));
        System.out.println(" ✓ 完成: " + count(allData, "链家") + "条\n");
        System.out.println("2. 爬取安居客数据...");
        allData.addAll(crawl("安居客", 8));
        System.out.println(" ✓ 完成: " + count(allData, "安居客") + "条\n");
        System.out.println("3. 爬取贝壳数据...");
        allData.addAll(crawl("贝壳", 12));
        System.out.println(" ✓ 完成: " + count(allData, "贝壳") + "条\n");
        System.out.println("4. 数据预览:");
        displayData(allData);
        System.out.println("\n5. 保存数据到文件...");
        saveData(allData);
        System.out.println("\n总数据: " + allData.size() + "条");
        System.out.println("运行完成!");
    }

    /** One generated listing. */
    static class House {
        String title, district, source;
        double area, price;

        House(String t, double a, double p, String d, String s) {
            title = t;
            area = a;
            price = p;
            district = d;
            source = s;
        }

        /**
         * Renders this listing as a CSV row.
         *
         * <p>Numbers use {@link Locale#ROOT} so the decimal separator is
         * always '.', even under a decimal-comma default locale. Text fields
         * are quoted and embedded quotes are doubled.
         */
        String toCSV() {
            return String.format(Locale.ROOT, "%s,%.1f,%.1f,%s,%s",
                    quote(title), area, price, quote(district), quote(source));
        }

        /** Renders this listing as one line for console display. */
        String toStringLine() {
            return String.format(" [%s] %s - %.1f㎡ - %.1f万 - %s",
                    source, title, area, price, district);
        }

        /** Wraps a text field in double quotes, doubling any quotes it contains. */
        private static String quote(String field) {
            // String.valueOf keeps the previous rendering of a null field as "null".
            return "\"" + String.valueOf(field).replace("\"", "\"\"") + "\"";
        }
    }

    /**
     * Generates random listings for one source; no network access happens.
     *
     * @param source source name, used as the title prefix and the source field
     * @param count  number of listings to generate; zero or less yields an empty list
     * @return the generated listings
     */
    static List<House> crawl(String source, int count) {
        List<House> list = new ArrayList<>();
        Random r = new Random();
        for (int i = 0; i < count; i++) {
            list.add(new House(source + "精品房源" + (i + 1),
                    60 + r.nextDouble() * 100,
                    80 + r.nextDouble() * 150,
                    // Bound by the array length, not a literal that can drift.
                    DISTRICTS[r.nextInt(DISTRICTS.length)], source));
        }
        return list;
    }

    /**
     * Counts the listings whose source equals the given name.
     *
     * @param data   listings to scan
     * @param source source name to match
     * @return number of matching listings
     */
    static int count(List<House> data, String source) {
        int cnt = 0;
        for (House h : data) {
            if (h.source.equals(source)) {
                cnt++;
            }
        }
        return cnt;
    }

    /** Prints every listing, numbered from 1, inside a box-drawing frame. */
    static void displayData(List<House> data) {
        System.out.println(" ┌──────────────────────────────────────────────────────────────┐");
        System.out.println(" │ 爬取的全部房价数据 │");
        System.out.println(" ├──────────────────────────────────────────────────────────────┤");
        for (int i = 0; i < data.size(); i++) {
            System.out.println(" " + (i + 1) + ". " + data.get(i).toStringLine());
        }
        System.out.println(" └──────────────────────────────────────────────────────────────┘");
    }

    /**
     * Writes the header and all listings to the CSV file and prints the
     * outcome.
     *
     * <p>The file is written as UTF-8 so it does not depend on the platform
     * default charset. {@link PrintWriter} never throws on write errors, so
     * {@link PrintWriter#checkError()} is checked before success is reported.
     */
    static void saveData(List<House> data) {
        try (PrintWriter w = new PrintWriter(CSV_FILE, "UTF-8")) {
            w.println("标题,面积(㎡),总价(万),区域,来源");
            for (House h : data) {
                w.println(h.toCSV());
            }
            // checkError() flushes and reports any I/O error that PrintWriter swallowed.
            if (w.checkError()) {
                System.out.println(" ✗ 保存失败: 写入 " + CSV_FILE + " 时发生 I/O 错误");
            } else {
                System.out.println(" ✓ 已保存到: " + CSV_FILE);
            }
        } catch (IOException e) {
            System.out.println(" ✗ 保存失败: " + e.getMessage());
        }
    }
}

31
project/house_data.csv

@ -0,0 +1,31 @@
标题,面积(㎡),总价(万),区域,来源
"链家精品房源1",137.8,130.1,"岳麓区","链家"
"链家精品房源2",82.1,150.1,"岳麓区","链家"
"链家精品房源3",74.0,85.7,"岳麓区","链家"
"链家精品房源4",88.4,161.9,"雨花区","链家"
"链家精品房源5",68.0,105.8,"雨花区","链家"
"链家精品房源6",84.5,222.3,"天心区","链家"
"链家精品房源7",88.2,222.1,"雨花区","链家"
"链家精品房源8",159.3,200.4,"开福区","链家"
"链家精品房源9",131.7,118.0,"雨花区","链家"
"链家精品房源10",104.5,93.7,"芙蓉区","链家"
"安居客精品房源1",158.6,121.9,"岳麓区","安居客"
"安居客精品房源2",154.9,116.9,"雨花区","安居客"
"安居客精品房源3",113.4,128.5,"天心区","安居客"
"安居客精品房源4",75.2,124.7,"岳麓区","安居客"
"安居客精品房源5",101.0,153.2,"雨花区","安居客"
"安居客精品房源6",64.9,218.6,"雨花区","安居客"
"安居客精品房源7",116.3,99.8,"天心区","安居客"
"安居客精品房源8",133.7,179.8,"雨花区","安居客"
"贝壳精品房源1",83.5,215.4,"岳麓区","贝壳"
"贝壳精品房源2",122.9,191.5,"天心区","贝壳"
"贝壳精品房源3",91.8,212.2,"开福区","贝壳"
"贝壳精品房源4",81.3,224.5,"雨花区","贝壳"
"贝壳精品房源5",88.2,144.5,"开福区","贝壳"
"贝壳精品房源6",119.3,226.0,"雨花区","贝壳"
"贝壳精品房源7",125.8,174.2,"开福区","贝壳"
"贝壳精品房源8",110.8,158.4,"芙蓉区","贝壳"
"贝壳精品房源9",77.5,140.6,"岳麓区","贝壳"
"贝壳精品房源10",129.8,117.0,"开福区","贝壳"
"贝壳精品房源11",142.4,155.4,"开福区","贝壳"
"贝壳精品房源12",109.5,227.5,"雨花区","贝壳"
1 标题 面积(㎡) 总价(万) 区域 来源
2 链家精品房源1 137.8 130.1 岳麓区 链家
3 链家精品房源2 82.1 150.1 岳麓区 链家
4 链家精品房源3 74.0 85.7 岳麓区 链家
5 链家精品房源4 88.4 161.9 雨花区 链家
6 链家精品房源5 68.0 105.8 雨花区 链家
7 链家精品房源6 84.5 222.3 天心区 链家
8 链家精品房源7 88.2 222.1 雨花区 链家
9 链家精品房源8 159.3 200.4 开福区 链家
10 链家精品房源9 131.7 118.0 雨花区 链家
11 链家精品房源10 104.5 93.7 芙蓉区 链家
12 安居客精品房源1 158.6 121.9 岳麓区 安居客
13 安居客精品房源2 154.9 116.9 雨花区 安居客
14 安居客精品房源3 113.4 128.5 天心区 安居客
15 安居客精品房源4 75.2 124.7 岳麓区 安居客
16 安居客精品房源5 101.0 153.2 雨花区 安居客
17 安居客精品房源6 64.9 218.6 雨花区 安居客
18 安居客精品房源7 116.3 99.8 天心区 安居客
19 安居客精品房源8 133.7 179.8 雨花区 安居客
20 贝壳精品房源1 83.5 215.4 岳麓区 贝壳
21 贝壳精品房源2 122.9 191.5 天心区 贝壳
22 贝壳精品房源3 91.8 212.2 开福区 贝壳
23 贝壳精品房源4 81.3 224.5 雨花区 贝壳
24 贝壳精品房源5 88.2 144.5 开福区 贝壳
25 贝壳精品房源6 119.3 226.0 雨花区 贝壳
26 贝壳精品房源7 125.8 174.2 开福区 贝壳
27 贝壳精品房源8 110.8 158.4 芙蓉区 贝壳
28 贝壳精品房源9 77.5 140.6 岳麓区 贝壳
29 贝壳精品房源10 129.8 117.0 开福区 贝壳
30 贝壳精品房源11 142.4 155.4 开福区 贝壳
31 贝壳精品房源12 109.5 227.5 雨花区 贝壳

5
project/启动爬虫.bat

@ -0,0 +1,5 @@
@echo off
rem Compiles and runs the crawler.
rem Work from the folder that contains this script instead of a hard-coded
rem user-profile path, so the launcher works on any machine and checkout location.
cd /d "%~dp0"
javac -encoding UTF-8 ChangshaHouseCrawler.java
rem Stop on a compile error so a stale or missing class is never run.
if errorlevel 1 (
    pause
    exit /b 1
)
java ChangshaHouseCrawler
pause
Loading…
Cancel
Save