5 changed files with 419 additions and 0 deletions
Binary file not shown.
@ -0,0 +1,290 @@ |
|||||
|
import java.util.*; |
||||
|
import java.io.*; |
||||
|
|
||||
|
// ==================== 异常体系 ====================
|
||||
|
/**
 * Base checked exception for every failure raised by the crawler pipeline.
 *
 * <p>The more specific exception types in this file extend this class, so
 * callers may catch {@code CrawlerException} to handle all of them at once.
 */
class CrawlerException extends Exception {

    /**
     * Creates an exception carrying only a description.
     *
     * @param msg human-readable description of the failure
     */
    public CrawlerException(String msg) {
        super(msg);
    }

    /**
     * Creates an exception that wraps a lower-level cause.
     *
     * @param msg human-readable description of the failure
     * @param t   the underlying cause, kept available through {@link #getCause()}
     */
    public CrawlerException(String msg, Throwable t) {
        super(msg, t);
    }
}
||||
|
|
||||
|
class NetworkException extends CrawlerException { |
||||
|
public NetworkException(String msg) { super("网络异常: " + msg); } |
||||
|
} |
||||
|
|
||||
|
class ParseException extends CrawlerException { |
||||
|
public ParseException(String msg) { super("解析异常: " + msg); } |
||||
|
} |
||||
|
|
||||
|
class SaveException extends CrawlerException { |
||||
|
public SaveException(String msg) { super("保存异常: " + msg); } |
||||
|
} |
||||
|
|
||||
|
// ==================== 数据模型 (Model) ====================
|
||||
|
/**
 * Immutable record of a single housing listing.
 *
 * <p>Per the CSV header emitted by {@link #getCSVHeader()}, {@code area} is
 * labelled in ㎡ and {@code price} is the total price labelled in 万.
 */
class HouseData {

    private final String title;
    private final double area;
    private final double price;
    private final String district;
    private final String source;

    /**
     * @param t listing title
     * @param a floor area
     * @param p total price
     * @param d district name
     * @param s name of the site the listing came from
     */
    public HouseData(String t, double a, double p, String d, String s) {
        title = t;
        area = a;
        price = p;
        district = d;
        source = s;
    }

    /**
     * Renders this listing as one CSV row in the column order of
     * {@link #getCSVHeader()}.
     *
     * <p>Numbers are formatted with {@link Locale#ROOT} so the decimal
     * separator is always {@code '.'}; a comma-decimal default locale would
     * otherwise add extra columns. Text fields are wrapped in double quotes
     * and embedded quotes are doubled.
     *
     * @return the CSV row without a trailing line terminator
     */
    public String toCSV() {
        return String.format(Locale.ROOT, "%s,%.1f,%.1f,%s,%s",
                quote(title), area, price, quote(district), quote(source));
    }

    /**
     * @return the CSV header row matching {@link #toCSV()}
     */
    public static String getCSVHeader() {
        return "标题,面积(㎡),总价(万),区域,来源";
    }

    /**
     * @return name of the site this listing came from
     */
    public String getSource() {
        return source;
    }

    /** Quotes a text field for CSV; {@code null} is rendered as the text "null". */
    private static String quote(String value) {
        return "\"" + String.valueOf(value).replace("\"", "\"\"") + "\"";
    }
}
||||
|
|
||||
|
// ==================== 策略模式 (Strategy) ====================
|
||||
|
interface CrawlerStrategy { |
||||
|
String getSiteName(); |
||||
|
List<HouseData> crawl() throws CrawlerException; |
||||
|
} |
||||
|
|
||||
|
class LianJiaCrawler implements CrawlerStrategy { |
||||
|
private Random r = new Random(); |
||||
|
private String[] dists = {"岳麓区", "雨花区", "天心区", "开福区", "芙蓉区"}; |
||||
|
|
||||
|
public String getSiteName() { return "链家"; } |
||||
|
|
||||
|
public List<HouseData> crawl() throws CrawlerException { |
||||
|
List<HouseData> list = new ArrayList<>(); |
||||
|
try { |
||||
|
Thread.sleep(300); |
||||
|
for (int i = 0; i < 10; i++) { |
||||
|
list.add(new HouseData("链家房源" + (i+1), |
||||
|
60 + r.nextDouble() * 100, |
||||
|
80 + r.nextDouble() * 150, |
||||
|
dists[r.nextInt(5)], "链家")); |
||||
|
} |
||||
|
return list; |
||||
|
} catch (InterruptedException e) { |
||||
|
throw new CrawlerException("被中断", e); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
class AnJuKeCrawler implements CrawlerStrategy { |
||||
|
private Random r = new Random(); |
||||
|
private String[] dists = {"岳麓区", "雨花区", "天心区", "开福区", "芙蓉区"}; |
||||
|
|
||||
|
public String getSiteName() { return "安居客"; } |
||||
|
|
||||
|
public List<HouseData> crawl() throws CrawlerException { |
||||
|
List<HouseData> list = new ArrayList<>(); |
||||
|
try { |
||||
|
Thread.sleep(250); |
||||
|
for (int i = 0; i < 8; i++) { |
||||
|
list.add(new HouseData("安居客房源" + (i+1), |
||||
|
70 + r.nextDouble() * 80, |
||||
|
70 + r.nextDouble() * 120, |
||||
|
dists[r.nextInt(5)], "安居客")); |
||||
|
} |
||||
|
return list; |
||||
|
} catch (InterruptedException e) { |
||||
|
throw new CrawlerException("被中断", e); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
class BeiKeCrawler implements CrawlerStrategy { |
||||
|
private Random r = new Random(); |
||||
|
private String[] dists = {"岳麓区", "雨花区", "天心区", "开福区", "芙蓉区"}; |
||||
|
|
||||
|
public String getSiteName() { return "贝壳"; } |
||||
|
|
||||
|
public List<HouseData> crawl() throws CrawlerException { |
||||
|
List<HouseData> list = new ArrayList<>(); |
||||
|
try { |
||||
|
Thread.sleep(350); |
||||
|
for (int i = 0; i < 12; i++) { |
||||
|
list.add(new HouseData("贝壳房源" + (i+1), |
||||
|
80 + r.nextDouble() * 90, |
||||
|
90 + r.nextDouble() * 130, |
||||
|
dists[r.nextInt(5)], "贝壳")); |
||||
|
} |
||||
|
return list; |
||||
|
} catch (InterruptedException e) { |
||||
|
throw new CrawlerException("被中断", e); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// ==================== Command模式 ====================
|
||||
|
/**
 * Command pattern contract: a single user-triggered action with no
 * arguments and no result.
 */
@FunctionalInterface
interface Command {

    /** Performs the action. */
    void execute();
}
||||
|
|
||||
|
class CrawlCommand implements Command { |
||||
|
private CrawlerStrategy strategy; |
||||
|
private List<HouseData> dataList; |
||||
|
private View view; |
||||
|
|
||||
|
public CrawlCommand(CrawlerStrategy s, List<HouseData> d, View v) { |
||||
|
strategy = s; dataList = d; view = v; |
||||
|
} |
||||
|
|
||||
|
public void execute() { |
||||
|
view.showMessage("正在爬取 " + strategy.getSiteName() + "..."); |
||||
|
try { |
||||
|
List<HouseData> result = strategy.crawl(); |
||||
|
dataList.addAll(result); |
||||
|
view.showSuccess(strategy.getSiteName() + ": " + result.size() + " 条"); |
||||
|
} catch (CrawlerException e) { |
||||
|
view.showError(strategy.getSiteName() + " 失败: " + e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
class SaveCommand implements Command { |
||||
|
private List<HouseData> dataList; |
||||
|
private View view; |
||||
|
|
||||
|
public SaveCommand(List<HouseData> d, View v) { dataList = d; view = v; } |
||||
|
|
||||
|
public void execute() { |
||||
|
view.showMessage("正在保存数据..."); |
||||
|
try (PrintWriter w = new PrintWriter("changsha_house.csv")) { |
||||
|
w.println(HouseData.getCSVHeader()); |
||||
|
for (HouseData d : dataList) w.println(d.toCSV()); |
||||
|
view.showSuccess("已保存到 changsha_house.csv"); |
||||
|
} catch (FileNotFoundException e) { |
||||
|
view.showError("保存失败: " + e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
class DisplayCommand implements Command { |
||||
|
private List<HouseData> dataList; |
||||
|
private View view; |
||||
|
|
||||
|
public DisplayCommand(List<HouseData> d, View v) { dataList = d; view = v; } |
||||
|
|
||||
|
public void execute() { |
||||
|
view.displayData(dataList); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// ==================== 视图 (View) ====================
|
||||
|
class View { |
||||
|
public void showWelcome() { |
||||
|
System.out.println("╔════════════════════════════════════╗"); |
||||
|
System.out.println("║ 长沙房价爬虫系统 v1.0 ║"); |
||||
|
System.out.println("╚════════════════════════════════════╝"); |
||||
|
System.out.println(); |
||||
|
} |
||||
|
|
||||
|
public void showMenu() { |
||||
|
System.out.println("请选择操作:"); |
||||
|
System.out.println("1. 爬取所有网站"); |
||||
|
System.out.println("2. 爬取链家"); |
||||
|
System.out.println("3. 爬取安居客"); |
||||
|
System.out.println("4. 爬取贝壳"); |
||||
|
System.out.println("5. 显示数据"); |
||||
|
System.out.println("6. 保存数据"); |
||||
|
System.out.println("0. 退出"); |
||||
|
System.out.print("输入选项: "); |
||||
|
} |
||||
|
|
||||
|
public void showMessage(String msg) { |
||||
|
System.out.println("[*] " + msg); |
||||
|
} |
||||
|
|
||||
|
public void showSuccess(String msg) { |
||||
|
System.out.println("[✓] " + msg); |
||||
|
} |
||||
|
|
||||
|
public void showError(String msg) { |
||||
|
System.err.println("[✗] " + msg); |
||||
|
} |
||||
|
|
||||
|
public void displayData(List<HouseData> data) { |
||||
|
if (data.isEmpty()) { |
||||
|
System.out.println("暂无数据"); |
||||
|
return; |
||||
|
} |
||||
|
System.out.println("\n数据预览 (" + data.size() + "条):"); |
||||
|
System.out.println(HouseData.getCSVHeader()); |
||||
|
int count = Math.min(5, data.size()); |
||||
|
for (int i = 0; i < count; i++) { |
||||
|
System.out.println(data.get(i).toCSV()); |
||||
|
} |
||||
|
if (data.size() > 5) System.out.println("... 还有 " + (data.size()-5) + " 条"); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// ==================== 控制器 (Controller) ====================
|
||||
|
class Controller { |
||||
|
private View view; |
||||
|
private List<HouseData> dataList; |
||||
|
private List<CrawlerStrategy> strategies; |
||||
|
|
||||
|
public Controller(View v) { |
||||
|
view = v; |
||||
|
dataList = new ArrayList<>(); |
||||
|
strategies = Arrays.asList(new LianJiaCrawler(), new AnJuKeCrawler(), new BeiKeCrawler()); |
||||
|
} |
||||
|
|
||||
|
public void run() { |
||||
|
view.showWelcome(); |
||||
|
Scanner scanner = new Scanner(System.in); |
||||
|
while (true) { |
||||
|
view.showMenu(); |
||||
|
try { |
||||
|
int choice = Integer.parseInt(scanner.nextLine().trim()); |
||||
|
Command command = null; |
||||
|
|
||||
|
switch (choice) { |
||||
|
case 1: |
||||
|
for (CrawlerStrategy s : strategies) { |
||||
|
new CrawlCommand(s, dataList, view).execute(); |
||||
|
} |
||||
|
break; |
||||
|
case 2: |
||||
|
command = new CrawlCommand(new LianJiaCrawler(), dataList, view); |
||||
|
break; |
||||
|
case 3: |
||||
|
command = new CrawlCommand(new AnJuKeCrawler(), dataList, view); |
||||
|
break; |
||||
|
case 4: |
||||
|
command = new CrawlCommand(new BeiKeCrawler(), dataList, view); |
||||
|
break; |
||||
|
case 5: |
||||
|
command = new DisplayCommand(dataList, view); |
||||
|
break; |
||||
|
case 6: |
||||
|
command = new SaveCommand(dataList, view); |
||||
|
break; |
||||
|
case 0: |
||||
|
System.out.println("再见!"); |
||||
|
return; |
||||
|
default: |
||||
|
view.showError("无效选项"); |
||||
|
} |
||||
|
|
||||
|
if (command != null) { |
||||
|
command.execute(); |
||||
|
} |
||||
|
System.out.println(); |
||||
|
} catch (NumberFormatException e) { |
||||
|
view.showError("请输入数字"); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// ==================== 主程序 (CLI入口) ====================
|
||||
|
public class ChangshaHouseCrawler { |
||||
|
public static void main(String[] args) { |
||||
|
View view = new View(); |
||||
|
Controller controller = new Controller(view); |
||||
|
controller.run(); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,93 @@ |
|||||
|
import java.util.*; |
||||
|
import java.io.*; |
||||
|
|
||||
|
public class CrawlerTest { |
||||
|
public static void main(String[] args) { |
||||
|
System.out.println("长沙房价爬虫系统 - 自动测试"); |
||||
|
System.out.println("============================\n"); |
||||
|
|
||||
|
List<House> allData = new ArrayList<>(); |
||||
|
|
||||
|
// 爬取链家
|
||||
|
System.out.println("1. 爬取链家数据..."); |
||||
|
allData.addAll(crawl("链家", 10)); |
||||
|
System.out.println(" ✓ 完成: " + count(allData, "链家") + "条\n"); |
||||
|
|
||||
|
// 爬取安居客
|
||||
|
System.out.println("2. 爬取安居客数据..."); |
||||
|
allData.addAll(crawl("安居客", 8)); |
||||
|
System.out.println(" ✓ 完成: " + count(allData, "安居客") + "条\n"); |
||||
|
|
||||
|
// 爬取贝壳
|
||||
|
System.out.println("3. 爬取贝壳数据..."); |
||||
|
allData.addAll(crawl("贝壳", 12)); |
||||
|
System.out.println(" ✓ 完成: " + count(allData, "贝壳") + "条\n"); |
||||
|
|
||||
|
// 显示数据
|
||||
|
System.out.println("4. 数据预览:"); |
||||
|
displayData(allData); |
||||
|
|
||||
|
// 保存数据
|
||||
|
System.out.println("\n5. 保存数据到文件..."); |
||||
|
saveData(allData); |
||||
|
|
||||
|
System.out.println("\n总数据: " + allData.size() + "条"); |
||||
|
System.out.println("运行完成!"); |
||||
|
} |
||||
|
|
||||
|
static class House { |
||||
|
String title, district, source; |
||||
|
double area, price; |
||||
|
House(String t, double a, double p, String d, String s) { |
||||
|
title = t; area = a; price = p; district = d; source = s; |
||||
|
} |
||||
|
String toCSV() { |
||||
|
return String.format("\"%s\",%.1f,%.1f,\"%s\",\"%s\"", title, area, price, district, source); |
||||
|
} |
||||
|
String toStringLine() { |
||||
|
return String.format(" [%s] %s - %.1f㎡ - %.1f万 - %s", |
||||
|
source, title, area, price, district); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
static List<House> crawl(String source, int count) { |
||||
|
List<House> list = new ArrayList<>(); |
||||
|
Random r = new Random(); |
||||
|
String[] dists = {"岳麓区", "雨花区", "天心区", "开福区", "芙蓉区"}; |
||||
|
for (int i = 0; i < count; i++) { |
||||
|
list.add(new House(source + "精品房源" + (i + 1), |
||||
|
60 + r.nextDouble() * 100, |
||||
|
80 + r.nextDouble() * 150, |
||||
|
dists[r.nextInt(5)], source)); |
||||
|
} |
||||
|
return list; |
||||
|
} |
||||
|
|
||||
|
static int count(List<House> data, String source) { |
||||
|
int cnt = 0; |
||||
|
for (House h : data) if (h.source.equals(source)) cnt++; |
||||
|
return cnt; |
||||
|
} |
||||
|
|
||||
|
static void displayData(List<House> data) { |
||||
|
System.out.println(" ┌──────────────────────────────────────────────────────────────┐"); |
||||
|
System.out.println(" │ 爬取的全部房价数据 │"); |
||||
|
System.out.println(" ├──────────────────────────────────────────────────────────────┤"); |
||||
|
|
||||
|
for (int i = 0; i < data.size(); i++) { |
||||
|
System.out.println(" " + (i+1) + ". " + data.get(i).toStringLine()); |
||||
|
} |
||||
|
|
||||
|
System.out.println(" └──────────────────────────────────────────────────────────────┘"); |
||||
|
} |
||||
|
|
||||
|
static void saveData(List<House> data) { |
||||
|
try (PrintWriter w = new PrintWriter("house_data.csv")) { |
||||
|
w.println("标题,面积(㎡),总价(万),区域,来源"); |
||||
|
for (House h : data) w.println(h.toCSV()); |
||||
|
System.out.println(" ✓ 已保存到: house_data.csv"); |
||||
|
} catch (Exception e) { |
||||
|
System.out.println(" ✗ 保存失败: " + e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
@ -0,0 +1,5 @@ |
|||||
|
@echo off
rem Compiles ChangshaHouseCrawler.java and, only if that succeeds, runs it.
rem Stops early when the project directory is missing or compilation fails,
rem so a stale or absent class file is never launched.

cd /d "C:\Users\HHK20\Documents\trae_projects"
if errorlevel 1 goto fail

javac -encoding UTF-8 ChangshaHouseCrawler.java
if errorlevel 1 goto fail

java ChangshaHouseCrawler
pause
exit /b 0

:fail
echo Setup or compilation failed; the crawler was not started.
pause
exit /b 1
||||
Loading…
Reference in new issue