5 changed files with 419 additions and 0 deletions
Binary file not shown.
@ -0,0 +1,290 @@ |
|||
import java.util.*; |
|||
import java.io.*; |
|||
|
|||
// ==================== 异常体系 ====================
|
|||
/**
 * Base checked exception for failures raised by the crawler pipeline.
 *
 * <p>The more specific exception types in this file extend this class, so
 * callers can catch {@code CrawlerException} to handle all of them.
 */
class CrawlerException extends Exception {

    // Exceptions are Serializable; pin the version so javac -Xlint:serial stays quiet.
    private static final long serialVersionUID = 1L;

    /**
     * Creates an exception that carries only a description.
     *
     * @param msg human-readable description of the failure
     */
    public CrawlerException(String msg) {
        super(msg);
    }

    /**
     * Creates an exception that carries a description and the underlying cause.
     *
     * @param msg human-readable description of the failure
     * @param t   the throwable that triggered this failure
     */
    public CrawlerException(String msg, Throwable t) {
        super(msg, t);
    }
}
|||
|
|||
class NetworkException extends CrawlerException { |
|||
public NetworkException(String msg) { super("网络异常: " + msg); } |
|||
} |
|||
|
|||
class ParseException extends CrawlerException { |
|||
public ParseException(String msg) { super("解析异常: " + msg); } |
|||
} |
|||
|
|||
class SaveException extends CrawlerException { |
|||
public SaveException(String msg) { super("保存异常: " + msg); } |
|||
} |
|||
|
|||
// ==================== 数据模型 (Model) ====================
|
|||
/**
 * Immutable record of one house listing: title, area, price, district and
 * the site it came from.
 */
class HouseData {

    private final String title;
    private final double area;
    private final double price;
    private final String district;
    private final String source;

    /**
     * @param t listing title
     * @param a area (the CSV header labels this column in square metres)
     * @param p total price (the CSV header labels this column in units of 10,000)
     * @param d district name
     * @param s name of the site the listing came from
     */
    public HouseData(String t, double a, double p, String d, String s) {
        title = t;
        area = a;
        price = p;
        district = d;
        source = s;
    }

    /**
     * Renders this listing as one CSV row in the column order of
     * {@link #getCSVHeader()}.
     *
     * <p>Text fields are wrapped in double quotes with embedded quotes doubled.
     * Numbers are formatted with {@link Locale#ROOT} so the decimal separator is
     * always {@code '.'}; a comma-decimal default locale would otherwise split
     * one number into two columns.
     *
     * @return the CSV row, without a line terminator
     */
    public String toCSV() {
        return String.format(Locale.ROOT, "%s,%.1f,%.1f,%s,%s",
                quote(title), area, price, quote(district), quote(source));
    }

    /** Wraps a field in double quotes, doubling any quote it contains; null renders as "null". */
    private static String quote(String field) {
        return '"' + String.valueOf(field).replace("\"", "\"\"") + '"';
    }

    /**
     * @return the CSV header line matching the column order of {@link #toCSV()}
     */
    public static String getCSVHeader() {
        return "标题,面积(㎡),总价(万),区域,来源";
    }

    /**
     * @return name of the site this listing came from
     */
    public String getSource() {
        return source;
    }
}
|||
|
|||
// ==================== 策略模式 (Strategy) ====================
|
|||
interface CrawlerStrategy { |
|||
String getSiteName(); |
|||
List<HouseData> crawl() throws CrawlerException; |
|||
} |
|||
|
|||
class LianJiaCrawler implements CrawlerStrategy { |
|||
private Random r = new Random(); |
|||
private String[] dists = {"岳麓区", "雨花区", "天心区", "开福区", "芙蓉区"}; |
|||
|
|||
public String getSiteName() { return "链家"; } |
|||
|
|||
public List<HouseData> crawl() throws CrawlerException { |
|||
List<HouseData> list = new ArrayList<>(); |
|||
try { |
|||
Thread.sleep(300); |
|||
for (int i = 0; i < 10; i++) { |
|||
list.add(new HouseData("链家房源" + (i+1), |
|||
60 + r.nextDouble() * 100, |
|||
80 + r.nextDouble() * 150, |
|||
dists[r.nextInt(5)], "链家")); |
|||
} |
|||
return list; |
|||
} catch (InterruptedException e) { |
|||
throw new CrawlerException("被中断", e); |
|||
} |
|||
} |
|||
} |
|||
|
|||
class AnJuKeCrawler implements CrawlerStrategy { |
|||
private Random r = new Random(); |
|||
private String[] dists = {"岳麓区", "雨花区", "天心区", "开福区", "芙蓉区"}; |
|||
|
|||
public String getSiteName() { return "安居客"; } |
|||
|
|||
public List<HouseData> crawl() throws CrawlerException { |
|||
List<HouseData> list = new ArrayList<>(); |
|||
try { |
|||
Thread.sleep(250); |
|||
for (int i = 0; i < 8; i++) { |
|||
list.add(new HouseData("安居客房源" + (i+1), |
|||
70 + r.nextDouble() * 80, |
|||
70 + r.nextDouble() * 120, |
|||
dists[r.nextInt(5)], "安居客")); |
|||
} |
|||
return list; |
|||
} catch (InterruptedException e) { |
|||
throw new CrawlerException("被中断", e); |
|||
} |
|||
} |
|||
} |
|||
|
|||
class BeiKeCrawler implements CrawlerStrategy { |
|||
private Random r = new Random(); |
|||
private String[] dists = {"岳麓区", "雨花区", "天心区", "开福区", "芙蓉区"}; |
|||
|
|||
public String getSiteName() { return "贝壳"; } |
|||
|
|||
public List<HouseData> crawl() throws CrawlerException { |
|||
List<HouseData> list = new ArrayList<>(); |
|||
try { |
|||
Thread.sleep(350); |
|||
for (int i = 0; i < 12; i++) { |
|||
list.add(new HouseData("贝壳房源" + (i+1), |
|||
80 + r.nextDouble() * 90, |
|||
90 + r.nextDouble() * 130, |
|||
dists[r.nextInt(5)], "贝壳")); |
|||
} |
|||
return list; |
|||
} catch (InterruptedException e) { |
|||
throw new CrawlerException("被中断", e); |
|||
} |
|||
} |
|||
} |
|||
|
|||
// ==================== Command模式 ====================
|
|||
/**
 * A single user-triggered action that can be executed by the controller.
 *
 * <p>Has exactly one abstract method, so it can also be supplied as a lambda.
 */
@FunctionalInterface
interface Command {

    /** Performs the action. */
    void execute();
}
|||
|
|||
class CrawlCommand implements Command { |
|||
private CrawlerStrategy strategy; |
|||
private List<HouseData> dataList; |
|||
private View view; |
|||
|
|||
public CrawlCommand(CrawlerStrategy s, List<HouseData> d, View v) { |
|||
strategy = s; dataList = d; view = v; |
|||
} |
|||
|
|||
public void execute() { |
|||
view.showMessage("正在爬取 " + strategy.getSiteName() + "..."); |
|||
try { |
|||
List<HouseData> result = strategy.crawl(); |
|||
dataList.addAll(result); |
|||
view.showSuccess(strategy.getSiteName() + ": " + result.size() + " 条"); |
|||
} catch (CrawlerException e) { |
|||
view.showError(strategy.getSiteName() + " 失败: " + e.getMessage()); |
|||
} |
|||
} |
|||
} |
|||
|
|||
class SaveCommand implements Command { |
|||
private List<HouseData> dataList; |
|||
private View view; |
|||
|
|||
public SaveCommand(List<HouseData> d, View v) { dataList = d; view = v; } |
|||
|
|||
public void execute() { |
|||
view.showMessage("正在保存数据..."); |
|||
try (PrintWriter w = new PrintWriter("changsha_house.csv")) { |
|||
w.println(HouseData.getCSVHeader()); |
|||
for (HouseData d : dataList) w.println(d.toCSV()); |
|||
view.showSuccess("已保存到 changsha_house.csv"); |
|||
} catch (FileNotFoundException e) { |
|||
view.showError("保存失败: " + e.getMessage()); |
|||
} |
|||
} |
|||
} |
|||
|
|||
class DisplayCommand implements Command { |
|||
private List<HouseData> dataList; |
|||
private View view; |
|||
|
|||
public DisplayCommand(List<HouseData> d, View v) { dataList = d; view = v; } |
|||
|
|||
public void execute() { |
|||
view.displayData(dataList); |
|||
} |
|||
} |
|||
|
|||
// ==================== 视图 (View) ====================
|
|||
class View { |
|||
public void showWelcome() { |
|||
System.out.println("╔════════════════════════════════════╗"); |
|||
System.out.println("║ 长沙房价爬虫系统 v1.0 ║"); |
|||
System.out.println("╚════════════════════════════════════╝"); |
|||
System.out.println(); |
|||
} |
|||
|
|||
public void showMenu() { |
|||
System.out.println("请选择操作:"); |
|||
System.out.println("1. 爬取所有网站"); |
|||
System.out.println("2. 爬取链家"); |
|||
System.out.println("3. 爬取安居客"); |
|||
System.out.println("4. 爬取贝壳"); |
|||
System.out.println("5. 显示数据"); |
|||
System.out.println("6. 保存数据"); |
|||
System.out.println("0. 退出"); |
|||
System.out.print("输入选项: "); |
|||
} |
|||
|
|||
public void showMessage(String msg) { |
|||
System.out.println("[*] " + msg); |
|||
} |
|||
|
|||
public void showSuccess(String msg) { |
|||
System.out.println("[✓] " + msg); |
|||
} |
|||
|
|||
public void showError(String msg) { |
|||
System.err.println("[✗] " + msg); |
|||
} |
|||
|
|||
public void displayData(List<HouseData> data) { |
|||
if (data.isEmpty()) { |
|||
System.out.println("暂无数据"); |
|||
return; |
|||
} |
|||
System.out.println("\n数据预览 (" + data.size() + "条):"); |
|||
System.out.println(HouseData.getCSVHeader()); |
|||
int count = Math.min(5, data.size()); |
|||
for (int i = 0; i < count; i++) { |
|||
System.out.println(data.get(i).toCSV()); |
|||
} |
|||
if (data.size() > 5) System.out.println("... 还有 " + (data.size()-5) + " 条"); |
|||
} |
|||
} |
|||
|
|||
// ==================== 控制器 (Controller) ====================
|
|||
class Controller { |
|||
private View view; |
|||
private List<HouseData> dataList; |
|||
private List<CrawlerStrategy> strategies; |
|||
|
|||
public Controller(View v) { |
|||
view = v; |
|||
dataList = new ArrayList<>(); |
|||
strategies = Arrays.asList(new LianJiaCrawler(), new AnJuKeCrawler(), new BeiKeCrawler()); |
|||
} |
|||
|
|||
public void run() { |
|||
view.showWelcome(); |
|||
Scanner scanner = new Scanner(System.in); |
|||
while (true) { |
|||
view.showMenu(); |
|||
try { |
|||
int choice = Integer.parseInt(scanner.nextLine().trim()); |
|||
Command command = null; |
|||
|
|||
switch (choice) { |
|||
case 1: |
|||
for (CrawlerStrategy s : strategies) { |
|||
new CrawlCommand(s, dataList, view).execute(); |
|||
} |
|||
break; |
|||
case 2: |
|||
command = new CrawlCommand(new LianJiaCrawler(), dataList, view); |
|||
break; |
|||
case 3: |
|||
command = new CrawlCommand(new AnJuKeCrawler(), dataList, view); |
|||
break; |
|||
case 4: |
|||
command = new CrawlCommand(new BeiKeCrawler(), dataList, view); |
|||
break; |
|||
case 5: |
|||
command = new DisplayCommand(dataList, view); |
|||
break; |
|||
case 6: |
|||
command = new SaveCommand(dataList, view); |
|||
break; |
|||
case 0: |
|||
System.out.println("再见!"); |
|||
return; |
|||
default: |
|||
view.showError("无效选项"); |
|||
} |
|||
|
|||
if (command != null) { |
|||
command.execute(); |
|||
} |
|||
System.out.println(); |
|||
} catch (NumberFormatException e) { |
|||
view.showError("请输入数字"); |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
// ==================== 主程序 (CLI入口) ====================
|
|||
public class ChangshaHouseCrawler { |
|||
public static void main(String[] args) { |
|||
View view = new View(); |
|||
Controller controller = new Controller(view); |
|||
controller.run(); |
|||
} |
|||
} |
|||
@ -0,0 +1,93 @@ |
|||
import java.util.*; |
|||
import java.io.*; |
|||
|
|||
/**
 * Self-contained, non-interactive demo run: generates listings for three
 * sites, prints them and writes them to {@code house_data.csv}.
 */
public class CrawlerTest {

    /**
     * Runs the whole flow once: crawl three sites, show the data, save it.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        System.out.println("长沙房价爬虫系统 - 自动测试");
        System.out.println("============================\n");

        List<House> allData = new ArrayList<>();

        // Crawl each site in turn.
        crawlStep(allData, 1, "链家", 10);
        crawlStep(allData, 2, "安居客", 8);
        crawlStep(allData, 3, "贝壳", 12);

        // Show the data.
        System.out.println("4. 数据预览:");
        displayData(allData);

        // Save the data.
        System.out.println("\n5. 保存数据到文件...");
        saveData(allData);

        System.out.println("\n总数据: " + allData.size() + "条");
        System.out.println("运行完成!");
    }

    /** Runs one numbered crawl step: announce it, append the listings, print the per-site count. */
    private static void crawlStep(List<House> allData, int step, String site, int count) {
        System.out.println(step + ". 爬取" + site + "数据...");
        allData.addAll(crawl(site, count));
        System.out.println(" ✓ 完成: " + count(allData, site) + "条\n");
    }

    /** One generated listing. */
    static class House {
        final String title, district, source;
        final double area, price;

        /**
         * @param t listing title
         * @param a area
         * @param p total price
         * @param d district name
         * @param s name of the site the listing is attributed to
         */
        House(String t, double a, double p, String d, String s) {
            title = t;
            area = a;
            price = p;
            district = d;
            source = s;
        }

        /**
         * Renders this listing as one CSV row. Text fields are quoted with
         * embedded quotes doubled; numbers use {@link Locale#ROOT} so the
         * decimal separator is always {@code '.'} and cannot collide with the
         * column separator.
         */
        String toCSV() {
            return String.format(Locale.ROOT, "%s,%.1f,%.1f,%s,%s",
                    quote(title), area, price, quote(district), quote(source));
        }

        /** Renders this listing as one human-readable console line. */
        String toStringLine() {
            return String.format(" [%s] %s - %.1f㎡ - %.1f万 - %s",
                    source, title, area, price, district);
        }

        /** Wraps a field in double quotes, doubling any quote it contains; null renders as "null". */
        private static String quote(String field) {
            return '"' + String.valueOf(field).replace("\"", "\"\"") + '"';
        }
    }

    /**
     * Generates {@code count} listings attributed to {@code source}, with
     * random area, price and district.
     *
     * @param source site name, used as title prefix and as the source field
     * @param count  number of listings to generate; zero or less yields an empty list
     * @return a new mutable list of generated listings
     */
    static List<House> crawl(String source, int count) {
        List<House> list = new ArrayList<>();
        Random r = new Random();
        String[] dists = {"岳麓区", "雨花区", "天心区", "开福区", "芙蓉区"};
        for (int i = 0; i < count; i++) {
            list.add(new House(source + "精品房源" + (i + 1),
                    60 + r.nextDouble() * 100,
                    80 + r.nextDouble() * 150,
                    // Bound by the array length so editing the district list cannot go out of range.
                    dists[r.nextInt(dists.length)], source));
        }
        return list;
    }

    /**
     * Counts the listings whose source equals {@code source}.
     *
     * @param data   listings to scan
     * @param source site name to match (null-safe comparison)
     * @return number of matching listings
     */
    static int count(List<House> data, String source) {
        int cnt = 0;
        for (House h : data) {
            if (Objects.equals(h.source, source)) {
                cnt++;
            }
        }
        return cnt;
    }

    /**
     * Prints every listing as a numbered line inside a box frame.
     *
     * @param data listings to print
     */
    static void displayData(List<House> data) {
        System.out.println(" ┌──────────────────────────────────────────────────────────────┐");
        System.out.println(" │ 爬取的全部房价数据 │");
        System.out.println(" ├──────────────────────────────────────────────────────────────┤");

        for (int i = 0; i < data.size(); i++) {
            System.out.println(" " + (i + 1) + ". " + data.get(i).toStringLine());
        }

        System.out.println(" └──────────────────────────────────────────────────────────────┘");
    }

    /**
     * Writes the header plus one row per listing to {@code house_data.csv},
     * overwriting any existing file.
     *
     * <p>The file is written as UTF-8 so the Chinese text does not depend on
     * the platform default charset. {@link PrintWriter} never throws on write
     * errors, so {@code checkError()} is consulted before success is reported.
     *
     * @param data listings to write
     */
    static void saveData(List<House> data) {
        try (PrintWriter w = new PrintWriter("house_data.csv", "UTF-8")) {
            w.println("标题,面积(㎡),总价(万),区域,来源");
            for (House h : data) {
                w.println(h.toCSV());
            }
            // checkError() flushes and reveals any I/O error that println swallowed.
            if (w.checkError()) {
                System.out.println(" ✗ 保存失败: 写入 house_data.csv 时发生 I/O 错误");
            } else {
                System.out.println(" ✓ 已保存到: house_data.csv");
            }
        } catch (IOException e) {
            // Covers FileNotFoundException (cannot open) and UnsupportedEncodingException.
            System.out.println(" ✗ 保存失败: " + e.getMessage());
        }
    }
}
|||
|
@ -0,0 +1,5 @@ |
|||
@echo off
rem Compile and run ChangshaHouseCrawler from the directory that contains this script.
rem %~dp0 expands to this script's own drive and path, so the script works
rem on any machine instead of relying on one user's absolute path.
cd /d "%~dp0"
javac -encoding UTF-8 ChangshaHouseCrawler.java
rem Stop here when compilation fails so a stale .class file is never run.
if errorlevel 1 (
    echo Compilation failed.
    pause
    exit /b 1
)
java ChangshaHouseCrawler
pause
|||
Loading…
Reference in new issue