// Source listing copied from a repository web viewer (reported size: 290 lines, 9.8 KiB).
import java.util.*;
import java.io.*;

// ==================== Exception hierarchy ====================

/**
 * Base checked exception for every failure raised by the crawler.
 *
 * <p>The more specific exceptions in this file extend this type, so callers
 * can catch {@code CrawlerException} to handle all of them at once.
 */
class CrawlerException extends Exception {

    /**
     * @param msg human-readable description of the failure
     */
    public CrawlerException(String msg) {
        super(msg);
    }

    /**
     * @param msg human-readable description of the failure
     * @param t   underlying cause, preserved for diagnostics
     */
    public CrawlerException(String msg, Throwable t) {
        super(msg, t);
    }
}
class NetworkException extends CrawlerException {
|
|
public NetworkException(String msg) { super("网络异常: " + msg); }
|
|
}
class ParseException extends CrawlerException {
|
|
public ParseException(String msg) { super("解析异常: " + msg); }
|
|
}
class SaveException extends CrawlerException {
|
|
public SaveException(String msg) { super("保存异常: " + msg); }
|
|
}

// ==================== Data model (Model) ====================

/**
 * Immutable record of one house listing: title, area, total price,
 * district and the site it came from.
 */
class HouseData {

    private final String title;
    private final double area;
    private final double price;
    private final String district;
    private final String source;

    /**
     * @param t listing title
     * @param a area (the CSV header labels this column in square metres)
     * @param p total price (the CSV header labels this column in units of 10,000)
     * @param d district name
     * @param s name of the site the listing came from
     */
    public HouseData(String t, double a, double p, String d, String s) {
        title = t;
        area = a;
        price = p;
        district = d;
        source = s;
    }

    /**
     * Renders this listing as one CSV row matching {@link #getCSVHeader()}.
     *
     * <p>Numbers are formatted with {@link Locale#ROOT} so the decimal
     * separator is always {@code '.'}; with the default locale a decimal
     * comma would split a number into two CSV columns. Text fields are
     * quoted, and embedded double quotes are doubled.
     *
     * @return the CSV row, without a line terminator
     */
    public String toCSV() {
        return String.format(Locale.ROOT, "%s,%.1f,%.1f,%s,%s",
                quote(title), area, price, quote(district), quote(source));
    }

    /**
     * @return the CSV header row, without a line terminator
     */
    public static String getCSVHeader() {
        return "标题,面积(㎡),总价(万),区域,来源";
    }

    /**
     * @return name of the site the listing came from
     */
    public String getSource() {
        return source;
    }

    /** Wraps a text field in double quotes, doubling any embedded quote. */
    private static String quote(String value) {
        // String.valueOf keeps the previous "%s" behaviour of printing "null".
        return "\"" + String.valueOf(value).replace("\"", "\"\"") + "\"";
    }
}

// ==================== Strategy pattern (Strategy) ====================

interface CrawlerStrategy {
|
|
String getSiteName();
|
|
List<HouseData> crawl() throws CrawlerException;
|
|
}
class LianJiaCrawler implements CrawlerStrategy {
|
|
private Random r = new Random();
|
|
private String[] dists = {"岳麓区", "雨花区", "天心区", "开福区", "芙蓉区"};
|
|
|
|
public String getSiteName() { return "链家"; }
|
|
|
|
public List<HouseData> crawl() throws CrawlerException {
|
|
List<HouseData> list = new ArrayList<>();
|
|
try {
|
|
Thread.sleep(300);
|
|
for (int i = 0; i < 10; i++) {
|
|
list.add(new HouseData("链家房源" + (i+1),
|
|
60 + r.nextDouble() * 100,
|
|
80 + r.nextDouble() * 150,
|
|
dists[r.nextInt(5)], "链家"));
|
|
}
|
|
return list;
|
|
} catch (InterruptedException e) {
|
|
throw new CrawlerException("被中断", e);
|
|
}
|
|
}
|
|
}
class AnJuKeCrawler implements CrawlerStrategy {
|
|
private Random r = new Random();
|
|
private String[] dists = {"岳麓区", "雨花区", "天心区", "开福区", "芙蓉区"};
|
|
|
|
public String getSiteName() { return "安居客"; }
|
|
|
|
public List<HouseData> crawl() throws CrawlerException {
|
|
List<HouseData> list = new ArrayList<>();
|
|
try {
|
|
Thread.sleep(250);
|
|
for (int i = 0; i < 8; i++) {
|
|
list.add(new HouseData("安居客房源" + (i+1),
|
|
70 + r.nextDouble() * 80,
|
|
70 + r.nextDouble() * 120,
|
|
dists[r.nextInt(5)], "安居客"));
|
|
}
|
|
return list;
|
|
} catch (InterruptedException e) {
|
|
throw new CrawlerException("被中断", e);
|
|
}
|
|
}
|
|
}
class BeiKeCrawler implements CrawlerStrategy {
|
|
private Random r = new Random();
|
|
private String[] dists = {"岳麓区", "雨花区", "天心区", "开福区", "芙蓉区"};
|
|
|
|
public String getSiteName() { return "贝壳"; }
|
|
|
|
public List<HouseData> crawl() throws CrawlerException {
|
|
List<HouseData> list = new ArrayList<>();
|
|
try {
|
|
Thread.sleep(350);
|
|
for (int i = 0; i < 12; i++) {
|
|
list.add(new HouseData("贝壳房源" + (i+1),
|
|
80 + r.nextDouble() * 90,
|
|
90 + r.nextDouble() * 130,
|
|
dists[r.nextInt(5)], "贝壳"));
|
|
}
|
|
return list;
|
|
} catch (InterruptedException e) {
|
|
throw new CrawlerException("被中断", e);
|
|
}
|
|
}
|
|
}

// ==================== Command pattern ====================

/**
 * A single user-triggered action (Command pattern).
 *
 * <p>Implementations receive everything they need through their
 * constructor; {@link #execute()} takes no arguments and returns nothing.
 */
interface Command {

    /** Runs the action. */
    void execute();
}
class CrawlCommand implements Command {
|
|
private CrawlerStrategy strategy;
|
|
private List<HouseData> dataList;
|
|
private View view;
|
|
|
|
public CrawlCommand(CrawlerStrategy s, List<HouseData> d, View v) {
|
|
strategy = s; dataList = d; view = v;
|
|
}
|
|
|
|
public void execute() {
|
|
view.showMessage("正在爬取 " + strategy.getSiteName() + "...");
|
|
try {
|
|
List<HouseData> result = strategy.crawl();
|
|
dataList.addAll(result);
|
|
view.showSuccess(strategy.getSiteName() + ": " + result.size() + " 条");
|
|
} catch (CrawlerException e) {
|
|
view.showError(strategy.getSiteName() + " 失败: " + e.getMessage());
|
|
}
|
|
}
|
|
}
class SaveCommand implements Command {
|
|
private List<HouseData> dataList;
|
|
private View view;
|
|
|
|
public SaveCommand(List<HouseData> d, View v) { dataList = d; view = v; }
|
|
|
|
public void execute() {
|
|
view.showMessage("正在保存数据...");
|
|
try (PrintWriter w = new PrintWriter("changsha_house.csv")) {
|
|
w.println(HouseData.getCSVHeader());
|
|
for (HouseData d : dataList) w.println(d.toCSV());
|
|
view.showSuccess("已保存到 changsha_house.csv");
|
|
} catch (FileNotFoundException e) {
|
|
view.showError("保存失败: " + e.getMessage());
|
|
}
|
|
}
|
|
}
class DisplayCommand implements Command {
|
|
private List<HouseData> dataList;
|
|
private View view;
|
|
|
|
public DisplayCommand(List<HouseData> d, View v) { dataList = d; view = v; }
|
|
|
|
public void execute() {
|
|
view.displayData(dataList);
|
|
}
|
|
}

// ==================== View ====================

class View {
|
|
public void showWelcome() {
|
|
System.out.println("╔════════════════════════════════════╗");
|
|
System.out.println("║ 长沙房价爬虫系统 v1.0 ║");
|
|
System.out.println("╚════════════════════════════════════╝");
|
|
System.out.println();
|
|
}
|
|
|
|
public void showMenu() {
|
|
System.out.println("请选择操作:");
|
|
System.out.println("1. 爬取所有网站");
|
|
System.out.println("2. 爬取链家");
|
|
System.out.println("3. 爬取安居客");
|
|
System.out.println("4. 爬取贝壳");
|
|
System.out.println("5. 显示数据");
|
|
System.out.println("6. 保存数据");
|
|
System.out.println("0. 退出");
|
|
System.out.print("输入选项: ");
|
|
}
|
|
|
|
public void showMessage(String msg) {
|
|
System.out.println("[*] " + msg);
|
|
}
|
|
|
|
public void showSuccess(String msg) {
|
|
System.out.println("[✓] " + msg);
|
|
}
|
|
|
|
public void showError(String msg) {
|
|
System.err.println("[✗] " + msg);
|
|
}
|
|
|
|
public void displayData(List<HouseData> data) {
|
|
if (data.isEmpty()) {
|
|
System.out.println("暂无数据");
|
|
return;
|
|
}
|
|
System.out.println("\n数据预览 (" + data.size() + "条):");
|
|
System.out.println(HouseData.getCSVHeader());
|
|
int count = Math.min(5, data.size());
|
|
for (int i = 0; i < count; i++) {
|
|
System.out.println(data.get(i).toCSV());
|
|
}
|
|
if (data.size() > 5) System.out.println("... 还有 " + (data.size()-5) + " 条");
|
|
}
|
|
}

// ==================== Controller ====================

class Controller {
|
|
private View view;
|
|
private List<HouseData> dataList;
|
|
private List<CrawlerStrategy> strategies;
|
|
|
|
public Controller(View v) {
|
|
view = v;
|
|
dataList = new ArrayList<>();
|
|
strategies = Arrays.asList(new LianJiaCrawler(), new AnJuKeCrawler(), new BeiKeCrawler());
|
|
}
|
|
|
|
public void run() {
|
|
view.showWelcome();
|
|
Scanner scanner = new Scanner(System.in);
|
|
while (true) {
|
|
view.showMenu();
|
|
try {
|
|
int choice = Integer.parseInt(scanner.nextLine().trim());
|
|
Command command = null;
|
|
|
|
switch (choice) {
|
|
case 1:
|
|
for (CrawlerStrategy s : strategies) {
|
|
new CrawlCommand(s, dataList, view).execute();
|
|
}
|
|
break;
|
|
case 2:
|
|
command = new CrawlCommand(new LianJiaCrawler(), dataList, view);
|
|
break;
|
|
case 3:
|
|
command = new CrawlCommand(new AnJuKeCrawler(), dataList, view);
|
|
break;
|
|
case 4:
|
|
command = new CrawlCommand(new BeiKeCrawler(), dataList, view);
|
|
break;
|
|
case 5:
|
|
command = new DisplayCommand(dataList, view);
|
|
break;
|
|
case 6:
|
|
command = new SaveCommand(dataList, view);
|
|
break;
|
|
case 0:
|
|
System.out.println("再见!");
|
|
return;
|
|
default:
|
|
view.showError("无效选项");
|
|
}
|
|
|
|
if (command != null) {
|
|
command.execute();
|
|
}
|
|
System.out.println();
|
|
} catch (NumberFormatException e) {
|
|
view.showError("请输入数字");
|
|
}
|
|
}
|
|
}
|
|
}

// ==================== Main program (CLI entry point) ====================

public class ChangshaHouseCrawler {
|
|
public static void main(String[] args) {
|
|
View view = new View();
|
|
Controller controller = new Controller(view);
|
|
controller.run();
|
|
}
|
|
}