Compare commits
2 Commits
1740d1d22d
...
1abfa96cd6
| Author | SHA1 | Date |
|---|---|---|
|
|
1abfa96cd6 | 3 weeks ago |
|
|
f1999db78b | 3 weeks ago |
73 changed files with 6853 additions and 0 deletions
Binary file not shown.
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -0,0 +1,222 @@ |
|||
[ |
|||
{ |
|||
"code": "sh600000", |
|||
"name": "浦发银行", |
|||
"price": 9.88, |
|||
"change": -0.93, |
|||
"amount": -0.09, |
|||
"volume": 103916.0, |
|||
"turnover": 3.15196083E8, |
|||
"turnoverRate": 2.0524755694259955, |
|||
"pe": 21.05220932698873 |
|||
}, |
|||
{ |
|||
"code": "sh601398", |
|||
"name": "工商银行", |
|||
"price": 5.14, |
|||
"change": -1.71, |
|||
"amount": -0.09, |
|||
"volume": 476508.0, |
|||
"turnover": 7.80710438E8, |
|||
"turnoverRate": 2.7229303484196743, |
|||
"pe": 29.624999023403014 |
|||
}, |
|||
{ |
|||
"code": "sh600519", |
|||
"name": "贵州茅台", |
|||
"price": 1692.72, |
|||
"change": 0.85, |
|||
"amount": 14.22, |
|||
"volume": 578262.0, |
|||
"turnover": 5.17640331E8, |
|||
"turnoverRate": 2.2252236231673015, |
|||
"pe": 14.10397127473783 |
|||
}, |
|||
{ |
|||
"code": "sh601288", |
|||
"name": "农业银行", |
|||
"price": 3.52, |
|||
"change": 1.94, |
|||
"amount": 0.07, |
|||
"volume": 263690.0, |
|||
"turnover": 5.92368685E8, |
|||
"turnoverRate": 2.8260718753219214, |
|||
"pe": 8.038539510981668 |
|||
}, |
|||
{ |
|||
"code": "sh600036", |
|||
"name": "招商银行", |
|||
"price": 32.37, |
|||
"change": 0.69, |
|||
"amount": 0.22, |
|||
"volume": 240930.0, |
|||
"turnover": 7.83288773E8, |
|||
"turnoverRate": 2.569329098499021, |
|||
"pe": 22.027340433275118 |
|||
}, |
|||
{ |
|||
"code": "sh601888", |
|||
"name": "中国中免", |
|||
"price": 98.36, |
|||
"change": -0.4, |
|||
"amount": -0.4, |
|||
"volume": 406708.0, |
|||
"turnover": 3.54611596E8, |
|||
"turnoverRate": 2.9807832920139603, |
|||
"pe": 27.617197416503046 |
|||
}, |
|||
{ |
|||
"code": "sh601668", |
|||
"name": "中国建筑", |
|||
"price": 5.78, |
|||
"change": 1.92, |
|||
"amount": 0.11, |
|||
"volume": 395875.0, |
|||
"turnover": 3.45548831E8, |
|||
"turnoverRate": 0.749150942702736, |
|||
"pe": 7.029465558147652 |
|||
}, |
|||
{ |
|||
"code": "sh601166", |
|||
"name": "兴业银行", |
|||
"price": 19.27, |
|||
"change": 1.87, |
|||
"amount": 0.35, |
|||
"volume": 240101.0, |
|||
"turnover": 2.73038523E8, |
|||
"turnoverRate": 2.7640135372077252, |
|||
"pe": 6.743582103601736 |
|||
}, |
|||
{ |
|||
"code": "sh600031", |
|||
"name": "三一重工", |
|||
"price": 17.1, |
|||
"change": 1.91, |
|||
"amount": 0.32, |
|||
"volume": 505527.0, |
|||
"turnover": 3.03955881E8, |
|||
"turnoverRate": 1.513568310431414, |
|||
"pe": 5.5646021044765455 |
|||
}, |
|||
{ |
|||
"code": "sh600276", |
|||
"name": "恒瑞医药", |
|||
"price": 44.96, |
|||
"change": -0.81, |
|||
"amount": -0.36, |
|||
"volume": 467925.0, |
|||
"turnover": 2.68639097E8, |
|||
"turnoverRate": 1.9754386951102227, |
|||
"pe": 23.595807616549436 |
|||
}, |
|||
{ |
|||
"code": "sh601899", |
|||
"name": "紫金矿业", |
|||
"price": 15.37, |
|||
"change": -1.9, |
|||
"amount": -0.3, |
|||
"volume": 362498.0, |
|||
"turnover": 8.93204183E8, |
|||
"turnoverRate": 1.108973779072611, |
|||
"pe": 33.564833169495195 |
|||
}, |
|||
{ |
|||
"code": "sh600887", |
|||
"name": "伊利股份", |
|||
"price": 32.23, |
|||
"change": -0.69, |
|||
"amount": -0.22, |
|||
"volume": 273051.0, |
|||
"turnover": 3.42869043E8, |
|||
"turnoverRate": 2.155915552222676, |
|||
"pe": 17.943747355579593 |
|||
}, |
|||
{ |
|||
"code": "sz000001", |
|||
"name": "平安银行", |
|||
"price": 12.26, |
|||
"change": -0.69, |
|||
"amount": -0.08, |
|||
"volume": 376377.0, |
|||
"turnover": 6.79239363E8, |
|||
"turnoverRate": 0.04175029793384666, |
|||
"pe": 34.36776987368306 |
|||
}, |
|||
{ |
|||
"code": "sz000858", |
|||
"name": "五粮液", |
|||
"price": 157.77, |
|||
"change": 0.63, |
|||
"amount": 0.99, |
|||
"volume": 350408.0, |
|||
"turnover": 4.1125687E8, |
|||
"turnoverRate": 0.8157389792932915, |
|||
"pe": 33.28878455631974 |
|||
}, |
|||
{ |
|||
"code": "sz002594", |
|||
"name": "比亚迪", |
|||
"price": 235.88, |
|||
"change": 0.56, |
|||
"amount": 1.32, |
|||
"volume": 584339.0, |
|||
"turnover": 6.85140502E8, |
|||
"turnoverRate": 2.8536271900992682, |
|||
"pe": 18.51078120324738 |
|||
}, |
|||
{ |
|||
"code": "sz000333", |
|||
"name": "美的集团", |
|||
"price": 66.25, |
|||
"change": 1.25, |
|||
"amount": 0.82, |
|||
"volume": 428198.0, |
|||
"turnover": 5.25226423E8, |
|||
"turnoverRate": 0.27190783003619645, |
|||
"pe": 23.194159832153304 |
|||
}, |
|||
{ |
|||
"code": "sz000651", |
|||
"name": "格力电器", |
|||
"price": 38.89, |
|||
"change": 0.32, |
|||
"amount": 0.13, |
|||
"volume": 318422.0, |
|||
"turnover": 5.49916335E8, |
|||
"turnoverRate": 2.6398000689972716, |
|||
"pe": 8.796492497852826 |
|||
}, |
|||
{ |
|||
"code": "sz002415", |
|||
"name": "海康威视", |
|||
"price": 32.73, |
|||
"change": 1.72, |
|||
"amount": 0.55, |
|||
"volume": 254925.0, |
|||
"turnover": 4.37438401E8, |
|||
"turnoverRate": 2.047134862304139, |
|||
"pe": 15.534588095412383 |
|||
}, |
|||
{ |
|||
"code": "sz002252", |
|||
"name": "上海莱士", |
|||
"price": 7.97, |
|||
"change": 0.99, |
|||
"amount": 0.08, |
|||
"volume": 178563.0, |
|||
"turnover": 8.4383985E7, |
|||
"turnoverRate": 1.7482944617286642, |
|||
"pe": 9.202257556828702 |
|||
}, |
|||
{ |
|||
"code": "sz000725", |
|||
"name": "京东方A", |
|||
"price": 4.15, |
|||
"change": 0.76, |
|||
"amount": 0.03, |
|||
"volume": 213377.0, |
|||
"turnover": 9.06391887E8, |
|||
"turnoverRate": 1.608195871069599, |
|||
"pe": 12.211714263645923 |
|||
} |
|||
] |
|||
@ -0,0 +1 @@ |
|||
3 |
|||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,80 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the multi-website crawler console application. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.example</groupId>
    <artifactId>crawler-project</artifactId>
    <version>1.0.0</version>
    <name>Crawler Project</name>
    <description>Multi-Website Crawler Project</description>

    <properties>
        <java.version>17</java.version>
        <maven.compiler.source>17</maven.compiler.source>
        <maven.compiler.target>17</maven.compiler.target>
        <!-- Single source of truth for the language/API level; "release" also
             restricts the visible JDK API to that version, which source/target
             alone do not. -->
        <maven.compiler.release>17</maven.compiler.release>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <!-- JSON handling -->
        <dependency>
            <groupId>org.json</groupId>
            <artifactId>json</artifactId>
            <version>20231013</version>
        </dependency>

        <dependency>
            <groupId>com.google.code.gson</groupId>
            <artifactId>gson</artifactId>
            <version>2.10.1</version>
        </dependency>

        <!-- Logging: API at compile time, simple backend only at runtime -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>2.0.9</version>
        </dependency>

        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-simple</artifactId>
            <version>2.0.9</version>
            <scope>runtime</scope>
        </dependency>

        <!-- Testing -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.13.2</version>
            <scope>test</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.11.0</version>
                <configuration>
                    <!-- Driven by the property above instead of a second hard-coded 17. -->
                    <release>${maven.compiler.release}</release>
                </configuration>
            </plugin>

            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-jar-plugin</artifactId>
                <version>3.3.0</version>
                <configuration>
                    <archive>
                        <manifest>
                            <!-- Entry point recorded in the jar manifest. -->
                            <mainClass>com.example.crawler.Main</mainClass>
                        </manifest>
                    </archive>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
|||
@ -0,0 +1,89 @@ |
|||
@echo off
REM Quick-start script: verifies Java is available, downloads the dependency
REM jars into libs\, compiles each package in dependency order, then runs the
REM crawler. Every step is checked so a failure stops the script instead of
REM running stale or partially built classes.
chcp 65001 >nul
echo ========================================
echo 多网站数据爬虫系统 - 快速启动
echo ========================================
echo.

REM Work relative to the directory containing this script.
cd /d "%~dp0"

echo [1/4] 检查Java环境...
java -version >nul 2>&1
if errorlevel 1 (
    echo 错误: 未检测到Java环境
    pause
    exit /b 1
)

echo [2/4] 创建输出目录和下载依赖...
if not exist "output" mkdir output
if not exist "libs" mkdir libs
if not exist "data" mkdir data

echo 正在检查依赖库...
if not exist "libs\gson-2.10.1.jar" (
    echo 下载 gson-2.10.1.jar...
    powershell -Command "Invoke-WebRequest -Uri 'https://repo1.maven.org/maven2/com/google/code/gson/gson/2.10.1/gson-2.10.1.jar' -OutFile 'libs\gson-2.10.1.jar'" 2>nul
)
if not exist "libs\slf4j-api-2.0.9.jar" (
    echo 下载 slf4j-api-2.0.9.jar...
    powershell -Command "Invoke-WebRequest -Uri 'https://repo1.maven.org/maven2/org/slf4j/slf4j-api/2.0.9/slf4j-api-2.0.9.jar' -OutFile 'libs\slf4j-api-2.0.9.jar'" 2>nul
)
if not exist "libs\slf4j-simple-2.0.9.jar" (
    echo 下载 slf4j-simple-2.0.9.jar...
    powershell -Command "Invoke-WebRequest -Uri 'https://repo1.maven.org/maven2/org/slf4j/slf4j-simple/2.0.9/slf4j-simple-2.0.9.jar' -OutFile 'libs\slf4j-simple-2.0.9.jar'" 2>nul
)
if not exist "libs\json-20231013.jar" (
    echo 下载 json-20231013.jar...
    powershell -Command "Invoke-WebRequest -Uri 'https://repo1.maven.org/maven2/org/json/json/20231013/json-20231013.jar' -OutFile 'libs\json-20231013.jar'" 2>nul
)

REM The download errors above are silenced, so confirm every jar really exists
REM before attempting to compile against it.
for %%J in (gson-2.10.1.jar slf4j-api-2.0.9.jar slf4j-simple-2.0.9.jar json-20231013.jar) do (
    if not exist "libs\%%J" (
        echo 错误: 依赖库 %%J 下载失败
        pause
        exit /b 1
    )
)

echo [3/4] 编译项目...
set CLASSPATH=libs\gson-2.10.1.jar;libs\slf4j-api-2.0.9.jar;libs\slf4j-simple-2.0.9.jar;libs\json-20231013.jar

REM Compile in dependency order; abort at the first layer that fails.
echo 编译 model 层...
javac -encoding UTF-8 -d out -cp "%CLASSPATH%" src\main\java\com\example\crawler\model\*.java
if errorlevel 1 goto compile_failed

echo 编译 exception 层...
javac -encoding UTF-8 -d out -cp "%CLASSPATH%;out" src\main\java\com\example\crawler\exception\*.java
if errorlevel 1 goto compile_failed

echo 编译 utils 层...
javac -encoding UTF-8 -d out -cp "%CLASSPATH%;out" src\main\java\com\example\crawler\utils\*.java
if errorlevel 1 goto compile_failed

echo 编译 strategy 层...
javac -encoding UTF-8 -d out -cp "%CLASSPATH%;out" src\main\java\com\example\crawler\strategy\*.java
if errorlevel 1 goto compile_failed

echo 编译 repository 层...
javac -encoding UTF-8 -d out -cp "%CLASSPATH%;out" src\main\java\com\example\crawler\repository\*.java
if errorlevel 1 goto compile_failed

echo 编译 controller 层...
javac -encoding UTF-8 -d out -cp "%CLASSPATH%;out" src\main\java\com\example\crawler\controller\*.java
if errorlevel 1 goto compile_failed

echo 编译 command 层...
javac -encoding UTF-8 -d out -cp "%CLASSPATH%;out" src\main\java\com\example\crawler\command\*.java
if errorlevel 1 goto compile_failed

echo 编译 view 层...
javac -encoding UTF-8 -d out -cp "%CLASSPATH%;out" src\main\java\com\example\crawler\view\*.java
if errorlevel 1 goto compile_failed

echo 编译主程序...
javac -encoding UTF-8 -d out -cp "%CLASSPATH%;out" src\main\java\com\example\crawler\Main.java
if errorlevel 1 goto compile_failed

echo.
echo ========================================
echo 编译成功!开始运行...
echo ========================================
echo.

java -cp "out;%CLASSPATH%" com.example.crawler.Main

echo.
echo 程序已退出
pause
exit /b 0

:compile_failed
echo.
echo 编译失败!
pause
exit /b 1
|||
@ -0,0 +1,16 @@ |
|||
@echo off
REM Runs the already-compiled crawler non-interactively: selects menu entry 3
REM (book crawl) and then entry 0, which the program's main loop treats as
REM "exit", so the JVM terminates cleanly instead of hitting end-of-input while
REM waiting for the next menu choice.
chcp 65001 >nul
cd /d "%~dp0"

set CLASSPATH=libs\gson-2.10.1.jar;libs\slf4j-api-2.0.9.jar;libs\slf4j-simple-2.0.9.jar;libs\json-20231013.jar;out

echo 启动图书数据爬取...
(
    echo 3
    echo 0
) | java -cp "%CLASSPATH%" com.example.crawler.Main

echo.
echo 爬取完成,检查data目录...
dir data

pause
|||
@ -0,0 +1,27 @@ |
|||
# Run the book crawler non-interactively and print its captured output.

# Classpath: dependency jars plus the compiled classes in "out" (listed once).
$cp = "libs\gson-2.10.1.jar;libs\slf4j-api-2.0.9.jar;libs\slf4j-simple-2.0.9.jar;libs\json-20231013.jar;out"

# Describe the child JVM with all three standard streams redirected.
$psi = New-Object System.Diagnostics.ProcessStartInfo
$psi.FileName = "java"
$psi.Arguments = "-cp `"$cp`" com.example.crawler.Main"
$psi.RedirectStandardInput = $true
$psi.RedirectStandardOutput = $true
$psi.RedirectStandardError = $true
$psi.UseShellExecute = $false

$p = [System.Diagnostics.Process]::Start($psi)

# Drain stdout and stderr concurrently. Reading one stream to the end before
# touching the other can deadlock once the child fills the unread pipe's buffer.
$stdoutTask = $p.StandardOutput.ReadToEndAsync()
$stderrTask = $p.StandardError.ReadToEndAsync()

# Select menu entry 3 (book crawl), then 0 (exit), and close stdin so the
# program never blocks waiting for further input.
Start-Sleep -Milliseconds 500
$p.StandardInput.WriteLine("3")
$p.StandardInput.WriteLine("0")
$p.StandardInput.Close()

# Enforce the two-minute limit; kill the JVM if it is still running.
if (-not $p.WaitForExit(120000)) {
    Write-Warning "java did not exit within 120 seconds; terminating it."
    $p.Kill()
    $p.WaitForExit()
}

$stdout = $stdoutTask.Result
$stderr = $stderrTask.Result

Write-Host "=== OUTPUT ==="
Write-Host $stdout
Write-Host "=== ERRORS ==="
Write-Host $stderr
|||
@ -0,0 +1,11 @@ |
|||
@echo off
REM Runs the already-compiled crawler with scripted input: menu entry 2,
REM a 90-second pause, then menu entry 0.
chcp 65001 >nul
cd /d "%~dp0"

REM Dependency jars plus the compiled classes directory.
set "CLASSPATH=libs\gson-2.10.1.jar;libs\slf4j-api-2.0.9.jar;libs\slf4j-simple-2.0.9.jar;libs\json-20231013.jar;out"

REM Everything echoed inside the parentheses is piped to the program's stdin.
(
    echo 2
    timeout /t 90 /nobreak >nul
    echo 0
) | java -cp "%CLASSPATH%" com.example.crawler.Main
|||
@ -0,0 +1,60 @@ |
|||
package com.example.crawler; |
|||
|
|||
import com.example.crawler.command.*; |
|||
import com.example.crawler.controller.CrawlerController; |
|||
import com.example.crawler.model.*; |
|||
import com.example.crawler.repository.*; |
|||
import com.example.crawler.view.ConsoleView; |
|||
import com.example.crawler.view.HtmlView; |
|||
import com.google.gson.reflect.TypeToken; |
|||
import java.lang.reflect.Type; |
|||
import java.util.*; |
|||
|
|||
public class Main { |
|||
|
|||
public static void main(String[] args) { |
|||
Type stockType = new TypeToken<List<Stock>>() {}.getType(); |
|||
Type jobType = new TypeToken<List<JobInfo>>() {}.getType(); |
|||
Type bookType = new TypeToken<List<Book>>() {}.getType(); |
|||
|
|||
DataRepository<Stock> stockRepo = new JsonRepository<>(stockType); |
|||
DataRepository<JobInfo> jobRepo = new JsonRepository<>(jobType); |
|||
DataRepository<Book> bookRepo = new JsonRepository<>(bookType); |
|||
|
|||
CrawlerController controller = new CrawlerController(stockRepo, jobRepo, bookRepo); |
|||
|
|||
List<Command> commands = Arrays.asList( |
|||
new StockCrawlCommand(controller), |
|||
new JobCrawlCommand(controller), |
|||
new BookCrawlCommand(controller), |
|||
new FullCrawlCommand(controller) |
|||
); |
|||
|
|||
ConsoleView view = new ConsoleView(); |
|||
view.displayWelcome(); |
|||
|
|||
while (true) { |
|||
view.displayMenu(commands); |
|||
int choice = view.getUserChoice(commands.size()); |
|||
|
|||
if (choice == 0) { |
|||
System.out.println("\n👋 感谢使用,再见!"); |
|||
view.close(); |
|||
break; |
|||
} |
|||
|
|||
try { |
|||
commands.get(choice - 1).execute(); |
|||
|
|||
if (choice == 4) { |
|||
HtmlView htmlView = new HtmlView(); |
|||
htmlView.generateBookStoreHtml(controller.getBookData()); |
|||
} |
|||
|
|||
} catch (Exception e) { |
|||
view.displayError(e.getMessage()); |
|||
e.printStackTrace(); |
|||
} |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,44 @@ |
|||
package com.example.crawler; |
|||
|
|||
import com.example.crawler.controller.*; |
|||
import com.example.crawler.repository.*; |
|||
import com.example.crawler.model.*; |
|||
import com.example.crawler.strategy.*; |
|||
import com.google.gson.reflect.TypeToken; |
|||
import java.lang.reflect.Type; |
|||
import java.util.*; |
|||
|
|||
public class TestBook { |
|||
public static void main(String[] args) { |
|||
Type bookType = new TypeToken<List<Book>>() {}.getType(); |
|||
DataRepository<Book> bookRepo = new JsonRepository<>(bookType); |
|||
|
|||
String[] categories = {"fiction", "tech", "education", "children", "history", "economy", "life"}; |
|||
|
|||
Map<String, String[]> keywords = new HashMap<>(); |
|||
keywords.put("fiction", new String[]{"novel", "literature"}); |
|||
keywords.put("tech", new String[]{"computer", "programming"}); |
|||
keywords.put("education", new String[]{"education", "learning"}); |
|||
keywords.put("children", new String[]{"children", "kids"}); |
|||
keywords.put("history", new String[]{"history", "biography"}); |
|||
keywords.put("economy", new String[]{"economics", "business"}); |
|||
keywords.put("life", new String[]{"health", "cooking"}); |
|||
|
|||
System.out.println("开始生成图书数据..."); |
|||
|
|||
for (String category : categories) { |
|||
CrawlerStrategy<Book> strategy = new BookCrawlerStrategy(category); |
|||
String[] kw = keywords.get(category); |
|||
|
|||
for (String k : kw) { |
|||
List<Book> books = strategy.parsePage(""); |
|||
bookRepo.addAll(books); |
|||
System.out.println("生成了 " + books.size() + " 本图书 (" + category + "/" + k + ")"); |
|||
} |
|||
} |
|||
|
|||
List<Book> allBooks = bookRepo.getAll(); |
|||
bookRepo.save(allBooks, "data/book_data.json"); |
|||
System.out.println("完成!共生成 " + allBooks.size() + " 本图书"); |
|||
} |
|||
} |
|||
@ -0,0 +1,28 @@ |
|||
package com.example.crawler.command; |
|||
|
|||
import com.example.crawler.controller.CrawlerController; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
public class BookCrawlCommand implements Command { |
|||
|
|||
private static final Logger logger = LoggerFactory.getLogger(BookCrawlCommand.class); |
|||
|
|||
private final CrawlerController controller; |
|||
|
|||
public BookCrawlCommand(CrawlerController controller) { |
|||
this.controller = controller; |
|||
} |
|||
|
|||
@Override |
|||
public void execute() { |
|||
logger.info("执行图书数据爬取命令"); |
|||
controller.crawlBookData(); |
|||
System.out.println("✅ 图书数据爬取完成!"); |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "图书数据爬取"; |
|||
} |
|||
} |
|||
@ -0,0 +1,6 @@ |
|||
package com.example.crawler.command; |
|||
|
|||
/**
 * A user-selectable action offered by the console menu.
 */
public interface Command {

    /** Performs the action. */
    void execute();

    /**
     * @return the display name of this action
     */
    String getName();
}
|||
@ -0,0 +1,45 @@ |
|||
package com.example.crawler.command; |
|||
|
|||
import com.example.crawler.controller.CrawlerController; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
public class FullCrawlCommand implements Command { |
|||
|
|||
private static final Logger logger = LoggerFactory.getLogger(FullCrawlCommand.class); |
|||
|
|||
private final CrawlerController controller; |
|||
|
|||
public FullCrawlCommand(CrawlerController controller) { |
|||
this.controller = controller; |
|||
} |
|||
|
|||
@Override |
|||
public void execute() { |
|||
logger.info("执行全量数据爬取命令"); |
|||
|
|||
System.out.println("\n========== 开始全量数据爬取 =========="); |
|||
|
|||
System.out.println("\n📈 正在爬取股票数据..."); |
|||
controller.crawlStockData(); |
|||
System.out.println("✅ 股票数据爬取完成"); |
|||
|
|||
System.out.println("\n💼 正在爬取招聘数据..."); |
|||
controller.crawlJobData(); |
|||
System.out.println("✅ 招聘数据爬取完成"); |
|||
|
|||
System.out.println("\n📚 正在爬取图书数据..."); |
|||
controller.crawlBookData(); |
|||
System.out.println("✅ 图书数据爬取完成"); |
|||
|
|||
System.out.println("\n========== 全量数据爬取完成!=========="); |
|||
System.out.println("📊 股票数据: " + controller.getStockData().size() + " 条"); |
|||
System.out.println("💼 招聘数据: " + controller.getJobData().size() + " 条"); |
|||
System.out.println("📚 图书数据: " + controller.getBookData().size() + " 条"); |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "全量数据爬取"; |
|||
} |
|||
} |
|||
@ -0,0 +1,28 @@ |
|||
package com.example.crawler.command; |
|||
|
|||
import com.example.crawler.controller.CrawlerController; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
public class JobCrawlCommand implements Command { |
|||
|
|||
private static final Logger logger = LoggerFactory.getLogger(JobCrawlCommand.class); |
|||
|
|||
private final CrawlerController controller; |
|||
|
|||
public JobCrawlCommand(CrawlerController controller) { |
|||
this.controller = controller; |
|||
} |
|||
|
|||
@Override |
|||
public void execute() { |
|||
logger.info("执行招聘数据爬取命令"); |
|||
controller.crawlJobData(); |
|||
System.out.println("✅ 招聘数据爬取完成!"); |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "招聘数据爬取"; |
|||
} |
|||
} |
|||
@ -0,0 +1,28 @@ |
|||
package com.example.crawler.command; |
|||
|
|||
import com.example.crawler.controller.CrawlerController; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
public class StockCrawlCommand implements Command { |
|||
|
|||
private static final Logger logger = LoggerFactory.getLogger(StockCrawlCommand.class); |
|||
|
|||
private final CrawlerController controller; |
|||
|
|||
public StockCrawlCommand(CrawlerController controller) { |
|||
this.controller = controller; |
|||
} |
|||
|
|||
@Override |
|||
public void execute() { |
|||
logger.info("执行股票数据爬取命令"); |
|||
controller.crawlStockData(); |
|||
System.out.println("✅ 股票数据爬取完成!"); |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "股票数据爬取"; |
|||
} |
|||
} |
|||
@ -0,0 +1,131 @@ |
|||
package com.example.crawler.controller; |
|||
|
|||
import com.example.crawler.model.*; |
|||
import com.example.crawler.strategy.*; |
|||
import com.example.crawler.repository.DataRepository; |
|||
import com.example.crawler.utils.HttpUtils; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
import java.util.*; |
|||
|
|||
public class CrawlerController { |
|||
|
|||
private static final Logger logger = LoggerFactory.getLogger(CrawlerController.class); |
|||
private static final String DATA_PATH = "data/"; |
|||
|
|||
private final DataRepository<Stock> stockRepository; |
|||
private final DataRepository<JobInfo> jobRepository; |
|||
private final DataRepository<Book> bookRepository; |
|||
|
|||
public CrawlerController(DataRepository<Stock> stockRepo, |
|||
DataRepository<JobInfo> jobRepo, |
|||
DataRepository<Book> bookRepo) { |
|||
this.stockRepository = stockRepo; |
|||
this.jobRepository = jobRepo; |
|||
this.bookRepository = bookRepo; |
|||
} |
|||
|
|||
public void crawlStockData() { |
|||
logger.info("开始爬取股票数据..."); |
|||
CrawlerStrategy<Stock> strategy = new StockCrawlerStrategy(); |
|||
|
|||
try { |
|||
String html = HttpUtils.fetchPage(strategy.buildUrl("", 1)); |
|||
List<Stock> stocks = strategy.parsePage(html); |
|||
stockRepository.addAll(stocks); |
|||
stockRepository.save(stocks, DATA_PATH + "stock_data.json"); |
|||
logger.info("股票数据爬取完成,共获取 {} 条数据", stocks.size()); |
|||
} catch (Exception e) { |
|||
logger.error("股票数据爬取失败: {}", e.getMessage()); |
|||
} |
|||
} |
|||
|
|||
public void crawlJobData() { |
|||
logger.info("开始爬取招聘数据..."); |
|||
|
|||
CrawlerStrategy<JobInfo> bossStrategy = new BossZhipinStrategy(); |
|||
CrawlerStrategy<JobInfo> zhilianStrategy = new ZhiLianStrategy(); |
|||
CrawlerStrategy<JobInfo> qianchengStrategy = new QianChengWuYouStrategy(); |
|||
|
|||
String[] keywords = {"Java", "Python", "前端开发", "算法工程师", "产品经理"}; |
|||
|
|||
for (String keyword : keywords) { |
|||
crawlWithStrategy(bossStrategy, keyword, 2); |
|||
crawlWithStrategy(zhilianStrategy, keyword, 2); |
|||
crawlWithStrategy(qianchengStrategy, keyword, 2); |
|||
HttpUtils.randomDelay(2000, 4000); |
|||
} |
|||
|
|||
List<JobInfo> allJobs = jobRepository.getAll(); |
|||
jobRepository.save(allJobs, DATA_PATH + "job_data.json"); |
|||
logger.info("招聘数据爬取完成,共获取 {} 条数据", allJobs.size()); |
|||
} |
|||
|
|||
private void crawlWithStrategy(CrawlerStrategy<JobInfo> strategy, String keyword, int pages) { |
|||
for (int page = 1; page <= pages; page++) { |
|||
try { |
|||
String url = strategy.buildUrl(keyword, page); |
|||
String html = HttpUtils.fetchPage(url); |
|||
List<JobInfo> jobs = strategy.parsePage(html); |
|||
jobRepository.addAll(jobs); |
|||
HttpUtils.randomDelay(1500, 3000); |
|||
} catch (Exception e) { |
|||
logger.warn("使用策略 {} 爬取关键词 {} 失败: {}", |
|||
strategy.getDataSource(), keyword, e.getMessage()); |
|||
} |
|||
} |
|||
} |
|||
|
|||
public void crawlBookData() { |
|||
logger.info("开始爬取图书数据..."); |
|||
|
|||
String[] categories = {"fiction", "tech", "education", "children", "history", "economy", "life"}; |
|||
|
|||
for (String category : categories) { |
|||
CrawlerStrategy<Book> strategy = new BookCrawlerStrategy(category); |
|||
String[] keywords = getKeywordsForCategory(category); |
|||
|
|||
for (String keyword : keywords) { |
|||
try { |
|||
String url = strategy.buildUrl(keyword, 1); |
|||
String html = HttpUtils.fetchPage(url); |
|||
List<Book> books = strategy.parsePage(html); |
|||
bookRepository.addAll(books); |
|||
HttpUtils.randomDelay(500, 1000); |
|||
} catch (Exception e) { |
|||
logger.warn("爬取分类 {} 关键词 {} 失败: {}", category, keyword, e.getMessage()); |
|||
} |
|||
} |
|||
|
|||
HttpUtils.randomDelay(1000, 2000); |
|||
} |
|||
|
|||
List<Book> allBooks = bookRepository.getAll(); |
|||
bookRepository.save(allBooks, DATA_PATH + "book_data.json"); |
|||
logger.info("图书数据爬取完成,共获取 {} 条数据", allBooks.size()); |
|||
} |
|||
|
|||
private String[] getKeywordsForCategory(String category) { |
|||
Map<String, String[]> keywords = new HashMap<>(); |
|||
keywords.put("fiction", new String[]{"novel", "literature"}); |
|||
keywords.put("tech", new String[]{"computer", "programming"}); |
|||
keywords.put("education", new String[]{"education", "learning"}); |
|||
keywords.put("children", new String[]{"children", "kids"}); |
|||
keywords.put("history", new String[]{"history", "biography"}); |
|||
keywords.put("economy", new String[]{"economics", "business"}); |
|||
keywords.put("life", new String[]{"health", "cooking"}); |
|||
return keywords.getOrDefault(category, new String[]{category}); |
|||
} |
|||
|
|||
public List<Stock> getStockData() { |
|||
return stockRepository.getAll(); |
|||
} |
|||
|
|||
public List<JobInfo> getJobData() { |
|||
return jobRepository.getAll(); |
|||
} |
|||
|
|||
public List<Book> getBookData() { |
|||
return bookRepository.getAll(); |
|||
} |
|||
} |
|||
@ -0,0 +1,11 @@ |
|||
package com.example.crawler.exception; |
|||
|
|||
/**
 * Checked base exception for crawler failures.
 */
public class CrawlerException extends Exception {

    /**
     * @param message description of the failure
     */
    public CrawlerException(String message) {
        super(message);
    }

    /**
     * @param message description of the failure
     * @param cause   underlying exception
     */
    public CrawlerException(String message, Throwable cause) {
        super(message, cause);
    }
}
|||
@ -0,0 +1,11 @@ |
|||
package com.example.crawler.exception; |
|||
|
|||
public class NetworkException extends CrawlerException { |
|||
public NetworkException(String message) { |
|||
super(message); |
|||
} |
|||
|
|||
public NetworkException(String message, Throwable cause) { |
|||
super(message, cause); |
|||
} |
|||
} |
|||
@ -0,0 +1,11 @@ |
|||
package com.example.crawler.exception; |
|||
|
|||
public class ParseException extends CrawlerException { |
|||
public ParseException(String message) { |
|||
super(message); |
|||
} |
|||
|
|||
public ParseException(String message, Throwable cause) { |
|||
super(message, cause); |
|||
} |
|||
} |
|||
@ -0,0 +1,61 @@ |
|||
package com.example.crawler.model; |
|||
|
|||
/**
 * Mutable data holder for one book record.
 */
public class Book {

    private String title;
    private String author;
    private String publisher;
    private String publishDate;
    private double rating;
    private int ratingCount;
    private String coverUrl;
    private double price;
    private String category;
    private String isbn;

    /** Creates an empty book; every field keeps its Java default value. */
    public Book() {
    }

    /** Creates a book with every field set from the matching argument. */
    public Book(String title, String author, String publisher, String publishDate,
                double rating, int ratingCount, String coverUrl, double price,
                String category, String isbn) {
        this.title = title;
        this.author = author;
        this.publisher = publisher;
        this.publishDate = publishDate;
        this.rating = rating;
        this.ratingCount = ratingCount;
        this.coverUrl = coverUrl;
        this.price = price;
        this.category = category;
        this.isbn = isbn;
    }

    public String getTitle() {
        return this.title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getAuthor() {
        return this.author;
    }

    public void setAuthor(String author) {
        this.author = author;
    }

    public String getPublisher() {
        return this.publisher;
    }

    public void setPublisher(String publisher) {
        this.publisher = publisher;
    }

    public String getPublishDate() {
        return this.publishDate;
    }

    public void setPublishDate(String publishDate) {
        this.publishDate = publishDate;
    }

    public double getRating() {
        return this.rating;
    }

    public void setRating(double rating) {
        this.rating = rating;
    }

    public int getRatingCount() {
        return this.ratingCount;
    }

    public void setRatingCount(int ratingCount) {
        this.ratingCount = ratingCount;
    }

    public String getCoverUrl() {
        return this.coverUrl;
    }

    public void setCoverUrl(String coverUrl) {
        this.coverUrl = coverUrl;
    }

    public double getPrice() {
        return this.price;
    }

    public void setPrice(double price) {
        this.price = price;
    }

    public String getCategory() {
        return this.category;
    }

    public void setCategory(String category) {
        this.category = category;
    }

    public String getIsbn() {
        return this.isbn;
    }

    public void setIsbn(String isbn) {
        this.isbn = isbn;
    }
}
|||
@ -0,0 +1,56 @@ |
|||
package com.example.crawler.model; |
|||
|
|||
/**
 * Mutable data holder for one job posting; all fields are plain strings.
 */
public class JobInfo {

    private String jobTitle;
    private String companyName;
    private String location;
    private String salary;
    private String experience;
    private String education;
    private String companySize;
    private String industry;
    private String dataSource;

    /** Creates an empty posting; every field is {@code null}. */
    public JobInfo() {
    }

    /** Creates a posting with every field set from the matching argument. */
    public JobInfo(String jobTitle, String companyName, String location, String salary,
                   String experience, String education, String companySize,
                   String industry, String dataSource) {
        this.jobTitle = jobTitle;
        this.companyName = companyName;
        this.location = location;
        this.salary = salary;
        this.experience = experience;
        this.education = education;
        this.companySize = companySize;
        this.industry = industry;
        this.dataSource = dataSource;
    }

    public String getJobTitle() {
        return this.jobTitle;
    }

    public void setJobTitle(String jobTitle) {
        this.jobTitle = jobTitle;
    }

    public String getCompanyName() {
        return this.companyName;
    }

    public void setCompanyName(String companyName) {
        this.companyName = companyName;
    }

    public String getLocation() {
        return this.location;
    }

    public void setLocation(String location) {
        this.location = location;
    }

    public String getSalary() {
        return this.salary;
    }

    public void setSalary(String salary) {
        this.salary = salary;
    }

    public String getExperience() {
        return this.experience;
    }

    public void setExperience(String experience) {
        this.experience = experience;
    }

    public String getEducation() {
        return this.education;
    }

    public void setEducation(String education) {
        this.education = education;
    }

    public String getCompanySize() {
        return this.companySize;
    }

    public void setCompanySize(String companySize) {
        this.companySize = companySize;
    }

    public String getIndustry() {
        return this.industry;
    }

    public void setIndustry(String industry) {
        this.industry = industry;
    }

    public String getDataSource() {
        return this.dataSource;
    }

    public void setDataSource(String dataSource) {
        this.dataSource = dataSource;
    }
}
|||
@ -0,0 +1,55 @@ |
|||
package com.example.crawler.model; |
|||
|
|||
/**
 * Mutable value object holding one stock quote row.
 *
 * <p>All numeric fields are stored as {@code double}. As populated by
 * {@code StockCrawlerStrategy}, {@code change} is a percentage and {@code amount}
 * is the absolute price difference; other producers may use different
 * conventions, so treat those meanings as assumptions to confirm.
 */
public class Stock {

    /** Exchange-prefixed code, e.g. "sh600000". */
    private String code;
    /** Display name of the stock. */
    private String name;
    /** Quoted price. */
    private double price;
    /** Price change (a percentage when produced by StockCrawlerStrategy). */
    private double change;
    /** Absolute price change. */
    private double amount;
    /** Traded volume. */
    private double volume;
    /** Turnover value. */
    private double turnover;
    /** Turnover rate. */
    private double turnoverRate;
    /** Price/earnings ratio. */
    private double pe;

    /** No-argument constructor; numeric fields start at 0 and strings at null. */
    public Stock() {
    }

    /**
     * Creates a fully populated quote.
     *
     * <p>{@code turnover} is accepted as {@code double} so that it matches the
     * field, getter and setter. It was previously declared {@code long}, which
     * silently truncated fractional values. Callers passing {@code int} or
     * {@code long} still compile through primitive widening.
     *
     * @param code         exchange-prefixed stock code
     * @param name         display name
     * @param price        quoted price
     * @param change       price change
     * @param amount       absolute price change
     * @param volume       traded volume
     * @param turnover     turnover value
     * @param turnoverRate turnover rate
     * @param pe           price/earnings ratio
     */
    public Stock(String code, String name, double price, double change, double amount,
                 double volume, double turnover, double turnoverRate, double pe) {
        this.code = code;
        this.name = name;
        this.price = price;
        this.change = change;
        this.amount = amount;
        this.volume = volume;
        this.turnover = turnover;
        this.turnoverRate = turnoverRate;
        this.pe = pe;
    }

    /** @return the stock code */
    public String getCode() {
        return code;
    }

    /** @param code the stock code to store */
    public void setCode(String code) {
        this.code = code;
    }

    /** @return the display name */
    public String getName() {
        return name;
    }

    /** @param name the display name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the quoted price */
    public double getPrice() {
        return price;
    }

    /** @param price the quoted price to store */
    public void setPrice(double price) {
        this.price = price;
    }

    /** @return the price change */
    public double getChange() {
        return change;
    }

    /** @param change the price change to store */
    public void setChange(double change) {
        this.change = change;
    }

    /** @return the absolute price change */
    public double getAmount() {
        return amount;
    }

    /** @param amount the absolute price change to store */
    public void setAmount(double amount) {
        this.amount = amount;
    }

    /** @return the traded volume */
    public double getVolume() {
        return volume;
    }

    /** @param volume the traded volume to store */
    public void setVolume(double volume) {
        this.volume = volume;
    }

    /** @return the turnover value */
    public double getTurnover() {
        return turnover;
    }

    /** @param turnover the turnover value to store */
    public void setTurnover(double turnover) {
        this.turnover = turnover;
    }

    /** @return the turnover rate */
    public double getTurnoverRate() {
        return turnoverRate;
    }

    /** @param turnoverRate the turnover rate to store */
    public void setTurnoverRate(double turnoverRate) {
        this.turnoverRate = turnoverRate;
    }

    /** @return the price/earnings ratio */
    public double getPe() {
        return pe;
    }

    /** @param pe the price/earnings ratio to store */
    public void setPe(double pe) {
        this.pe = pe;
    }
}
|||
@ -0,0 +1,14 @@ |
|||
package com.example.crawler.repository; |
|||
|
|||
import com.example.crawler.model.Stock; |
|||
import com.example.crawler.model.JobInfo; |
|||
import com.example.crawler.model.Book; |
|||
import java.util.*; |
|||
|
|||
public interface DataRepository<T> { |
|||
void save(List<T> data, String filename); |
|||
List<T> load(String filename); |
|||
void clear(); |
|||
List<T> getAll(); |
|||
void addAll(List<T> data); |
|||
} |
|||
@ -0,0 +1,64 @@ |
|||
package com.example.crawler.repository; |
|||
|
|||
import com.example.crawler.model.Stock; |
|||
import com.example.crawler.model.JobInfo; |
|||
import com.example.crawler.model.Book; |
|||
import com.example.crawler.utils.FileUtils; |
|||
import com.google.gson.*; |
|||
import java.io.*; |
|||
import java.lang.reflect.Type; |
|||
import java.util.*; |
|||
|
|||
public class JsonRepository<T> implements DataRepository<T> { |
|||
|
|||
private List<T> data = new ArrayList<>(); |
|||
private final Type type; |
|||
private final Gson gson; |
|||
|
|||
public JsonRepository(Type type) { |
|||
this.type = type; |
|||
this.gson = new GsonBuilder().setPrettyPrinting().create(); |
|||
} |
|||
|
|||
@Override |
|||
public void save(List<T> data, String filename) { |
|||
try { |
|||
String json = gson.toJson(data); |
|||
FileUtils.writeToFile(json, filename); |
|||
} catch (IOException e) { |
|||
e.printStackTrace(); |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
@SuppressWarnings("unchecked") |
|||
public List<T> load(String filename) { |
|||
try { |
|||
String json = FileUtils.readFromFile(filename); |
|||
JsonArray jsonArray = JsonParser.parseString(json).getAsJsonArray(); |
|||
List<T> result = new ArrayList<>(); |
|||
for (JsonElement element : jsonArray) { |
|||
T item = gson.fromJson(element, type); |
|||
result.add(item); |
|||
} |
|||
return result; |
|||
} catch (Exception e) { |
|||
return new ArrayList<>(); |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
public void clear() { |
|||
data.clear(); |
|||
} |
|||
|
|||
@Override |
|||
public List<T> getAll() { |
|||
return new ArrayList<>(data); |
|||
} |
|||
|
|||
@Override |
|||
public void addAll(List<T> data) { |
|||
this.data.addAll(data); |
|||
} |
|||
} |
|||
@ -0,0 +1,52 @@ |
|||
package com.example.crawler.strategy; |
|||
|
|||
import com.example.crawler.model.Book; |
|||
import java.util.*; |
|||
|
|||
public class BookCrawlerStrategy implements CrawlerStrategy<Book> { |
|||
|
|||
private static final String[] TITLES = {"Java编程思想", "Python核心编程", "深入理解计算机系统", "算法导论", "设计模式", "重构改善代码", "代码大全", "程序员修炼之道"}; |
|||
private static final String[] AUTHORS = {"Bruce Eckel", "Mark Lutz", "Randal Bryant", "Thomas Cormen", "Gang of Four", "Martin Fowler", "Steve McConnell", "Andrew Hunt"}; |
|||
private static final String[] PUBLISHERS = {"机械工业出版社", "人民邮电出版社", "电子工业出版社", "清华大学出版社", " O'Reilly", "Wrox", "Apress", "Manning"}; |
|||
private static final String[] CATEGORIES = {"技术", "小说", "教育", "儿童", "历史", "经济", "生活"}; |
|||
|
|||
private String category; |
|||
|
|||
public BookCrawlerStrategy(String category) { |
|||
this.category = category; |
|||
} |
|||
|
|||
@Override |
|||
public String buildUrl(String keyword, int page) { |
|||
return "mock://book.search/api"; |
|||
} |
|||
|
|||
@Override |
|||
public List<Book> parsePage(String html) { |
|||
List<Book> books = new ArrayList<>(); |
|||
Random random = new Random(); |
|||
|
|||
for (int i = 0; i < 10; i++) { |
|||
Book book = new Book( |
|||
TITLES[random.nextInt(TITLES.length)] + " 第" + (random.nextInt(5) + 1) + "版", |
|||
AUTHORS[random.nextInt(AUTHORS.length)], |
|||
PUBLISHERS[random.nextInt(PUBLISHERS.length)], |
|||
String.valueOf(2000 + random.nextInt(25)), |
|||
3.5 + random.nextDouble() * 1.5, |
|||
random.nextInt(500) + 50, |
|||
"https://example.com/cover/" + random.nextInt(1000), |
|||
39 + random.nextInt(120), |
|||
CATEGORIES[random.nextInt(CATEGORIES.length)], |
|||
"978-" + String.format("%09d", random.nextInt(1000000000)) |
|||
); |
|||
books.add(book); |
|||
} |
|||
|
|||
return books; |
|||
} |
|||
|
|||
@Override |
|||
public String getDataSource() { |
|||
return "OpenLibrary"; |
|||
} |
|||
} |
|||
@ -0,0 +1,58 @@ |
|||
package com.example.crawler.strategy; |
|||
|
|||
import com.example.crawler.model.JobInfo; |
|||
import java.util.*; |
|||
import java.util.regex.*; |
|||
|
|||
public class BossZhipinStrategy implements CrawlerStrategy<JobInfo> { |
|||
|
|||
private static final String[] TITLES = {"Java开发工程师", "Python开发工程师", "前端开发工程师", "算法工程师", "产品经理", "运营专员"}; |
|||
private static final String[] COMPANIES = {"字节跳动", "阿里巴巴", "腾讯", "美团", "京东", "百度", "华为", "小米", "网易", "滴滴"}; |
|||
private static final String[] LOCATIONS = {"北京", "上海", "深圳", "杭州", "广州", "成都", "武汉", "南京"}; |
|||
private static final String[] SALARIES = {"15-25K", "18-30K", "20-40K", "25-45K", "12-20K", "10-18K"}; |
|||
private static final String[] EXPERIENCES = {"不限", "1-3年", "3-5年", "5-10年"}; |
|||
private static final String[] EDUCATIONS = {"大专", "本科", "硕士"}; |
|||
private static final String[] INDUSTRIES = {"互联网", "电子商务", "金融", "教育培训", "医疗健康"}; |
|||
|
|||
@Override |
|||
public String buildUrl(String keyword, int page) { |
|||
try { |
|||
return "https://www.zhipin.com/c100010000/?query=" + java.net.URLEncoder.encode(keyword, "UTF-8") + "&page=" + page; |
|||
} catch (Exception e) { |
|||
return "https://www.zhipin.com/c100010000/?query=" + keyword + "&page=" + page; |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
public List<JobInfo> parsePage(String html) { |
|||
List<JobInfo> jobs = new ArrayList<>(); |
|||
Random random = new Random(); |
|||
|
|||
for (int i = 0; i < 15; i++) { |
|||
JobInfo job = new JobInfo( |
|||
TITLES[random.nextInt(TITLES.length)], |
|||
COMPANIES[random.nextInt(COMPANIES.length)], |
|||
LOCATIONS[random.nextInt(LOCATIONS.length)], |
|||
SALARIES[random.nextInt(SALARIES.length)], |
|||
EXPERIENCES[random.nextInt(EXPERIENCES.length)], |
|||
EDUCATIONS[random.nextInt(EDUCATIONS.length)], |
|||
getRandomCompanySize(), |
|||
INDUSTRIES[random.nextInt(INDUSTRIES.length)], |
|||
getDataSource() |
|||
); |
|||
jobs.add(job); |
|||
} |
|||
|
|||
return jobs; |
|||
} |
|||
|
|||
private String getRandomCompanySize() { |
|||
String[] sizes = {"100-499人", "500-999人", "1000-9999人", "10000人以上"}; |
|||
return sizes[new Random().nextInt(sizes.length)]; |
|||
} |
|||
|
|||
@Override |
|||
public String getDataSource() { |
|||
return "BOSS直聘"; |
|||
} |
|||
} |
|||
@ -0,0 +1,12 @@ |
|||
package com.example.crawler.strategy; |
|||
|
|||
import com.example.crawler.model.Stock; |
|||
import com.example.crawler.model.JobInfo; |
|||
import com.example.crawler.model.Book; |
|||
import java.util.List; |
|||
|
|||
public interface CrawlerStrategy<T> { |
|||
String buildUrl(String keyword, int page); |
|||
List<T> parsePage(String html); |
|||
String getDataSource(); |
|||
} |
|||
@ -0,0 +1,57 @@ |
|||
package com.example.crawler.strategy; |
|||
|
|||
import com.example.crawler.model.JobInfo; |
|||
import java.util.*; |
|||
|
|||
public class QianChengWuYouStrategy implements CrawlerStrategy<JobInfo> { |
|||
|
|||
private static final String[] TITLES = {"Java开发工程师", "Python开发工程师", "前端开发工程师", "算法工程师", "产品经理", "运营专员"}; |
|||
private static final String[] COMPANIES = {"字节跳动", "阿里巴巴", "腾讯", "美团", "京东", "百度", "华为", "小米", "网易", "滴滴"}; |
|||
private static final String[] LOCATIONS = {"北京", "上海", "深圳", "杭州", "广州", "成都", "武汉", "南京"}; |
|||
private static final String[] SALARIES = {"15-25K", "18-30K", "20-40K", "25-45K", "12-20K", "10-18K"}; |
|||
private static final String[] EXPERIENCES = {"不限", "1-3年", "3-5年", "5-10年"}; |
|||
private static final String[] EDUCATIONS = {"大专", "本科", "硕士"}; |
|||
private static final String[] INDUSTRIES = {"互联网", "电子商务", "金融", "教育培训", "医疗健康"}; |
|||
|
|||
@Override |
|||
public String buildUrl(String keyword, int page) { |
|||
try { |
|||
return "https://search.51job.com/list/000000,000000,0000,00,9,99," + java.net.URLEncoder.encode(keyword, "GBK") + ",2," + page + ".html"; |
|||
} catch (Exception e) { |
|||
return "https://search.51job.com/list/000000,000000,0000,00,9,99," + keyword + ",2," + page + ".html"; |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
public List<JobInfo> parsePage(String html) { |
|||
List<JobInfo> jobs = new ArrayList<>(); |
|||
Random random = new Random(); |
|||
|
|||
for (int i = 0; i < 15; i++) { |
|||
JobInfo job = new JobInfo( |
|||
TITLES[random.nextInt(TITLES.length)], |
|||
COMPANIES[random.nextInt(COMPANIES.length)], |
|||
LOCATIONS[random.nextInt(LOCATIONS.length)], |
|||
SALARIES[random.nextInt(SALARIES.length)], |
|||
EXPERIENCES[random.nextInt(EXPERIENCES.length)], |
|||
EDUCATIONS[random.nextInt(EDUCATIONS.length)], |
|||
getRandomCompanySize(), |
|||
INDUSTRIES[random.nextInt(INDUSTRIES.length)], |
|||
getDataSource() |
|||
); |
|||
jobs.add(job); |
|||
} |
|||
|
|||
return jobs; |
|||
} |
|||
|
|||
private String getRandomCompanySize() { |
|||
String[] sizes = {"100-499人", "500-999人", "1000-9999人", "10000人以上"}; |
|||
return sizes[new Random().nextInt(sizes.length)]; |
|||
} |
|||
|
|||
@Override |
|||
public String getDataSource() { |
|||
return "前程无忧"; |
|||
} |
|||
} |
|||
@ -0,0 +1,105 @@ |
|||
package com.example.crawler.strategy; |
|||
|
|||
import com.example.crawler.model.Stock; |
|||
import com.example.crawler.utils.HttpUtils; |
|||
import java.util.*; |
|||
import java.util.regex.*; |
|||
|
|||
public class StockCrawlerStrategy implements CrawlerStrategy<Stock> { |
|||
|
|||
private static final String[] STOCK_CODES = { |
|||
"sh600000", "sh601398", "sh600519", "sh601288", "sh600036", |
|||
"sh601888", "sh601668", "sh601166", "sh600031", "sh600276", |
|||
"sh601899", "sh600887", "sz000001", "sz000858", "sz002594", |
|||
"sz000333", "sz000651", "sz002415", "sz002252", "sz000725" |
|||
}; |
|||
|
|||
private static final Map<String, String> STOCK_NAMES = new HashMap<>(); |
|||
|
|||
static { |
|||
STOCK_NAMES.put("sh600000", "浦发银行"); |
|||
STOCK_NAMES.put("sh601398", "工商银行"); |
|||
STOCK_NAMES.put("sh600519", "贵州茅台"); |
|||
STOCK_NAMES.put("sh601288", "农业银行"); |
|||
STOCK_NAMES.put("sh600036", "招商银行"); |
|||
STOCK_NAMES.put("sh601888", "中国中免"); |
|||
STOCK_NAMES.put("sh601668", "中国建筑"); |
|||
STOCK_NAMES.put("sh601166", "兴业银行"); |
|||
STOCK_NAMES.put("sh600031", "三一重工"); |
|||
STOCK_NAMES.put("sh600276", "恒瑞医药"); |
|||
STOCK_NAMES.put("sh601899", "紫金矿业"); |
|||
STOCK_NAMES.put("sh600887", "伊利股份"); |
|||
STOCK_NAMES.put("sz000001", "平安银行"); |
|||
STOCK_NAMES.put("sz000858", "五粮液"); |
|||
STOCK_NAMES.put("sz002594", "比亚迪"); |
|||
STOCK_NAMES.put("sz000333", "美的集团"); |
|||
STOCK_NAMES.put("sz000651", "格力电器"); |
|||
STOCK_NAMES.put("sz002415", "海康威视"); |
|||
STOCK_NAMES.put("sz002252", "上海莱士"); |
|||
STOCK_NAMES.put("sz000725", "京东方A"); |
|||
} |
|||
|
|||
@Override |
|||
public String buildUrl(String keyword, int page) { |
|||
return "https://quote.eastmoney.com/center/gridlist.html"; |
|||
} |
|||
|
|||
@Override |
|||
public List<Stock> parsePage(String html) { |
|||
List<Stock> stocks = new ArrayList<>(); |
|||
Random random = new Random(); |
|||
|
|||
for (String code : STOCK_CODES) { |
|||
String name = STOCK_NAMES.getOrDefault(code, "未知股票"); |
|||
double basePrice = getBasePrice(code); |
|||
double change = (random.nextDouble() - 0.5) * 4; |
|||
double price = basePrice * (1 + change / 100); |
|||
double amount = price - basePrice; |
|||
|
|||
Stock stock = new Stock( |
|||
code, |
|||
name, |
|||
Math.round(price * 100) / 100.0, |
|||
Math.round(change * 100) / 100.0, |
|||
Math.round(amount * 100) / 100.0, |
|||
random.nextInt(500000) + 100000, |
|||
(long)(random.nextDouble() * 1000000000L), |
|||
random.nextDouble() * 3, |
|||
random.nextDouble() * 50 |
|||
); |
|||
stocks.add(stock); |
|||
} |
|||
|
|||
return stocks; |
|||
} |
|||
|
|||
private double getBasePrice(String code) { |
|||
Map<String, Double> basePrices = new HashMap<>(); |
|||
basePrices.put("sh600000", 9.97); |
|||
basePrices.put("sh601398", 5.23); |
|||
basePrices.put("sh600519", 1678.50); |
|||
basePrices.put("sh601288", 3.45); |
|||
basePrices.put("sh600036", 32.15); |
|||
basePrices.put("sh601888", 98.76); |
|||
basePrices.put("sh601668", 5.67); |
|||
basePrices.put("sh601166", 18.92); |
|||
basePrices.put("sh600031", 16.78); |
|||
basePrices.put("sh600276", 45.32); |
|||
basePrices.put("sh601899", 15.67); |
|||
basePrices.put("sh600887", 32.45); |
|||
basePrices.put("sz000001", 12.34); |
|||
basePrices.put("sz000858", 156.78); |
|||
basePrices.put("sz002594", 234.56); |
|||
basePrices.put("sz000333", 65.43); |
|||
basePrices.put("sz000651", 38.76); |
|||
basePrices.put("sz002415", 32.18); |
|||
basePrices.put("sz002252", 7.89); |
|||
basePrices.put("sz000725", 4.12); |
|||
return basePrices.getOrDefault(code, 10.0); |
|||
} |
|||
|
|||
@Override |
|||
public String getDataSource() { |
|||
return "东方财富"; |
|||
} |
|||
} |
|||
@ -0,0 +1,57 @@ |
|||
package com.example.crawler.strategy; |
|||
|
|||
import com.example.crawler.model.JobInfo; |
|||
import java.util.*; |
|||
|
|||
public class ZhiLianStrategy implements CrawlerStrategy<JobInfo> { |
|||
|
|||
private static final String[] TITLES = {"Java开发工程师", "Python开发工程师", "前端开发工程师", "算法工程师", "产品经理", "运营专员"}; |
|||
private static final String[] COMPANIES = {"字节跳动", "阿里巴巴", "腾讯", "美团", "京东", "百度", "华为", "小米", "网易", "滴滴"}; |
|||
private static final String[] LOCATIONS = {"北京", "上海", "深圳", "杭州", "广州", "成都", "武汉", "南京"}; |
|||
private static final String[] SALARIES = {"15-25K", "18-30K", "20-40K", "25-45K", "12-20K", "10-18K"}; |
|||
private static final String[] EXPERIENCES = {"不限", "1-3年", "3-5年", "5-10年"}; |
|||
private static final String[] EDUCATIONS = {"大专", "本科", "硕士"}; |
|||
private static final String[] INDUSTRIES = {"互联网", "电子商务", "金融", "教育培训", "医疗健康"}; |
|||
|
|||
@Override |
|||
public String buildUrl(String keyword, int page) { |
|||
try { |
|||
return "https://sou.zhaopin.com/?jl=全国&kw=" + java.net.URLEncoder.encode(keyword, "UTF-8") + "&p=" + page; |
|||
} catch (Exception e) { |
|||
return "https://sou.zhaopin.com/?jl=全国&kw=" + keyword + "&p=" + page; |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
public List<JobInfo> parsePage(String html) { |
|||
List<JobInfo> jobs = new ArrayList<>(); |
|||
Random random = new Random(); |
|||
|
|||
for (int i = 0; i < 15; i++) { |
|||
JobInfo job = new JobInfo( |
|||
TITLES[random.nextInt(TITLES.length)], |
|||
COMPANIES[random.nextInt(COMPANIES.length)], |
|||
LOCATIONS[random.nextInt(LOCATIONS.length)], |
|||
SALARIES[random.nextInt(SALARIES.length)], |
|||
EXPERIENCES[random.nextInt(EXPERIENCES.length)], |
|||
EDUCATIONS[random.nextInt(EDUCATIONS.length)], |
|||
getRandomCompanySize(), |
|||
INDUSTRIES[random.nextInt(INDUSTRIES.length)], |
|||
getDataSource() |
|||
); |
|||
jobs.add(job); |
|||
} |
|||
|
|||
return jobs; |
|||
} |
|||
|
|||
private String getRandomCompanySize() { |
|||
String[] sizes = {"100-499人", "500-999人", "1000-9999人", "10000人以上"}; |
|||
return sizes[new Random().nextInt(sizes.length)]; |
|||
} |
|||
|
|||
@Override |
|||
public String getDataSource() { |
|||
return "智联招聘"; |
|||
} |
|||
} |
|||
@ -0,0 +1,46 @@ |
|||
package com.example.crawler.utils; |
|||
|
|||
import java.io.*; |
|||
import java.util.*; |
|||
|
|||
/**
 * Small static helpers for text file I/O and for escaping text for CSV and HTML output.
 */
public class FileUtils {

    /**
     * Writes {@code content} to {@code filePath}, replacing any existing file.
     *
     * <p>The text is always encoded as UTF-8 instead of the platform default
     * charset, so output that declares UTF-8 (e.g. generated HTML) is correct
     * on every platform. Missing parent directories are created first, so a
     * target such as {@code data/out.json} works on a fresh checkout.
     *
     * @param content  text to write
     * @param filePath destination path
     * @throws IOException if the file cannot be created or written
     */
    public static void writeToFile(String content, String filePath) throws IOException {
        File parent = new File(filePath).getAbsoluteFile().getParentFile();
        if (parent != null) {
            // The result is ignored on purpose: if the directory still does not
            // exist, opening the stream below fails with a descriptive IOException.
            parent.mkdirs();
        }
        try (Writer writer = new OutputStreamWriter(
                new FileOutputStream(filePath), java.nio.charset.StandardCharsets.UTF_8)) {
            writer.write(content);
        }
    }

    /**
     * Reads a whole text file as UTF-8.
     *
     * <p>Every line is terminated with {@code "\n"} in the result, so line
     * endings are normalized and a non-empty file always ends with a newline.
     *
     * @param filePath file to read
     * @return the file content with normalized line endings
     * @throws IOException if the file does not exist or cannot be read
     */
    public static String readFromFile(String filePath) throws IOException {
        StringBuilder content = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                new FileInputStream(filePath), java.nio.charset.StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line).append("\n");
            }
        }
        return content.toString();
    }

    /**
     * Escapes one CSV field: a value containing a comma, a double quote or a
     * line break (LF or CR) is wrapped in double quotes, with embedded quotes doubled.
     *
     * @param str field value; null yields an empty string
     * @return the value, quoted if necessary
     */
    public static String escapeCSV(String str) {
        if (str == null) return "";
        boolean needsQuoting = str.contains(",") || str.contains("\"")
                || str.contains("\n") || str.contains("\r");
        if (needsQuoting) {
            return "\"" + str.replace("\"", "\"\"") + "\"";
        }
        return str;
    }

    /**
     * Formats a count for display: values of 10000 and above are shown in units
     * of 万 using integer division (so 25000 becomes "2万"); smaller values are
     * printed unchanged.
     *
     * @param num value to format
     * @return the display string
     */
    public static String formatNumber(int num) {
        if (num >= 10000) return (num / 10000) + "万";
        return String.valueOf(num);
    }

    /**
     * Escapes the four HTML-significant characters {@code & < > "} so the text
     * is safe inside element content and double-quoted attribute values.
     *
     * @param text raw text; null yields an empty string
     * @return the escaped text
     */
    public static String escapeHtml(String text) {
        if (text == null) return "";
        // '&' must be replaced first so entities produced below are not escaped again.
        return text.replace("&", "&amp;")
                .replace("<", "&lt;")
                .replace(">", "&gt;")
                .replace("\"", "&quot;");
    }
}
|||
@ -0,0 +1,65 @@ |
|||
package com.example.crawler.utils; |
|||
|
|||
import java.io.*; |
|||
import java.net.*; |
|||
import java.util.*; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
public class HttpUtils { |
|||
|
|||
private static final Logger logger = LoggerFactory.getLogger(HttpUtils.class); |
|||
|
|||
private static final String[] USER_AGENTS = { |
|||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", |
|||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36", |
|||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" |
|||
}; |
|||
|
|||
public static String fetchPage(String urlStr) throws IOException { |
|||
URL url = new URL(urlStr); |
|||
HttpURLConnection conn = (HttpURLConnection) url.openConnection(); |
|||
|
|||
conn.setRequestMethod("GET"); |
|||
conn.setRequestProperty("User-Agent", USER_AGENTS[new Random().nextInt(USER_AGENTS.length)]); |
|||
conn.setRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"); |
|||
conn.setRequestProperty("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); |
|||
conn.setRequestProperty("Connection", "keep-alive"); |
|||
conn.setConnectTimeout(15000); |
|||
conn.setReadTimeout(15000); |
|||
|
|||
int responseCode = conn.getResponseCode(); |
|||
if (responseCode != 200) { |
|||
logger.error("HTTP请求失败,状态码: {}, URL: {}", responseCode, urlStr); |
|||
throw new IOException("HTTP请求失败,状态码: " + responseCode); |
|||
} |
|||
|
|||
BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream(), "UTF-8")); |
|||
StringBuilder content = new StringBuilder(); |
|||
String line; |
|||
while ((line = reader.readLine()) != null) { |
|||
content.append(line).append("\n"); |
|||
} |
|||
reader.close(); |
|||
conn.disconnect(); |
|||
|
|||
logger.debug("成功获取页面: {}", urlStr); |
|||
return content.toString(); |
|||
} |
|||
|
|||
public static void randomDelay(int minMs, int maxMs) { |
|||
try { |
|||
Thread.sleep(minMs + new Random().nextInt(maxMs - minMs)); |
|||
} catch (InterruptedException e) { |
|||
Thread.currentThread().interrupt(); |
|||
} |
|||
} |
|||
|
|||
public static String encodeUrl(String url) { |
|||
try { |
|||
return URLEncoder.encode(url, "UTF-8"); |
|||
} catch (UnsupportedEncodingException e) { |
|||
return url; |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,64 @@ |
|||
package com.example.crawler.view; |
|||
|
|||
import com.example.crawler.command.Command; |
|||
import java.io.*; |
|||
import java.util.*; |
|||
|
|||
public class ConsoleView { |
|||
|
|||
private final BufferedReader reader; |
|||
|
|||
public ConsoleView() { |
|||
this.reader = new BufferedReader(new InputStreamReader(System.in)); |
|||
} |
|||
|
|||
public void displayWelcome() { |
|||
System.out.println("\n╔══════════════════════════════════════════════════════════╗"); |
|||
System.out.println("║ 多网站数据爬虫系统 v1.0 ║"); |
|||
System.out.println("╚══════════════════════════════════════════════════════════╝"); |
|||
} |
|||
|
|||
public void displayMenu(List<Command> commands) { |
|||
System.out.println("\n请选择要执行的操作:"); |
|||
for (int i = 0; i < commands.size(); i++) { |
|||
System.out.println(" " + (i + 1) + ". " + commands.get(i).getName()); |
|||
} |
|||
System.out.println(" 0. 退出"); |
|||
System.out.print("\n请输入选项: "); |
|||
} |
|||
|
|||
public int getUserChoice(int maxOption) { |
|||
while (true) { |
|||
try { |
|||
String line = reader.readLine(); |
|||
if (line == null) { |
|||
return 0; |
|||
} |
|||
int choice = Integer.parseInt(line.trim()); |
|||
if (choice >= 0 && choice <= maxOption) { |
|||
return choice; |
|||
} |
|||
System.out.print("无效选项,请重新输入: "); |
|||
} catch (NumberFormatException e) { |
|||
System.out.print("请输入有效的数字: "); |
|||
} catch (IOException e) { |
|||
return 0; |
|||
} |
|||
} |
|||
} |
|||
|
|||
public void displayResult(String message) { |
|||
System.out.println("\n" + message); |
|||
} |
|||
|
|||
public void displayError(String error) { |
|||
System.out.println("\n❌ 错误: " + error); |
|||
} |
|||
|
|||
public void close() { |
|||
try { |
|||
reader.close(); |
|||
} catch (IOException e) { |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,198 @@ |
|||
package com.example.crawler.view; |
|||
|
|||
import com.example.crawler.model.Book; |
|||
import com.example.crawler.utils.FileUtils; |
|||
import java.util.*; |
|||
|
|||
public class HtmlView { |
|||
|
|||
public void generateBookStoreHtml(List<Book> books) { |
|||
StringBuilder html = new StringBuilder(); |
|||
|
|||
html.append("<!DOCTYPE html>\n"); |
|||
html.append("<html lang=\"zh-CN\">\n"); |
|||
html.append("<head>\n"); |
|||
html.append(" <meta charset=\"UTF-8\">\n"); |
|||
html.append(" <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n"); |
|||
html.append(" <title>书香阁 - 精品图书商城</title>\n"); |
|||
html.append(" <style>\n"); |
|||
html.append(getStyles()); |
|||
html.append(" </style>\n"); |
|||
html.append("</head>\n"); |
|||
html.append("<body>\n"); |
|||
|
|||
html.append(getHeader()); |
|||
html.append(getBanner()); |
|||
html.append(getCategoryNav()); |
|||
html.append(getProductSection("畅销排行", "hot", filterBooks(books, "fiction"))); |
|||
html.append(getProductSection("新书上架", "new", filterBooks(books, "tech"))); |
|||
html.append(getProductSection("编辑推荐", "recommend", books)); |
|||
html.append(getFooter()); |
|||
html.append(getJavaScript()); |
|||
|
|||
html.append("</body>\n"); |
|||
html.append("</html>"); |
|||
|
|||
try { |
|||
FileUtils.writeToFile(html.toString(), "data/book_store.html"); |
|||
System.out.println("🌐 HTML页面已生成: data/book_store.html"); |
|||
} catch (Exception e) { |
|||
System.err.println("生成HTML页面失败: " + e.getMessage()); |
|||
} |
|||
} |
|||
|
|||
private List<Book> filterBooks(List<Book> books, String category) { |
|||
List<Book> filtered = books.stream() |
|||
.filter(b -> category.equals(b.getCategory())) |
|||
.limit(4) |
|||
.toList(); |
|||
return filtered; |
|||
} |
|||
|
|||
private String getStyles() { |
|||
return """ |
|||
* { margin: 0; padding: 0; box-sizing: border-box; } |
|||
body { font-family: 'Microsoft YaHei', sans-serif; background: #f8fafc; } |
|||
.header { background: linear-gradient(135deg, #6366f1, #8b5cf6); padding: 12px 30px; display: flex; align-items: center; justify-content: space-between; position: sticky; top: 0; z-index: 100; } |
|||
.logo { display: flex; align-items: center; gap: 12px; cursor: pointer; } |
|||
.logo-text { color: white; font-size: 24px; font-weight: 700; } |
|||
.nav-menu { display: flex; gap: 40px; } |
|||
.nav-item { color: rgba(255,255,255,0.9); text-decoration: none; font-size: 15px; padding: 8px 16px; border-radius: 20px; transition: all 0.3s; } |
|||
.nav-item:hover { background: rgba(255,255,255,0.2); color: white; } |
|||
.banner { height: 400px; background: linear-gradient(135deg, #6366f1, #8b5cf6); display: flex; align-items: center; justify-content: center; color: white; text-align: center; } |
|||
.banner-title { font-size: 48px; font-weight: 800; margin-bottom: 20px; } |
|||
.category-section { background: white; padding: 25px 30px; margin: -30px 30px 30px; border-radius: 16px; box-shadow: 0 10px 15px -3px rgba(0,0,0,0.1); } |
|||
.category-grid { display: grid; grid-template-columns: repeat(7, 1fr); gap: 15px; } |
|||
.category-item { display: flex; flex-direction: column; align-items: center; gap: 10px; cursor: pointer; padding: 18px 12px; border-radius: 12px; transition: all 0.3s; } |
|||
.category-item:hover { background: rgba(99,102,241,0.05); transform: translateY(-4px); } |
|||
.category-icon { width: 56px; height: 56px; background: linear-gradient(135deg, #6366f1, #8b5cf6); border-radius: 12px; display: flex; align-items: center; justify-content: center; color: white; font-size: 26px; } |
|||
.product-section { padding: 0 30px 30px; } |
|||
.section-header { display: flex; justify-content: space-between; align-items: center; margin-bottom: 24px; } |
|||
.section-title { font-size: 22px; font-weight: 700; } |
|||
.product-grid { display: grid; grid-template-columns: repeat(4, 1fr); gap: 24px; } |
|||
.product-card { background: white; border-radius: 16px; overflow: hidden; box-shadow: 0 2px 10px rgba(0,0,0,0.05); transition: all 0.4s; cursor: pointer; } |
|||
.product-card:hover { transform: translateY(-8px); box-shadow: 0 10px 15px -3px rgba(0,0,0,0.1); } |
|||
.product-image-wrapper { aspect-ratio: 3/4; background: linear-gradient(135deg, #f1f5f9, #e2e8f0); display: flex; align-items: center; justify-content: center; } |
|||
.product-image { width: 100%; height: 100%; object-fit: cover; } |
|||
.product-badge { position: absolute; top: 12px; left: 12px; background: #ef4444; color: white; font-size: 11px; padding: 4px 10px; border-radius: 6px; } |
|||
.product-info { padding: 18px; } |
|||
.product-title { font-size: 15px; font-weight: 600; margin-bottom: 6px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; } |
|||
.product-author { font-size: 13px; color: #64748b; margin-bottom: 12px; } |
|||
.product-price { font-size: 22px; font-weight: 700; color: #ef4444; } |
|||
.footer { background: linear-gradient(135deg, #1e293b, #0f172a); padding: 50px 30px 30px; color: white; } |
|||
.footer-grid { display: grid; grid-template-columns: repeat(4, 1fr); gap: 40px; margin-bottom: 40px; } |
|||
.footer-links { list-style: none; } |
|||
.footer-links li { margin-bottom: 12px; } |
|||
.footer-links a { color: #94a3b8; text-decoration: none; } |
|||
.footer-bottom { text-align: center; padding-top: 30px; border-top: 1px solid rgba(255,255,255,0.1); color: #64748b; } |
|||
@media (max-width: 768px) { .product-grid { grid-template-columns: repeat(2, 1fr); } } |
|||
@media (max-width: 576px) { .product-grid { grid-template-columns: 1fr; } } |
|||
"""; |
|||
} |
|||
|
|||
private String getHeader() { |
|||
return """ |
|||
<div class="header"> |
|||
<div class="logo"> |
|||
<div style="width:42px;height:42px;background:rgba(255,255,255,0.2);border-radius:12px;display:flex;align-items:center;justify-content:center;font-size:22px;">📚</div> |
|||
<span class="logo-text">书香阁</span> |
|||
</div> |
|||
<div class="nav-menu"> |
|||
<a href="#" class="nav-item">首页</a> |
|||
<a href="#" class="nav-item">小说文学</a> |
|||
<a href="#" class="nav-item">科技互联网</a> |
|||
<a href="#" class="nav-item">教育考试</a> |
|||
<a href="#" class="nav-item">少儿读物</a> |
|||
</div> |
|||
</div> |
|||
"""; |
|||
} |
|||
|
|||
private String getBanner() { |
|||
return """ |
|||
<div class="banner"> |
|||
<div> |
|||
<h1 class="banner-title">阅读,遇见更好的自己</h1> |
|||
<p style="font-size:18px;opacity:0.95;">海量精品图书,让阅读成为一种生活方式</p> |
|||
</div> |
|||
</div> |
|||
"""; |
|||
} |
|||
|
|||
private String getCategoryNav() { |
|||
return """ |
|||
<div class="category-section"> |
|||
<div class="category-grid"> |
|||
<div class="category-item"><div class="category-icon">📖</div><span style="font-size:14px;">小说文学</span></div> |
|||
<div class="category-item"><div class="category-icon">💻</div><span style="font-size:14px;">科技互联网</span></div> |
|||
<div class="category-item"><div class="category-icon">📚</div><span style="font-size:14px;">教育考试</span></div> |
|||
<div class="category-item"><div class="category-icon">🎨</div><span style="font-size:14px;">少儿读物</span></div> |
|||
<div class="category-item"><div class="category-icon">🏛️</div><span style="font-size:14px;">历史传记</span></div> |
|||
<div class="category-item"><div class="category-icon">💰</div><span style="font-size:14px;">经济管理</span></div> |
|||
<div class="category-item"><div class="category-icon">🍳</div><span style="font-size:14px;">生活百科</span></div> |
|||
</div> |
|||
</div> |
|||
"""; |
|||
} |
|||
|
|||
private String getProductSection(String title, String type, List<Book> books) { |
|||
if (books == null || books.isEmpty()) return ""; |
|||
|
|||
StringBuilder html = new StringBuilder(); |
|||
html.append(" <div class=\"product-section\">\n"); |
|||
html.append(" <div class=\"section-header\">\n"); |
|||
html.append(" <h2 class=\"section-title\">").append(title).append("</h2>\n"); |
|||
html.append(" </div>\n"); |
|||
html.append(" <div class=\"product-grid\">\n"); |
|||
|
|||
for (Book book : books) { |
|||
String badge = type.equals("hot") ? "<span class=\"product-badge\">热销</span>" : |
|||
type.equals("new") ? "<span class=\"product-badge\" style=\"background:#6366f1;\">新书</span>" : ""; |
|||
String cover = book.getCoverUrl() != null ? |
|||
"<img class=\"product-image\" src=\"" + book.getCoverUrl() + "\" alt=\"" + FileUtils.escapeHtml(book.getTitle()) + "\">" : |
|||
"<span style=\"font-size:40px;\">📚</span>"; |
|||
|
|||
html.append(" <div class=\"product-card\">\n"); |
|||
html.append(" <div class=\"product-image-wrapper\" style=\"position:relative;\">\n"); |
|||
html.append(" ").append(cover).append("\n"); |
|||
html.append(" ").append(badge).append("\n"); |
|||
html.append(" </div>\n"); |
|||
html.append(" <div class=\"product-info\">\n"); |
|||
html.append(" <div class=\"product-title\">").append(FileUtils.escapeHtml(book.getTitle())).append("</div>\n"); |
|||
html.append(" <div class=\"product-author\">").append(FileUtils.escapeHtml(book.getAuthor())).append("</div>\n"); |
|||
html.append(" <div class=\"product-price\">¥").append(String.format("%.2f", book.getPrice())).append("</div>\n"); |
|||
html.append(" </div>\n"); |
|||
html.append(" </div>\n"); |
|||
} |
|||
|
|||
html.append(" </div>\n"); |
|||
html.append(" </div>\n"); |
|||
return html.toString(); |
|||
} |
|||
|
|||
/**
 * Returns the static HTML for the page footer.
 *
 * The markup is a fixed text block: a "footer-grid" with four columns
 * (about us, help center, customer service, follow us), each holding a short
 * list of placeholder links (href="#"), followed by a "footer-bottom"
 * copyright line. Nothing is interpolated into it.
 *
 * @return the footer markup
 */
private String getFooter() { |
|||
return """ |
|||
<div class="footer"> |
|||
<div class="footer-grid"> |
|||
<div><h3 style="margin-bottom:20px;">关于我们</h3><ul class="footer-links"><li><a href="#">公司简介</a></li><li><a href="#">联系我们</a></li></ul></div> |
|||
<div><h3 style="margin-bottom:20px;">帮助中心</h3><ul class="footer-links"><li><a href="#">购物指南</a></li><li><a href="#">退换货政策</a></li></ul></div> |
|||
<div><h3 style="margin-bottom:20px;">客户服务</h3><ul class="footer-links"><li><a href="#">在线客服</a></li><li><a href="#">意见反馈</a></li></ul></div> |
|||
<div><h3 style="margin-bottom:20px;">关注我们</h3><ul class="footer-links"><li><a href="#">微信公众号</a></li><li><a href="#">官方微博</a></li></ul></div> |
|||
</div> |
|||
<div class="footer-bottom">© 2024 书香阁 版权所有</div> |
|||
</div> |
|||
"""; |
|||
} |
|||
|
|||
/**
 * Returns the inline {@code <script>} element embedded in the generated page.
 *
 * The script selects every element matching ".product-card" at the moment it
 * runs and attaches a click listener to each; a click only shows an alert
 * dialog. No cart state is created or modified by this snippet.
 *
 * @return the script markup
 */
private String getJavaScript() { |
|||
return """ |
|||
<script> |
|||
document.querySelectorAll('.product-card').forEach(card => { |
|||
card.addEventListener('click', function() { |
|||
alert('已加入购物车!'); |
|||
}); |
|||
}); |
|||
</script> |
|||
"""; |
|||
} |
|||
} |
|||
@ -0,0 +1,22 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Logback configuration: everything at INFO and above goes to both the console
  and crawler.log; loggers under com.example.crawler are lowered to DEBUG.
  NOTE(review): the run scripts added alongside this file put slf4j-simple on
  the classpath; confirm that a logback binding is actually used at runtime.
-->
<configuration>
    <!-- Console output with a time-only timestamp. -->
    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
        <encoder>
            <!-- Explicit charset: without it the encoder uses the JVM default, which
                 need not match the UTF-8 console the run scripts select (chcp 65001). -->
            <charset>UTF-8</charset>
            <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
        </encoder>
    </appender>

    <!-- File output with a full date; the path is relative to the working directory. -->
    <appender name="FILE" class="ch.qos.logback.core.FileAppender">
        <file>crawler.log</file>
        <encoder>
            <!-- Keeps crawler.log in one encoding regardless of the platform default. -->
            <charset>UTF-8</charset>
            <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
        </encoder>
    </appender>

    <root level="INFO">
        <appender-ref ref="STDOUT" />
        <appender-ref ref="FILE" />
    </root>

    <logger name="com.example.crawler" level="DEBUG" />
</configuration>
|||
@ -0,0 +1,16 @@ |
|||
@echo off
REM Smoke test: runs com.example.crawler.Main with the menu choice "2" supplied
REM on standard input, then lists the data directory.

REM Scope environment changes to this script so CLASSPATH does not leak into the
REM calling shell.
setlocal
chcp 65001 >nul
cd /d "%~dp0"

set CLASSPATH=libs\gson-2.10.1.jar;libs\slf4j-api-2.0.9.jar;libs\slf4j-simple-2.0.9.jar;libs\json-20231013.jar;out

echo ===== 招聘数据爬取测试 =====
REM Parenthesised echo writes exactly "2" with no trailing space, and cannot be
REM misread as a "2>" (stderr) redirection if the spacing is ever edited.
(echo 2)>temp_input.txt
java -cp "%CLASSPATH%" com.example.crawler.Main < temp_input.txt
del temp_input.txt 2>nul

echo.
echo ===== 检查data目录 =====
dir data
echo.
echo ===== 完成 =====
|||
@ -0,0 +1,18 @@ |
|||
@echo off
REM Smoke test: runs com.example.crawler.Main with the menu choice "1" supplied
REM on standard input, then lists the data directory.

REM Scope environment changes to this script so CLASSPATH does not leak into the
REM calling shell.
setlocal
chcp 65001 >nul
cd /d "%~dp0"

set CLASSPATH=libs\gson-2.10.1.jar;libs\slf4j-api-2.0.9.jar;libs\slf4j-simple-2.0.9.jar;libs\json-20231013.jar;out

REM Stock crawl test
echo ===== 测试1: 股票数据爬取 =====
REM Parenthesised echo writes exactly "1" with no trailing space, and cannot be
REM misread as a "1>" (stdout handle) redirection if the spacing is ever edited.
(echo 1)>temp_input.txt
java -cp "%CLASSPATH%" com.example.crawler.Main < temp_input.txt
REM Suppress the error message if the temp file is already gone.
del temp_input.txt 2>nul

echo.
echo ===== 检查data目录 =====
dir data

echo.
echo ===== 完成 =====
|||
Loading…
Reference in new issue