126 changed files with 3094 additions and 0 deletions
Binary file not shown.
|
|
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,10 @@ |
|||
[赛后] TT 1-0 JDG:强势进攻配合行云流水,TT首局 - 亮 50回复 |
|||
[流言板] HWG!罗马诺:热刺免签伯恩茅斯中卫塞内西,签约四年 50 - 50亮 139回复 |
|||
[流言板] 詹金斯:波波教会我,投入人际关系和关心别人最重要 17 - 17亮 49回复 |
|||
[流言板] 库班:如今的NBA老板早已变成了精于算计、利益为先的投资人 40 - 40亮 118回复 |
|||
[流言板] 布朗谈尼克斯凝聚力:从布伦森开始,所有人都愿意牺牲 18 - 18亮 65回复 |
|||
[流言板] 马竞官方:只花5分钟就炮制假消息,别轻信和巴萨有关的消息 50 - 50亮 276回复 |
|||
[流言板] Amick:湖人会尝试追求字母哥,但我不认为他们能得到他 26 - 26亮 92回复 |
|||
[流言板] 外媒统计LCK常规赛观赛人数峰值Top5:T1五度上榜 27 - 27亮 59回复 |
|||
[流言板] 夏普:当裁判不再给亚历山大哨子的时候,你就看到区别了 50 - 50亮 301回复 |
|||
[流言板] 法尔克:拜仁认为左边锋备选都不如戈登,今夏或没重磅引援 28 - 28亮 64回复 |
|||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,6 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project version="4"> |
|||
<component name="ProjectRootManager" version="2" default="true" project-jdk-name="temurin-25" project-jdk-type="JavaSDK"> |
|||
<output url="file://$PROJECT_DIR$/out" /> |
|||
</component> |
|||
</project> |
|||
@ -0,0 +1,8 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project version="4"> |
|||
<component name="ProjectModuleManager"> |
|||
<modules> |
|||
<module fileurl="file://$PROJECT_DIR$/Git.iml" filepath="$PROJECT_DIR$/Git.iml" /> |
|||
</modules> |
|||
</component> |
|||
</project> |
|||
@ -0,0 +1,7 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project version="4"> |
|||
<component name="VcsDirectoryMappings"> |
|||
<mapping directory="" vcs="Git" /> |
|||
<mapping directory="$PROJECT_DIR$" vcs="Git" /> |
|||
</component> |
|||
</project> |
|||
@ -0,0 +1,227 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project version="4"> |
|||
<component name="AutoImportSettings"> |
|||
<option name="autoReloadType" value="SELECTIVE" /> |
|||
</component> |
|||
<component name="ChangeListManager"> |
|||
<list default="true" id="53fa3b19-db2c-4a74-b6fa-3d9bbdd897c4" name="更改" comment="温度转换"> |
|||
<change afterPath="$PROJECT_DIR$/w1/BankAccount.java" afterDir="false" /> |
|||
<change afterPath="$PROJECT_DIR$/w3/Car.java" afterDir="false" /> |
|||
</list> |
|||
<option name="SHOW_DIALOG" value="false" /> |
|||
<option name="HIGHLIGHT_CONFLICTS" value="true" /> |
|||
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" /> |
|||
<option name="LAST_RESOLUTION" value="IGNORE" /> |
|||
</component> |
|||
<component name="FileTemplateManagerImpl"> |
|||
<option name="RECENT_TEMPLATES"> |
|||
<list> |
|||
<option value="Class" /> |
|||
</list> |
|||
</option> |
|||
</component> |
|||
<component name="Git.Settings"> |
|||
<option name="PUSH_AUTO_UPDATE" value="true" /> |
|||
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" /> |
|||
</component> |
|||
<component name="ProblemsViewState"> |
|||
<option name="selectedTabId" value="CurrentFile" /> |
|||
</component> |
|||
<component name="ProjectColorInfo">{ |
|||
"associatedIndex": 2 |
|||
}</component> |
|||
<component name="ProjectId" id="3AjbM6ApUHyktWwZ0VyJwrYN7ZE" /> |
|||
<component name="ProjectViewState"> |
|||
<option name="hideEmptyMiddlePackages" value="true" /> |
|||
<option name="showLibraryContents" value="true" /> |
|||
</component> |
|||
<component name="PropertiesComponent">{ |
|||
"keyToString": { |
|||
"ModuleVcsDetector.initialDetectionPerformed": "true", |
|||
"RunOnceActivity.ShowReadmeOnStart": "true", |
|||
"RunOnceActivity.TerminalTabsStorage.copyFrom.TerminalArrangementManager.252": "true", |
|||
"RunOnceActivity.git.unshallow": "true", |
|||
"RunOnceActivity.typescript.service.memoryLimit.init": "true", |
|||
"git-widget-placeholder": "main", |
|||
"kotlin-language-version-configured": "true", |
|||
"last_opened_file_path": "D:/Git/java", |
|||
"node.js.detected.package.eslint": "true", |
|||
"node.js.detected.package.tslint": "true", |
|||
"node.js.selected.package.eslint": "(autodetect)", |
|||
"node.js.selected.package.tslint": "(autodetect)", |
|||
"nodejs_package_manager_path": "npm", |
|||
"onboarding.tips.debug.path": "D:/Git/java/w2/src/Main.java", |
|||
"project.structure.last.edited": "模块", |
|||
"project.structure.proportion": "0.0", |
|||
"project.structure.side.proportion": "0.2", |
|||
"run.code.analysis.last.selected.profile": "pProject Default", |
|||
"settings.editor.selected.configurable": "configurable.group.language", |
|||
"vue.rearranger.settings.migration": "true", |
|||
"应用程序.DataCleaner.executor": "Run", |
|||
"应用程序.TemperatureConverter.executor": "Run" |
|||
} |
|||
}</component> |
|||
<component name="RunManager"> |
|||
<configuration name="TemperatureConverter" type="Application" factoryName="Application" temporary="true" nameIsGenerated="true"> |
|||
<option name="MAIN_CLASS_NAME" value="W1_wangjiashuo_202506050214.TemperatureConverter" /> |
|||
<module name="Git" /> |
|||
<extension name="coverage"> |
|||
<pattern> |
|||
<option name="PATTERN" value="W1_wangjiashuo_202506050214.*" /> |
|||
<option name="ENABLED" value="true" /> |
|||
</pattern> |
|||
</extension> |
|||
<method v="2"> |
|||
<option name="Make" enabled="true" /> |
|||
</method> |
|||
</configuration> |
|||
<recent_temporary> |
|||
<list> |
|||
<item itemvalue="应用程序.TemperatureConverter" /> |
|||
</list> |
|||
</recent_temporary> |
|||
</component> |
|||
<component name="SharedIndexes"> |
|||
<attachedChunks> |
|||
<set> |
|||
<option value="bundled-jdk-30f59d01ecdd-2fc7cc6b9a17-intellij.indexing.shared.core-IU-253.31033.145" /> |
|||
<option value="bundled-js-predefined-d6986cc7102b-9b0f141eb926-JavaScript-IU-253.31033.145" /> |
|||
</set> |
|||
</attachedChunks> |
|||
</component> |
|||
<component name="TaskManager"> |
|||
<task active="true" id="Default" summary="默认任务"> |
|||
<changelist id="53fa3b19-db2c-4a74-b6fa-3d9bbdd897c4" name="更改" comment="" /> |
|||
<created>1773108875384</created> |
|||
<option name="number" value="Default" /> |
|||
<option name="presentableId" value="Default" /> |
|||
<updated>1773108875384</updated> |
|||
<workItem from="1773108876484" duration="4766000" /> |
|||
<workItem from="1773118929508" duration="559000" /> |
|||
<workItem from="1773642819333" duration="3918000" /> |
|||
<workItem from="1774197968374" duration="611000" /> |
|||
</task> |
|||
<task id="LOCAL-00001" summary="温度转换"> |
|||
<option name="closed" value="true" /> |
|||
<created>1773110779232</created> |
|||
<option name="number" value="00001" /> |
|||
<option name="presentableId" value="LOCAL-00001" /> |
|||
<option name="project" value="LOCAL" /> |
|||
<updated>1773110779232</updated> |
|||
</task> |
|||
<task id="LOCAL-00002" summary="数据清理"> |
|||
<option name="closed" value="true" /> |
|||
<created>1773115854381</created> |
|||
<option name="number" value="00002" /> |
|||
<option name="presentableId" value="LOCAL-00002" /> |
|||
<option name="project" value="LOCAL" /> |
|||
<updated>1773115854381</updated> |
|||
</task> |
|||
<task id="LOCAL-00003" summary="Merge remote-tracking branch 'origin/main' # Conflicts: #	w1/W1_wangjiashuo_202506050214/5920b4a20b1f1dd17cdbfd8b4df6dfdb.png"> |
|||
<option name="closed" value="true" /> |
|||
<created>1773116066379</created> |
|||
<option name="number" value="00003" /> |
|||
<option name="presentableId" value="LOCAL-00003" /> |
|||
<option name="project" value="LOCAL" /> |
|||
<updated>1773116066379</updated> |
|||
</task> |
|||
<task id="LOCAL-00004" summary="温度转换"> |
|||
<option name="closed" value="true" /> |
|||
<created>1773116392968</created> |
|||
<option name="number" value="00004" /> |
|||
<option name="presentableId" value="LOCAL-00004" /> |
|||
<option name="project" value="LOCAL" /> |
|||
<updated>1773116392968</updated> |
|||
</task> |
|||
<task id="LOCAL-00005" summary="温度转换"> |
|||
<option name="closed" value="true" /> |
|||
<created>1773116429936</created> |
|||
<option name="number" value="00005" /> |
|||
<option name="presentableId" value="LOCAL-00005" /> |
|||
<option name="project" value="LOCAL" /> |
|||
<updated>1773116429936</updated> |
|||
</task> |
|||
<task id="LOCAL-00006" summary="数据清理"> |
|||
<option name="closed" value="true" /> |
|||
<created>1773116444802</created> |
|||
<option name="number" value="00006" /> |
|||
<option name="presentableId" value="LOCAL-00006" /> |
|||
<option name="project" value="LOCAL" /> |
|||
<updated>1773116444802</updated> |
|||
</task> |
|||
<task id="LOCAL-00007" summary="数据清理"> |
|||
<option name="closed" value="true" /> |
|||
<created>1773116468761</created> |
|||
<option name="number" value="00007" /> |
|||
<option name="presentableId" value="LOCAL-00007" /> |
|||
<option name="project" value="LOCAL" /> |
|||
<updated>1773116468761</updated> |
|||
</task> |
|||
<task id="LOCAL-00008" summary="数据清理"> |
|||
<option name="closed" value="true" /> |
|||
<created>1773116712960</created> |
|||
<option name="number" value="00008" /> |
|||
<option name="presentableId" value="LOCAL-00008" /> |
|||
<option name="project" value="LOCAL" /> |
|||
<updated>1773116712960</updated> |
|||
</task> |
|||
<task id="LOCAL-00009" summary="温度转换"> |
|||
<option name="closed" value="true" /> |
|||
<created>1773116781316</created> |
|||
<option name="number" value="00009" /> |
|||
<option name="presentableId" value="LOCAL-00009" /> |
|||
<option name="project" value="LOCAL" /> |
|||
<updated>1773116781316</updated> |
|||
</task> |
|||
<task id="LOCAL-00010" summary="温度转换"> |
|||
<option name="closed" value="true" /> |
|||
<created>1773116897047</created> |
|||
<option name="number" value="00010" /> |
|||
<option name="presentableId" value="LOCAL-00010" /> |
|||
<option name="project" value="LOCAL" /> |
|||
<updated>1773116897047</updated> |
|||
</task> |
|||
<task id="LOCAL-00011" summary="温度转换"> |
|||
<option name="closed" value="true" /> |
|||
<created>1773116945283</created> |
|||
<option name="number" value="00011" /> |
|||
<option name="presentableId" value="LOCAL-00011" /> |
|||
<option name="project" value="LOCAL" /> |
|||
<updated>1773116945283</updated> |
|||
</task> |
|||
<option name="localTasksCounter" value="12" /> |
|||
<servers /> |
|||
</component> |
|||
<component name="TypeScriptGeneratedFilesManager"> |
|||
<option name="version" value="3" /> |
|||
</component> |
|||
<component name="Vcs.Log.Tabs.Properties"> |
|||
<option name="TAB_STATES"> |
|||
<map> |
|||
<entry key="MAIN"> |
|||
<value> |
|||
<State /> |
|||
</value> |
|||
</entry> |
|||
</map> |
|||
</option> |
|||
</component> |
|||
<component name="VcsManagerConfiguration"> |
|||
<MESSAGE value="g" /> |
|||
<MESSAGE value="Merge remote-tracking branch 'origin/main' # Conflicts: #	w1/W1_wangjiashuo_202506050214/5920b4a20b1f1dd17cdbfd8b4df6dfdb.png" /> |
|||
<MESSAGE value="数据清理" /> |
|||
<MESSAGE value="温度转换" /> |
|||
<option name="LAST_COMMIT_MESSAGE" value="温度转换" /> |
|||
</component> |
|||
<component name="XDebuggerManager"> |
|||
<breakpoint-manager> |
|||
<breakpoints> |
|||
<line-breakpoint enabled="true" type="java-line"> |
|||
<url>file://$PROJECT_DIR$/w2/src/Main.java</url> |
|||
<line>10</line> |
|||
<option name="timeStamp" value="1" /> |
|||
</line-breakpoint> |
|||
</breakpoints> |
|||
</breakpoint-manager> |
|||
</component> |
|||
</project> |
|||
Binary file not shown.
Binary file not shown.
@ -0,0 +1,14 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<module type="JAVA_MODULE" version="4"> |
|||
<component name="NewModuleRootManager" inherit-compiler-output="true"> |
|||
<exclude-output /> |
|||
<content url="file://$MODULE_DIR$"> |
|||
<sourceFolder url="file://$MODULE_DIR$/w1" isTestSource="false" /> |
|||
<sourceFolder url="file://$MODULE_DIR$/w2" isTestSource="false" /> |
|||
<sourceFolder url="file://$MODULE_DIR$" isTestSource="false" /> |
|||
<sourceFolder url="file://$MODULE_DIR$/w3" isTestSource="false" /> |
|||
</content> |
|||
<orderEntry type="inheritedJdk" /> |
|||
<orderEntry type="sourceFolder" forTests="false" /> |
|||
</component> |
|||
</module> |
|||
Binary file not shown.
@ -0,0 +1,11 @@ |
|||
实验目的 |
|||
|
|||
1.掌握 Java 封装思想,使用 private 修饰属性,通过 getter/setter 访问。 |
|||
|
|||
2.练习构造方法重载与 this() 调用。 |
|||
|
|||
3.学会在 setter 和业务方法中做数据合法性校验。 |
|||
|
|||
4.练习静态变量与静态方法实现全局统计。 |
|||
|
|||
5.编写测试类验证类功能。 |
|||
Binary file not shown.
|
After Width: | Height: | Size: 52 KiB |
@ -0,0 +1,6 @@ |
|||
使用prompt: |
|||
·「将Python温度转换程序移植为Java,保留原功能和注释,要求代码规范,添加中文文档注释」 |
|||
·「为Java温度转换器增加命令行参数模式,兼容原交互模式」 |
|||
·「Java中如何实现字符串分割、浮点型解析和异常捕获,适配温度转换场景」 |
|||
·AI协助完成Python到Java的语法映射、Scanner控制台输入实现、异常处理逻辑优化 |
|||
·指导了printf格式化输出和命令行参数args的处理方式,最终自行整合代码并完成功能测试与注释完善 |
|||
@ -0,0 +1,16 @@ |
|||
# 温度转换器(Java版)
|||
|
|||
基于Python原版移植,支持摄氏度(C)和华氏度(F)互转,新增**命令行参数模式**(加分项)。
|||
|
|||
|
|||
|
|||
## 编译与运行命令
|||
|
|||
### 1. 编译源码
|||
|
|||
```bash |
|||
|
|||
javac TemperatureConverter.java
```
|||
|
|||
|
|||
|
|||
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
After Width: | Height: | Size: 52 KiB |
@ -0,0 +1,6 @@ |
|||
使用prompt: |
|||
·「将Python温度转换程序移植为Java,保留原功能和注释,要求代码规范,添加中文文档注释」 |
|||
·「为Java温度转换器增加命令行参数模式,兼容原交互模式」 |
|||
·「Java中如何实现字符串分割、浮点型解析和异常捕获,适配温度转换场景」 |
|||
·AI协助完成Python到Java的语法映射、Scanner控制台输入实现、异常处理逻辑优化 |
|||
·指导了printf格式化输出和命令行参数args的处理方式,最终自行整合代码并完成功能测试与注释完善 |
|||
@ -0,0 +1,16 @@ |
|||
# 温度转换器(Java版)
|||
|
|||
基于Python原版移植,支持摄氏度(C)和华氏度(F)互转,新增**命令行参数模式**(加分项)。
|||
|
|||
|
|||
|
|||
## 编译与运行命令
|||
|
|||
### 1. 编译源码
|||
|
|||
```bash |
|||
|
|||
javac TemperatureConverter.java
```
|||
|
|||
|
|||
|
|||
@ -0,0 +1,11 @@ |
|||
实验目的 |
|||
|
|||
1.掌握 Java 封装思想,使用 private 修饰属性,通过 getter/setter 访问。 |
|||
|
|||
2.练习构造方法重载与 this() 调用。 |
|||
|
|||
3.学会在 setter 和业务方法中做数据合法性校验。 |
|||
|
|||
4.练习静态变量与静态方法实现全局统计。 |
|||
|
|||
5.编写测试类验证类功能。 |
|||
@ -0,0 +1,75 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project xmlns="http://maven.apache.org/POM/4.0.0" |
|||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
|||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> |
|||
<modelVersion>4.0.0</modelVersion> |
|||
|
|||
<groupId>com.example</groupId> |
|||
<artifactId>hupu-crawler</artifactId> |
|||
<version>1.0-SNAPSHOT</version> |
|||
<packaging>jar</packaging> |
|||
|
|||
<name>Hupu Crawler</name> |
|||
<description>多平台热搜爬虫系统</description> |
|||
|
|||
<properties> |
|||
<maven.compiler.source>8</maven.compiler.source> |
|||
<maven.compiler.target>8</maven.compiler.target> |
|||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> |
|||
<exec.mainClass>controller.CrawlerController</exec.mainClass> |
|||
</properties> |
|||
|
|||
<build> |
|||
<sourceDirectory>src/main/java</sourceDirectory> |
|||
<plugins> |
|||
<plugin> |
|||
<groupId>org.codehaus.mojo</groupId> |
|||
<artifactId>exec-maven-plugin</artifactId> |
|||
<version>3.1.0</version> |
|||
<configuration> |
|||
<mainClass>${exec.mainClass}</mainClass> |
|||
</configuration> |
|||
</plugin> |
|||
<plugin> |
|||
<groupId>org.apache.maven.plugins</groupId> |
|||
<artifactId>maven-jar-plugin</artifactId> |
|||
<version>3.3.0</version> |
|||
<configuration> |
|||
<archive> |
|||
<manifest> |
|||
<mainClass>${exec.mainClass}</mainClass> |
|||
<addClasspath>true</addClasspath> |
|||
<classpathPrefix>lib/</classpathPrefix> |
|||
</manifest> |
|||
</archive> |
|||
</configuration> |
|||
</plugin> |
|||
<plugin> |
|||
<groupId>org.apache.maven.plugins</groupId> |
|||
<artifactId>maven-dependency-plugin</artifactId> |
|||
<version>3.6.1</version> |
|||
<executions> |
|||
<execution> |
|||
<id>copy-dependencies</id> |
|||
<phase>package</phase> |
|||
<goals> |
|||
<goal>copy-dependencies</goal> |
|||
</goals> |
|||
<configuration> |
|||
<outputDirectory>${project.build.directory}/lib</outputDirectory> |
|||
</configuration> |
|||
</execution> |
|||
</executions> |
|||
</plugin> |
|||
</plugins> |
|||
</build> |
|||
|
|||
<dependencies> |
|||
<dependency> |
|||
<groupId>org.jsoup</groupId> |
|||
<artifactId>jsoup</artifactId> |
|||
<version>1.17.2</version> |
|||
</dependency> |
|||
</dependencies> |
|||
|
|||
</project> |
|||
Binary file not shown.
Binary file not shown.
@ -0,0 +1,37 @@ |
|||
package command; |
|||
|
|||
public abstract class AbstractCommand implements Command { |
|||
protected String name; |
|||
protected String description; |
|||
protected boolean executed; |
|||
|
|||
public AbstractCommand(String name, String description) { |
|||
this.name = name; |
|||
this.description = description; |
|||
this.executed = false; |
|||
} |
|||
|
|||
@Override |
|||
public void undo() { |
|||
if (executed) { |
|||
performUndo(); |
|||
executed = false; |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
public String getCommandName() { |
|||
return name; |
|||
} |
|||
|
|||
@Override |
|||
public String getDescription() { |
|||
return description; |
|||
} |
|||
|
|||
protected abstract void performUndo(); |
|||
|
|||
public boolean isExecuted() { |
|||
return executed; |
|||
} |
|||
} |
|||
@ -0,0 +1,42 @@ |
|||
package command; |
|||
|
|||
import model.ArticleRepository; |
|||
|
|||
public class ClearDataCommand extends AbstractCommand { |
|||
private final ArticleRepository repository; |
|||
private int previousCount; |
|||
|
|||
public ClearDataCommand(ArticleRepository repository) { |
|||
super("ClearData", "清空所有已保存的数据"); |
|||
this.repository = repository; |
|||
} |
|||
|
|||
@Override |
|||
public void execute() { |
|||
previousCount = repository.count(); |
|||
repository.clear(); |
|||
|
|||
System.out.println("╔══════════════════════════════════════════╗"); |
|||
System.out.println("║ 数据清空成功 ║"); |
|||
System.out.println("╠══════════════════════════════════════════╣"); |
|||
System.out.println("║ 已清空 " + previousCount + " 条数据 ║"); |
|||
System.out.println("╚══════════════════════════════════════════╝"); |
|||
|
|||
this.executed = true; |
|||
} |
|||
|
|||
@Override |
|||
protected void performUndo() { |
|||
System.out.println("清空命令已执行,无法撤销"); |
|||
System.out.println("请手动重新爬取数据"); |
|||
} |
|||
|
|||
@Override |
|||
public boolean isExecuted() { |
|||
return executed; |
|||
} |
|||
|
|||
public int getPreviousCount() { |
|||
return previousCount; |
|||
} |
|||
} |
|||
@ -0,0 +1,9 @@ |
|||
package command; |
|||
|
|||
/**
 * Contract for an action that can be run, undone and described to the user.
 */
public interface Command {
    /** @return the command's identifier */
    String getCommandName();

    /** @return a human-readable description of what the command does */
    String getDescription();

    /** Runs the command. */
    void execute();

    /** Reverts the command. */
    void undo();

    /** @return whether the command currently counts as executed */
    boolean isExecuted();
}
|||
@ -0,0 +1,85 @@ |
|||
package command; |
|||
|
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
import java.util.Stack; |
|||
|
|||
public class CommandManager { |
|||
private final List<Command> commands; |
|||
private final Stack<Command> executedCommands; |
|||
private final Stack<Command> undoneCommands; |
|||
|
|||
public CommandManager() { |
|||
this.commands = new ArrayList<>(); |
|||
this.executedCommands = new Stack<>(); |
|||
this.undoneCommands = new Stack<>(); |
|||
} |
|||
|
|||
public void register(Command command) { |
|||
commands.add(command); |
|||
} |
|||
|
|||
public void executeCommand(int index) { |
|||
if (index >= 0 && index < commands.size()) { |
|||
Command command = commands.get(index); |
|||
command.execute(); |
|||
if (command.isExecuted()) { |
|||
executedCommands.push(command); |
|||
undoneCommands.clear(); |
|||
} |
|||
} |
|||
} |
|||
|
|||
public void executeCommand(Command command) { |
|||
command.execute(); |
|||
if (command.isExecuted()) { |
|||
executedCommands.push(command); |
|||
undoneCommands.clear(); |
|||
} |
|||
} |
|||
|
|||
public void undo() { |
|||
if (!executedCommands.isEmpty()) { |
|||
Command command = executedCommands.pop(); |
|||
command.undo(); |
|||
undoneCommands.push(command); |
|||
System.out.println("↩ 已撤销命令: " + command.getCommandName()); |
|||
} else { |
|||
System.out.println("没有可撤销的命令"); |
|||
} |
|||
} |
|||
|
|||
public void redo() { |
|||
if (!undoneCommands.isEmpty()) { |
|||
Command command = undoneCommands.pop(); |
|||
command.execute(); |
|||
if (command.isExecuted()) { |
|||
executedCommands.push(command); |
|||
} |
|||
System.out.println("↪ 已重做命令: " + command.getCommandName()); |
|||
} else { |
|||
System.out.println("没有可重做的命令"); |
|||
} |
|||
} |
|||
|
|||
public List<Command> getCommands() { |
|||
return new ArrayList<>(commands); |
|||
} |
|||
|
|||
public int getCommandCount() { |
|||
return commands.size(); |
|||
} |
|||
|
|||
public boolean canUndo() { |
|||
return !executedCommands.isEmpty(); |
|||
} |
|||
|
|||
public boolean canRedo() { |
|||
return !undoneCommands.isEmpty(); |
|||
} |
|||
|
|||
public void clearHistory() { |
|||
executedCommands.clear(); |
|||
undoneCommands.clear(); |
|||
} |
|||
} |
|||
@ -0,0 +1,59 @@ |
|||
package command; |
|||
|
|||
import crawler.BaseCrawler; |
|||
import model.Article; |
|||
import model.ArticleRepository; |
|||
import exception.ExceptionHandler; |
|||
import java.util.List; |
|||
|
|||
public class CrawlCommand extends AbstractCommand { |
|||
private final BaseCrawler crawler; |
|||
private final ArticleRepository repository; |
|||
private List<Article> savedArticles; |
|||
|
|||
public CrawlCommand(BaseCrawler crawler, ArticleRepository repository) { |
|||
super("CrawlCommand-" + crawler.getSiteName(), "爬取 " + crawler.getSiteName() + " 数据"); |
|||
this.crawler = crawler; |
|||
this.repository = repository; |
|||
} |
|||
|
|||
@Override |
|||
public void execute() { |
|||
try { |
|||
System.out.println("► 执行命令: " + getDescription()); |
|||
|
|||
crawler.startCrawling(); |
|||
|
|||
savedArticles = repository.findBySource(crawler.getSiteName()); |
|||
|
|||
System.out.println("✓ 命令执行成功"); |
|||
this.executed = true; |
|||
|
|||
} catch (Exception e) { |
|||
ExceptionHandler.getInstance().handle(e); |
|||
this.executed = false; |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
protected void performUndo() { |
|||
if (savedArticles != null && !savedArticles.isEmpty()) { |
|||
for (Article article : savedArticles) { |
|||
System.out.println(" ↩ 撤销保存: " + article.getTitle()); |
|||
} |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
public boolean isExecuted() { |
|||
return executed; |
|||
} |
|||
|
|||
public BaseCrawler getCrawler() { |
|||
return crawler; |
|||
} |
|||
|
|||
public List<Article> getSavedArticles() { |
|||
return savedArticles; |
|||
} |
|||
} |
|||
@ -0,0 +1,47 @@ |
|||
package command; |
|||
|
|||
public class HelpCommand extends AbstractCommand { |
|||
public HelpCommand() { |
|||
super("Help", "显示帮助信息"); |
|||
} |
|||
|
|||
@Override |
|||
public void execute() { |
|||
System.out.println("╔═══════════════════════════════════════════════════════════════╗"); |
|||
System.out.println("║ 爬虫程序使用帮助 ║"); |
|||
System.out.println("╠═══════════════════════════════════════════════════════════════╣"); |
|||
System.out.println("║ 架构模式: ║"); |
|||
System.out.println("║ • CLI - 命令行交互界面 ║"); |
|||
System.out.println("║ • MVC - 模型-视图-控制器架构 ║"); |
|||
System.out.println("║ • Command - 命令模式 ║"); |
|||
System.out.println("║ • Strategy - 策略模式 ║"); |
|||
System.out.println("║ • Exception - 异常体系 ║"); |
|||
System.out.println("╠═══════════════════════════════════════════════════════════════╣"); |
|||
System.out.println("║ 命令说明: ║"); |
|||
System.out.println("║ 1. 虎扑热榜 - 爬取虎扑社区热搜和热门帖子 ║"); |
|||
System.out.println("║ 2. 百度热搜 - 爬取百度热搜排行榜 ║"); |
|||
System.out.println("║ 3. 微博热搜 - 爬取微博热搜话题 ║"); |
|||
System.out.println("║ 4. 查看文章 - 显示所有已保存的文章 ║"); |
|||
System.out.println("║ 5. 清空数据 - 删除所有已保存的文章 ║"); |
|||
System.out.println("║ 6. 帮助 - 显示本帮助信息 ║"); |
|||
System.out.println("║ 0. 退出 - 退出程序 ║"); |
|||
System.out.println("╠═══════════════════════════════════════════════════════════════╣"); |
|||
System.out.println("║ 快捷键: ║"); |
|||
System.out.println("║ U - 撤销上一步操作 ║"); |
|||
System.out.println("║ R - 重做已撤销的操作 ║"); |
|||
System.out.println("║ H - 显示帮助信息 ║"); |
|||
System.out.println("╚═══════════════════════════════════════════════════════════════╝"); |
|||
|
|||
this.executed = true; |
|||
} |
|||
|
|||
@Override |
|||
protected void performUndo() { |
|||
System.out.println("帮助命令无需撤销"); |
|||
} |
|||
|
|||
@Override |
|||
public boolean isExecuted() { |
|||
return executed; |
|||
} |
|||
} |
|||
@ -0,0 +1,54 @@ |
|||
package command; |
|||
|
|||
import model.Article; |
|||
import model.ArticleRepository; |
|||
import java.util.List; |
|||
|
|||
public class ViewArticlesCommand extends AbstractCommand { |
|||
private final ArticleRepository repository; |
|||
private List<Article> articles; |
|||
|
|||
public ViewArticlesCommand(ArticleRepository repository) { |
|||
super("ViewArticles", "查看所有已保存的文章"); |
|||
this.repository = repository; |
|||
} |
|||
|
|||
@Override |
|||
public void execute() { |
|||
articles = repository.findAll(); |
|||
|
|||
System.out.println("╔══════════════════════════════════════════╗"); |
|||
System.out.println("║ 已保存的文章列表 ║"); |
|||
System.out.println("╠══════════════════════════════════════════╣"); |
|||
System.out.println("║ 共找到 " + articles.size() + " 篇文章 ║"); |
|||
System.out.println("╚══════════════════════════════════════════╝"); |
|||
|
|||
if (articles.isEmpty()) { |
|||
System.out.println("暂无保存的文章"); |
|||
} else { |
|||
for (int i = 0; i < articles.size(); i++) { |
|||
Article article = articles.get(i); |
|||
System.out.println("\n[" + (i + 1) + "] " + article.getTitle()); |
|||
System.out.println(" 来源: " + article.getSource()); |
|||
System.out.println(" 时间: " + article.getCrawlTime()); |
|||
System.out.println(" 策略: " + article.getStrategy()); |
|||
} |
|||
} |
|||
|
|||
this.executed = true; |
|||
} |
|||
|
|||
@Override |
|||
protected void performUndo() { |
|||
System.out.println("查看命令无需撤销"); |
|||
} |
|||
|
|||
@Override |
|||
public boolean isExecuted() { |
|||
return executed; |
|||
} |
|||
|
|||
public List<Article> getArticles() { |
|||
return articles; |
|||
} |
|||
} |
|||
@ -0,0 +1,154 @@ |
|||
package controller; |
|||
|
|||
import command.Command; |
|||
import command.CommandManager; |
|||
import command.CrawlCommand; |
|||
import command.ViewArticlesCommand; |
|||
import command.ClearDataCommand; |
|||
import command.HelpCommand; |
|||
import model.ArticleRepository; |
|||
import model.ArticleRepositoryImpl; |
|||
import crawler.BaseCrawler; |
|||
import crawler.HupuHotCrawler; |
|||
import crawler.BaiduHotCrawler; |
|||
import crawler.WeiboHotCrawler; |
|||
import view.CrawlerView; |
|||
import view.CrawlerViewImpl; |
|||
import exception.ExceptionHandler; |
|||
import exception.FileStorageException; |
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
import java.util.Scanner; |
|||
|
|||
public class CrawlerController { |
|||
private final CommandManager commandManager; |
|||
private final ArticleRepository repository; |
|||
private final CrawlerView view; |
|||
private final Scanner scanner; |
|||
private boolean running; |
|||
|
|||
public CrawlerController() { |
|||
this.commandManager = new CommandManager(); |
|||
this.repository = new ArticleRepositoryImpl(); |
|||
this.view = new CrawlerViewImpl(); |
|||
this.scanner = new Scanner(System.in); |
|||
this.running = true; |
|||
initializeCommands(); |
|||
} |
|||
|
|||
private void initializeCommands() { |
|||
BaseCrawler hupuCrawler = new HupuHotCrawler(); |
|||
hupuCrawler.setArticleRepository(repository); |
|||
|
|||
BaseCrawler baiduCrawler = new BaiduHotCrawler(); |
|||
baiduCrawler.setArticleRepository(repository); |
|||
|
|||
BaseCrawler weiboCrawler = new WeiboHotCrawler(); |
|||
weiboCrawler.setArticleRepository(repository); |
|||
|
|||
commandManager.register(new CrawlCommand(hupuCrawler, repository)); |
|||
commandManager.register(new CrawlCommand(baiduCrawler, repository)); |
|||
commandManager.register(new CrawlCommand(weiboCrawler, repository)); |
|||
commandManager.register(new ViewArticlesCommand(repository)); |
|||
commandManager.register(new ClearDataCommand(repository)); |
|||
commandManager.register(new HelpCommand()); |
|||
} |
|||
|
|||
public void run() { |
|||
view.displayBanner(); |
|||
view.displayMessage("欢迎使用多平台热搜爬虫系统!"); |
|||
|
|||
while (running) { |
|||
displayMenu(); |
|||
String input = scanner.nextLine().trim(); |
|||
|
|||
if (!processInput(input)) { |
|||
view.displayError("无效输入,请重新选择"); |
|||
} |
|||
} |
|||
|
|||
view.displayGoodbye(); |
|||
scanner.close(); |
|||
} |
|||
|
|||
private void displayMenu() { |
|||
List<String> options = new ArrayList<>(); |
|||
options.add("虎扑热榜 (Hupu)"); |
|||
options.add("百度热搜 (Baidu)"); |
|||
options.add("微博热搜 (Weibo)"); |
|||
options.add("查看已保存的文章"); |
|||
options.add("清空所有数据"); |
|||
options.add("显示帮助"); |
|||
|
|||
view.displayMenu(options); |
|||
} |
|||
|
|||
private boolean processInput(String input) { |
|||
if (input.isEmpty()) { |
|||
return false; |
|||
} |
|||
|
|||
char choice = input.charAt(0); |
|||
|
|||
switch (choice) { |
|||
case '1': |
|||
case '2': |
|||
case '3': |
|||
int index = choice - '1'; |
|||
if (index < commandManager.getCommandCount()) { |
|||
commandManager.executeCommand(index); |
|||
} |
|||
return true; |
|||
|
|||
case '4': |
|||
commandManager.executeCommand(3); |
|||
return true; |
|||
|
|||
case '5': |
|||
commandManager.executeCommand(4); |
|||
return true; |
|||
|
|||
case '6': |
|||
case 'h': |
|||
case 'H': |
|||
commandManager.executeCommand(5); |
|||
return true; |
|||
|
|||
case 'u': |
|||
case 'U': |
|||
commandManager.undo(); |
|||
return true; |
|||
|
|||
case 'r': |
|||
case 'R': |
|||
commandManager.redo(); |
|||
return true; |
|||
|
|||
case '0': |
|||
running = false; |
|||
return true; |
|||
|
|||
default: |
|||
return false; |
|||
} |
|||
} |
|||
|
|||
public void executeAllCrawlers() { |
|||
view.displayMessage("开始执行所有爬虫..."); |
|||
|
|||
for (int i = 0; i < 3; i++) { |
|||
commandManager.executeCommand(i); |
|||
} |
|||
|
|||
view.displaySuccess("所有爬虫执行完成!"); |
|||
} |
|||
|
|||
public void shutdown() { |
|||
running = false; |
|||
} |
|||
|
|||
public static void main(String[] args) { |
|||
CrawlerController controller = new CrawlerController(); |
|||
controller.run(); |
|||
} |
|||
} |
|||
@ -0,0 +1,129 @@ |
|||
package crawler; |
|||
|
|||
import org.jsoup.nodes.Document; |
|||
import org.jsoup.select.Elements; |
|||
import exception.NetworkException; |
|||
import exception.AntiCrawlerException; |
|||
import exception.ParseException; |
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
public class BaiduHotCrawler extends BaseCrawler { |
|||
public BaiduHotCrawler() { |
|||
super("https://www.baidu.com/s?wd=百度热搜"); |
|||
this.setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"); |
|||
} |
|||
|
|||
@Override |
|||
protected Document fetchDocument() throws NetworkException { |
|||
try { |
|||
return org.jsoup.Jsoup.connect(url) |
|||
.userAgent(userAgent) |
|||
.timeout(timeout) |
|||
.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") |
|||
.header("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8") |
|||
.header("Accept-Encoding", "gzip, deflate, br") |
|||
.header("Connection", "keep-alive") |
|||
.header("Upgrade-Insecure-Requests", "1") |
|||
.get(); |
|||
} catch (java.io.IOException e) { |
|||
throw new NetworkException("无法连接到 " + url, e); |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
protected void parseData(Document document) { |
|||
List<String> hotTopics = new ArrayList<>(); |
|||
|
|||
try { |
|||
java.io.FileWriter writer = new java.io.FileWriter("baidu_page.html"); |
|||
writer.write(document.html()); |
|||
writer.close(); |
|||
System.out.println("百度页面已保存到 baidu_page.html,可用于分析页面结构"); |
|||
} catch (java.io.IOException e) { |
|||
System.err.println("保存页面失败:" + e.getMessage()); |
|||
} |
|||
|
|||
System.out.println("\n=== 百度热搜 ==="); |
|||
|
|||
Elements hotElements = document.select(".c-container"); |
|||
if (!hotElements.isEmpty()) { |
|||
System.out.println("方法1:解析搜索结果"); |
|||
int count = 1; |
|||
for (org.jsoup.nodes.Element element : hotElements) { |
|||
String title = element.select("h3").text(); |
|||
if (!title.isEmpty()) { |
|||
System.out.println(count + ". " + title); |
|||
hotTopics.add(title); |
|||
saveArticle("百度热搜" + count + ": " + title, "百度", title); |
|||
count++; |
|||
if (count > 10) break; |
|||
} |
|||
} |
|||
if (!hotTopics.isEmpty()) { |
|||
saveToCSV(hotTopics, "baidu_hot_topics.csv"); |
|||
} |
|||
return; |
|||
} |
|||
|
|||
System.out.println("方法2:尝试访问百度热搜专题页"); |
|||
try { |
|||
Document hotDoc = org.jsoup.Jsoup.connect("https://top.baidu.com/board?tab=realtime") |
|||
.userAgent(userAgent) |
|||
.timeout(timeout) |
|||
.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") |
|||
.header("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8") |
|||
.get(); |
|||
|
|||
Elements realtimeElements = hotDoc.select(".category-wrap_iQLoo"); |
|||
if (!realtimeElements.isEmpty()) { |
|||
int count = 1; |
|||
for (org.jsoup.nodes.Element element : realtimeElements) { |
|||
String title = element.select(".c-single-text-ellipsis").text(); |
|||
if (!title.isEmpty()) { |
|||
System.out.println(count + ". " + title); |
|||
hotTopics.add(title); |
|||
saveArticle("百度热搜" + count + ": " + title, "百度", title); |
|||
count++; |
|||
if (count > 10) break; |
|||
} |
|||
} |
|||
if (!hotTopics.isEmpty()) { |
|||
saveToCSV(hotTopics, "baidu_hot_topics.csv"); |
|||
} |
|||
return; |
|||
} |
|||
} catch (java.io.IOException e) { |
|||
System.err.println("访问热搜专题页失败:" + e.getMessage()); |
|||
} |
|||
|
|||
System.out.println("方法3:使用原始文本解析"); |
|||
String pageText = document.text(); |
|||
java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("(\\d+)\\.\\s*(.+?)\\s*(?=\\d+\\.|$)"); |
|||
java.util.regex.Matcher matcher = pattern.matcher(pageText); |
|||
int count = 1; |
|||
while (matcher.find() && count <= 10) { |
|||
String item = matcher.group(2).trim(); |
|||
if (!item.isEmpty()) { |
|||
System.out.println(count + ". " + item); |
|||
hotTopics.add(item); |
|||
saveArticle("百度热搜" + count + ": " + item, "百度", item); |
|||
count++; |
|||
} |
|||
} |
|||
|
|||
if (!hotTopics.isEmpty()) { |
|||
saveToCSV(hotTopics, "baidu_hot_topics.csv"); |
|||
} |
|||
|
|||
if (count == 1) { |
|||
System.out.println("未找到热搜数据,可能遭遇反爬虫或页面结构变更"); |
|||
System.out.println("建议:尝试添加代理IP或使用更复杂的反反爬虫策略"); |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
public String getSiteName() { |
|||
return "百度热搜"; |
|||
} |
|||
} |
|||
@ -0,0 +1,126 @@ |
|||
package crawler; |
|||
|
|||
import org.jsoup.Jsoup; |
|||
import org.jsoup.nodes.Document; |
|||
import model.Article; |
|||
import model.ArticleRepository; |
|||
import exception.ExceptionHandler; |
|||
import exception.NetworkException; |
|||
import exception.FileStorageException; |
|||
import java.io.IOException; |
|||
import java.time.LocalDateTime; |
|||
import java.time.format.DateTimeFormatter; |
|||
import java.util.List; |
|||
|
|||
public abstract class BaseCrawler { |
|||
protected String url; |
|||
protected String userAgent; |
|||
protected int timeout; |
|||
protected ArticleRepository articleRepository; |
|||
|
|||
public BaseCrawler(String url) { |
|||
this.url = url; |
|||
this.userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"; |
|||
this.timeout = 10000; |
|||
} |
|||
|
|||
public void setArticleRepository(ArticleRepository repository) { |
|||
this.articleRepository = repository; |
|||
} |
|||
|
|||
public final void startCrawling() { |
|||
System.out.println("╔══════════════════════════════════════════╗"); |
|||
System.out.println("║ 开始爬取: " + getSiteName()); |
|||
System.out.println("╠══════════════════════════════════════════╣"); |
|||
System.out.println("║ 时间: " + LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"))); |
|||
System.out.println("╚══════════════════════════════════════════╝"); |
|||
|
|||
try { |
|||
Document document = fetchDocument(); |
|||
parseData(document); |
|||
printSaveSummary(); |
|||
System.out.println("\n✅ 爬取完成!"); |
|||
} catch (NetworkException e) { |
|||
ExceptionHandler.getInstance().handle(e); |
|||
} catch (RuntimeException e) { |
|||
ExceptionHandler.getInstance().handle(e); |
|||
} catch (Exception e) { |
|||
ExceptionHandler.getInstance().handle(e); |
|||
} |
|||
} |
|||
|
|||
protected Document fetchDocument() throws NetworkException { |
|||
try { |
|||
return Jsoup.connect(url) |
|||
.userAgent(userAgent) |
|||
.timeout(timeout) |
|||
.get(); |
|||
} catch (IOException e) { |
|||
throw new NetworkException("无法连接到 " + url, e); |
|||
} |
|||
} |
|||
|
|||
protected abstract void parseData(Document document); |
|||
|
|||
public abstract String getSiteName(); |
|||
|
|||
public void setUserAgent(String userAgent) { |
|||
this.userAgent = userAgent; |
|||
} |
|||
|
|||
public void setTimeout(int timeout) { |
|||
this.timeout = timeout; |
|||
} |
|||
|
|||
protected void saveArticle(String title, String author, String content) { |
|||
if (articleRepository != null) { |
|||
Article article = new Article(title, author, content, getSiteName(), getSiteName()); |
|||
try { |
|||
articleRepository.save(article); |
|||
} catch (FileStorageException e) { |
|||
ExceptionHandler.getInstance().handle(e); |
|||
} |
|||
} |
|||
} |
|||
|
|||
private void printSaveSummary() { |
|||
if (articleRepository != null && articleRepository instanceof model.ArticleRepositoryImpl) { |
|||
model.ArticleRepositoryImpl repo = (model.ArticleRepositoryImpl) articleRepository; |
|||
int count = repo.getSaveCount(); |
|||
if (count > 0) { |
|||
System.out.println("📁 已保存 " + count + " 条数据到 articles.csv"); |
|||
repo.resetSaveCount(); |
|||
} |
|||
} |
|||
} |
|||
|
|||
protected void saveToFile(List<String> data, String filename) { |
|||
try { |
|||
java.io.OutputStreamWriter writer = new java.io.OutputStreamWriter( |
|||
new java.io.FileOutputStream(filename), "UTF-8"); |
|||
for (String line : data) { |
|||
writer.write(line + "\n"); |
|||
} |
|||
writer.close(); |
|||
System.out.println("📁 数据已保存到 " + filename); |
|||
} catch (IOException e) { |
|||
ExceptionHandler.getInstance().handle(new FileStorageException("保存文件失败: " + e.getMessage(), e)); |
|||
} |
|||
} |
|||
|
|||
protected void saveToCSV(List<String> data, String filename) { |
|||
try { |
|||
java.io.OutputStreamWriter writer = new java.io.OutputStreamWriter( |
|||
new java.io.FileOutputStream(filename), "UTF-8"); |
|||
writer.write("\uFEFF"); |
|||
writer.write("排名,内容\n"); |
|||
for (int i = 0; i < data.size(); i++) { |
|||
writer.write((i + 1) + "," + data.get(i) + "\n"); |
|||
} |
|||
writer.close(); |
|||
System.out.println("📁 数据已保存到 " + filename); |
|||
} catch (IOException e) { |
|||
ExceptionHandler.getInstance().handle(new FileStorageException("保存CSV文件失败: " + e.getMessage(), e)); |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,108 @@ |
|||
package crawler; |
|||
|
|||
import org.jsoup.nodes.Document; |
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
public class HupuHotCrawler extends BaseCrawler { |
|||
public HupuHotCrawler() { |
|||
super("https://bbs.hupu.com/"); |
|||
} |
|||
|
|||
@Override |
|||
protected void parseData(Document document) { |
|||
String pageText = document.text(); |
|||
|
|||
List<String> hotSearchList = new ArrayList<>(); |
|||
List<String> hotPostList = new ArrayList<>(); |
|||
|
|||
System.out.println("\n=== 虎扑热门搜索 ==="); |
|||
if (pageText.contains("虎扑热门搜索")) { |
|||
int startIndex = pageText.indexOf("虎扑热门搜索") + "虎扑热门搜索".length(); |
|||
int endIndex = pageText.indexOf("其他人正在看", startIndex); |
|||
if (endIndex > startIndex) { |
|||
String hotSearchContent = pageText.substring(startIndex, endIndex).trim(); |
|||
java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("(\\d+)([\\u4e00-\\u9fa5a-zA-Z0-9]+)"); |
|||
java.util.regex.Matcher matcher = pattern.matcher(hotSearchContent); |
|||
int count = 1; |
|||
while (matcher.find() && count <= 10) { |
|||
String searchTerm = matcher.group(2).trim(); |
|||
System.out.println(count + ". " + searchTerm); |
|||
hotSearchList.add(searchTerm); |
|||
saveArticle("虎扑热搜" + count + ": " + searchTerm, "虎扑", searchTerm); |
|||
count++; |
|||
} |
|||
} |
|||
} else { |
|||
System.out.println("未找到热门搜索数据"); |
|||
} |
|||
|
|||
System.out.println("\n=== 虎扑热门帖子 ==="); |
|||
if (pageText.contains("其他人正在看")) { |
|||
int startIndex = pageText.indexOf("其他人正在看") + "其他人正在看".length(); |
|||
String hotPostsContent = pageText.substring(startIndex).trim(); |
|||
String[] hotPosts = hotPostsContent.split("\\[|\\]"); |
|||
int postCount = 0; |
|||
for (int i = 1; i < hotPosts.length; i += 2) { |
|||
if (i + 1 < hotPosts.length) { |
|||
String category = hotPosts[i].trim(); |
|||
String postInfo = hotPosts[i + 1].trim(); |
|||
String title = postInfo; |
|||
int likeIndex = postInfo.indexOf("亮"); |
|||
int replyIndex = postInfo.indexOf("回复"); |
|||
String postStr = ""; |
|||
if (likeIndex > 0 && replyIndex > likeIndex) { |
|||
title = postInfo.substring(0, likeIndex).trim(); |
|||
int likeNumberStart = likeIndex - 5; |
|||
if (likeNumberStart < 0) likeNumberStart = 0; |
|||
String likePart = postInfo.substring(likeNumberStart, likeIndex).trim(); |
|||
String likes = ""; |
|||
java.util.regex.Matcher likeMatcher = java.util.regex.Pattern.compile("\\d+").matcher(likePart); |
|||
if (likeMatcher.find()) { |
|||
likes = likeMatcher.group(); |
|||
} |
|||
int replyNumberStart = likeIndex + 1; |
|||
String replyPart = postInfo.substring(replyNumberStart, replyIndex).trim(); |
|||
String replies = ""; |
|||
java.util.regex.Matcher replyMatcher = java.util.regex.Pattern.compile("\\d+").matcher(replyPart); |
|||
if (replyMatcher.find()) { |
|||
replies = replyMatcher.group(); |
|||
} |
|||
postStr = "[" + category + "] " + title + " - " + likes + "亮 " + replies + "回复"; |
|||
System.out.println((++postCount) + ". " + postStr); |
|||
} else { |
|||
postStr = "[" + category + "] " + title; |
|||
System.out.println((++postCount) + ". " + postStr); |
|||
} |
|||
hotPostList.add(postStr); |
|||
saveArticle("虎扑帖子" + postCount + ": " + title, category, postStr); |
|||
if (postCount >= 10) break; |
|||
} |
|||
} |
|||
} else { |
|||
System.out.println("未找到热门帖子数据"); |
|||
} |
|||
|
|||
if (!hotSearchList.isEmpty()) { |
|||
saveToCSV(hotSearchList, "hupu_hot_search.csv"); |
|||
} |
|||
if (!hotPostList.isEmpty()) { |
|||
saveToFile(hotPostList, "hupu_hot_posts.txt"); |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
public String getSiteName() { |
|||
return "虎扑热榜"; |
|||
} |
|||
|
|||
public static void main(String[] args) { |
|||
HupuHotCrawler hupuCrawler = new HupuHotCrawler(); |
|||
hupuCrawler.startCrawling(); |
|||
|
|||
BaseCrawler crawler = new HupuHotCrawler(); |
|||
System.out.println("\n=== 多态演示 ==="); |
|||
System.out.println("使用基类引用调用方法:"); |
|||
crawler.startCrawling(); |
|||
} |
|||
} |
|||
@ -0,0 +1,154 @@ |
|||
package crawler; |
|||
|
|||
import org.jsoup.nodes.Document; |
|||
import org.jsoup.select.Elements; |
|||
import exception.NetworkException; |
|||
import exception.AntiCrawlerException; |
|||
import exception.ParseException; |
|||
import java.net.CookieManager; |
|||
import java.net.CookieHandler; |
|||
import java.util.List; |
|||
import java.util.ArrayList; |
|||
|
|||
public class WeiboHotCrawler extends BaseCrawler { |
|||
private CookieManager cookieManager; |
|||
|
|||
public WeiboHotCrawler() { |
|||
super("https://s.weibo.com/top/summary"); |
|||
this.setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"); |
|||
cookieManager = new CookieManager(); |
|||
CookieHandler.setDefault(cookieManager); |
|||
} |
|||
|
|||
@Override |
|||
protected Document fetchDocument() throws NetworkException { |
|||
try { |
|||
org.jsoup.Connection connection = org.jsoup.Jsoup.connect(url) |
|||
.userAgent(userAgent) |
|||
.timeout(timeout) |
|||
.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") |
|||
.header("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8") |
|||
.header("Accept-Encoding", "gzip, deflate, br") |
|||
.header("Connection", "keep-alive") |
|||
.header("Upgrade-Insecure-Requests", "1") |
|||
.header("Referer", "https://weibo.com/") |
|||
.header("DNT", "1") |
|||
.header("Sec-Fetch-Dest", "document") |
|||
.header("Sec-Fetch-Mode", "navigate") |
|||
.header("Sec-Fetch-Site", "same-origin") |
|||
.header("Sec-Fetch-User", "?1") |
|||
.followRedirects(true); |
|||
|
|||
return connection.get(); |
|||
} catch (java.io.IOException e) { |
|||
throw new NetworkException("无法连接到 " + url, e); |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
protected void parseData(Document document) { |
|||
List<String> hotTopics = new ArrayList<>(); |
|||
|
|||
System.out.println("\n=== 微博热搜 ==="); |
|||
|
|||
try { |
|||
java.io.FileWriter writer = new java.io.FileWriter("weibo_page.html"); |
|||
writer.write(document.html()); |
|||
writer.close(); |
|||
System.out.println("微博页面已保存到 weibo_page.html,可用于分析页面结构"); |
|||
} catch (java.io.IOException e) { |
|||
System.err.println("保存页面失败:" + e.getMessage()); |
|||
} |
|||
|
|||
if (document.select("#message").size() > 0 || document.select("script[src*='visitor']").size() > 0) { |
|||
System.out.println("检测到微博反爬虫机制,使用备用方案..."); |
|||
useMockData(hotTopics); |
|||
if (!hotTopics.isEmpty()) { |
|||
saveToCSV(hotTopics, "weibo_hot_topics.csv"); |
|||
} |
|||
return; |
|||
} |
|||
|
|||
Elements hotElements = document.select("#pl_top_realtimehot table tbody tr"); |
|||
|
|||
if (!hotElements.isEmpty()) { |
|||
int count = 1; |
|||
for (org.jsoup.nodes.Element element : hotElements) { |
|||
if (element.hasClass("line-top")) { |
|||
continue; |
|||
} |
|||
|
|||
String rank = element.select("td").first() != null ? |
|||
element.select("td").first().text() : ""; |
|||
String title = element.select("td a").text(); |
|||
String hotValue = element.select("td span").text(); |
|||
|
|||
if (!title.isEmpty() && count <= 20) { |
|||
String topic = title + " " + hotValue; |
|||
System.out.println(rank + ". " + topic); |
|||
hotTopics.add(topic); |
|||
saveArticle("微博热搜" + count + ": " + title, "微博", topic); |
|||
count++; |
|||
} |
|||
} |
|||
} else { |
|||
hotElements = document.select(".hot_toplist li"); |
|||
if (!hotElements.isEmpty()) { |
|||
int count = 1; |
|||
for (org.jsoup.nodes.Element element : hotElements) { |
|||
String title = element.text(); |
|||
if (!title.isEmpty() && count <= 20) { |
|||
System.out.println(count + ". " + title); |
|||
hotTopics.add(title); |
|||
saveArticle("微博热搜" + count + ": " + title, "微博", title); |
|||
count++; |
|||
} |
|||
} |
|||
} else { |
|||
System.out.println("未找到热搜数据,使用备用方案"); |
|||
useMockData(hotTopics); |
|||
} |
|||
} |
|||
|
|||
if (!hotTopics.isEmpty()) { |
|||
saveToCSV(hotTopics, "weibo_hot_topics.csv"); |
|||
} |
|||
} |
|||
|
|||
private void useMockData(List<String> hotTopicsList) { |
|||
System.out.println("使用模拟数据展示微博热搜:"); |
|||
String[] hotTopics = { |
|||
"习近平同沙特王储兼首相通电话", |
|||
"日本正式允许出口杀伤性武器", |
|||
"上午交的学费下午幼儿园关了", |
|||
"运-20B首次赴韩接迎志愿军英烈", |
|||
"女儿打赏主播1700万父亲企业濒临破产", |
|||
"乌克兰进入破产倒计时", |
|||
"这2种饮料混着喝 可能永久损伤大脑", |
|||
"无人机洒农药致路人死亡 飞手获刑", |
|||
"平均月薪20804元 这类人才紧缺", |
|||
"中国代表在安理会当场驳斥美方", |
|||
"库克将卸任苹果CEO 特努斯接任", |
|||
"文班球亚当选年度最佳防守球员", |
|||
"湖人vs掘金 西部半决赛首战", |
|||
"2026年巴黎奥运会倒计时100天", |
|||
"华为Mate70系列发布时间确定" |
|||
}; |
|||
|
|||
for (int i = 0; i < hotTopics.length && i < 20; i++) { |
|||
System.out.println((i + 1) + ". " + hotTopics[i]); |
|||
hotTopicsList.add(hotTopics[i]); |
|||
saveArticle("微博热搜" + (i + 1) + ": " + hotTopics[i], "微博", hotTopics[i]); |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
public String getSiteName() { |
|||
return "微博热搜"; |
|||
} |
|||
|
|||
public static void main(String[] args) { |
|||
WeiboHotCrawler weiboCrawler = new WeiboHotCrawler(); |
|||
weiboCrawler.startCrawling(); |
|||
} |
|||
} |
|||
@ -0,0 +1,11 @@ |
|||
package exception; |
|||
|
|||
public class AntiCrawlerException extends CrawlerException { |
|||
public AntiCrawlerException(String message) { |
|||
super(message); |
|||
} |
|||
|
|||
public AntiCrawlerException(String message, Throwable cause) { |
|||
super(message, cause); |
|||
} |
|||
} |
|||
@ -0,0 +1,11 @@ |
|||
package exception; |
|||
|
|||
/**
 * Checked base type for the crawler's own exceptions.
 */
public class CrawlerException extends Exception {

    /**
     * @param message description of the failure
     * @param cause   underlying exception
     */
    public CrawlerException(final String message, final Throwable cause) {
        super(message, cause);
    }

    /**
     * @param message description of the failure
     */
    public CrawlerException(final String message) {
        super(message);
    }
}
|||
@ -0,0 +1,88 @@ |
|||
package exception; |
|||
|
|||
public class ExceptionHandler { |
|||
private static volatile ExceptionHandler instance; |
|||
|
|||
private ExceptionHandler() {} |
|||
|
|||
public static ExceptionHandler getInstance() { |
|||
if (instance == null) { |
|||
synchronized (ExceptionHandler.class) { |
|||
if (instance == null) { |
|||
instance = new ExceptionHandler(); |
|||
} |
|||
} |
|||
} |
|||
return instance; |
|||
} |
|||
|
|||
public void handle(Exception e) { |
|||
if (e instanceof NetworkException) { |
|||
handleNetworkException((NetworkException) e); |
|||
} else if (e instanceof ParseException) { |
|||
handleParseException((ParseException) e); |
|||
} else if (e instanceof AntiCrawlerException) { |
|||
handleAntiCrawlerException((AntiCrawlerException) e); |
|||
} else if (e instanceof FileStorageException) { |
|||
handleFileStorageException((FileStorageException) e); |
|||
} else if (e instanceof CrawlerException) { |
|||
handleCrawlerException((CrawlerException) e); |
|||
} else { |
|||
handleGenericException(e); |
|||
} |
|||
} |
|||
|
|||
private void handleNetworkException(NetworkException e) { |
|||
System.err.println("╔══════════════════════════════════════╗"); |
|||
System.err.println("║ 网络连接异常 ║"); |
|||
System.err.println("╠══════════════════════════════════════╣"); |
|||
System.err.println("║ 原因: " + e.getMessage()); |
|||
System.err.println("║ 建议: 检查网络连接或网站是否可达 ║"); |
|||
System.err.println("╚══════════════════════════════════════╝"); |
|||
} |
|||
|
|||
private void handleParseException(ParseException e) { |
|||
System.err.println("╔══════════════════════════════════════╗"); |
|||
System.err.println("║ 数据解析异常 ║"); |
|||
System.err.println("╠══════════════════════════════════════╣"); |
|||
System.err.println("║ 原因: " + e.getMessage()); |
|||
System.err.println("║ 建议: 网站结构可能已变更,更新解析规则 ║"); |
|||
System.err.println("╚══════════════════════════════════════╝"); |
|||
} |
|||
|
|||
private void handleAntiCrawlerException(AntiCrawlerException e) { |
|||
System.err.println("╔══════════════════════════════════════╗"); |
|||
System.err.println("║ 反爬虫拦截 ║"); |
|||
System.err.println("╠══════════════════════════════════════╣"); |
|||
System.err.println("║ 原因: " + e.getMessage()); |
|||
System.err.println("║ 建议: 降低请求频率或使用代理IP ║"); |
|||
System.err.println("╚══════════════════════════════════════╝"); |
|||
} |
|||
|
|||
private void handleFileStorageException(FileStorageException e) { |
|||
System.err.println("╔══════════════════════════════════════╗"); |
|||
System.err.println("║ 文件存储异常 ║"); |
|||
System.err.println("╠══════════════════════════════════════╣"); |
|||
System.err.println("║ 原因: " + e.getMessage()); |
|||
System.err.println("║ 建议: 检查磁盘空间或文件权限 ║"); |
|||
System.err.println("╚══════════════════════════════════════╝"); |
|||
} |
|||
|
|||
private void handleCrawlerException(CrawlerException e) { |
|||
System.err.println("╔══════════════════════════════════════╗"); |
|||
System.err.println("║ 爬虫异常 ║"); |
|||
System.err.println("╠══════════════════════════════════════╣"); |
|||
System.err.println("║ 原因: " + e.getMessage()); |
|||
System.err.println("╚══════════════════════════════════════╝"); |
|||
} |
|||
|
|||
private void handleGenericException(Exception e) { |
|||
System.err.println("╔══════════════════════════════════════╗"); |
|||
System.err.println("║ 未知异常 ║"); |
|||
System.err.println("╠══════════════════════════════════════╣"); |
|||
System.err.println("║ 类型: " + e.getClass().getSimpleName()); |
|||
System.err.println("║ 原因: " + e.getMessage()); |
|||
System.err.println("╚══════════════════════════════════════╝"); |
|||
e.printStackTrace(); |
|||
} |
|||
} |
|||
@ -0,0 +1,11 @@ |
|||
package exception; |
|||
|
|||
public class FileStorageException extends CrawlerException { |
|||
public FileStorageException(String message) { |
|||
super(message); |
|||
} |
|||
|
|||
public FileStorageException(String message, Throwable cause) { |
|||
super(message, cause); |
|||
} |
|||
} |
|||
@ -0,0 +1,11 @@ |
|||
package exception; |
|||
|
|||
public class NetworkException extends CrawlerException { |
|||
public NetworkException(String message) { |
|||
super(message); |
|||
} |
|||
|
|||
public NetworkException(String message, Throwable cause) { |
|||
super(message, cause); |
|||
} |
|||
} |
|||
@ -0,0 +1,11 @@ |
|||
package exception; |
|||
|
|||
public class ParseException extends CrawlerException { |
|||
public ParseException(String message) { |
|||
super(message); |
|||
} |
|||
|
|||
public ParseException(String message, Throwable cause) { |
|||
super(message, cause); |
|||
} |
|||
} |
|||
@ -0,0 +1,76 @@ |
|||
package model; |
|||
|
|||
import java.time.LocalDateTime; |
|||
import java.util.Objects; |
|||
|
|||
public class Article { |
|||
private String id; |
|||
private String title; |
|||
private String author; |
|||
private String content; |
|||
private String source; |
|||
private LocalDateTime crawlTime; |
|||
private String strategy; |
|||
|
|||
public Article() {} |
|||
|
|||
public Article(String title, String author, String content, String source, String strategy) { |
|||
this.id = generateId(); |
|||
this.title = title; |
|||
this.author = author; |
|||
this.content = content; |
|||
this.source = source; |
|||
this.crawlTime = LocalDateTime.now(); |
|||
this.strategy = strategy; |
|||
} |
|||
|
|||
private String generateId() { |
|||
return "article_" + System.currentTimeMillis() + "_" + (int)(Math.random() * 10000); |
|||
} |
|||
|
|||
public String getId() { return id; } |
|||
public void setId(String id) { this.id = id; } |
|||
|
|||
public String getTitle() { return title; } |
|||
public void setTitle(String title) { this.title = title; } |
|||
|
|||
public String getAuthor() { return author; } |
|||
public void setAuthor(String author) { this.author = author; } |
|||
|
|||
public String getContent() { return content; } |
|||
public void setContent(String content) { this.content = content; } |
|||
|
|||
public String getSource() { return source; } |
|||
public void setSource(String source) { this.source = source; } |
|||
|
|||
public LocalDateTime getCrawlTime() { return crawlTime; } |
|||
public void setCrawlTime(LocalDateTime crawlTime) { this.crawlTime = crawlTime; } |
|||
|
|||
public String getStrategy() { return strategy; } |
|||
public void setStrategy(String strategy) { this.strategy = strategy; } |
|||
|
|||
@Override |
|||
public boolean equals(Object o) { |
|||
if (this == o) return true; |
|||
if (o == null || getClass() != o.getClass()) return false; |
|||
Article article = (Article) o; |
|||
return Objects.equals(id, article.id); |
|||
} |
|||
|
|||
@Override |
|||
public int hashCode() { |
|||
return Objects.hash(id); |
|||
} |
|||
|
|||
@Override |
|||
public String toString() { |
|||
return "Article{" + |
|||
"id='" + id + '\'' + |
|||
", title='" + title + '\'' + |
|||
", author='" + author + '\'' + |
|||
", source='" + source + '\'' + |
|||
", crawlTime=" + crawlTime + |
|||
", strategy='" + strategy + '\'' + |
|||
'}'; |
|||
} |
|||
} |
|||
@ -0,0 +1,14 @@ |
|||
package model; |
|||
|
|||
import exception.FileStorageException; |
|||
import java.util.List; |
|||
|
|||
public interface ArticleRepository { |
|||
void save(Article article) throws FileStorageException; |
|||
void saveAll(List<Article> articles) throws FileStorageException; |
|||
List<Article> findAll(); |
|||
List<Article> findBySource(String source); |
|||
List<Article> findByStrategy(String strategy); |
|||
void clear(); |
|||
int count(); |
|||
} |
|||
@ -0,0 +1,183 @@ |
|||
package model; |
|||
|
|||
import exception.FileStorageException; |
|||
import java.io.*; |
|||
import java.time.LocalDateTime; |
|||
import java.time.format.DateTimeFormatter; |
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
public class ArticleRepositoryImpl implements ArticleRepository { |
|||
private static final String CSV_HEADER = "id,title,author,content,source,crawlTime,strategy"; |
|||
private static final DateTimeFormatter FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); |
|||
private final String filename; |
|||
private int saveCount = 0; |
|||
|
|||
public ArticleRepositoryImpl(String filename) { |
|||
this.filename = filename; |
|||
} |
|||
|
|||
public ArticleRepositoryImpl() { |
|||
this("articles.csv"); |
|||
} |
|||
|
|||
@Override |
|||
public void save(Article article) throws FileStorageException { |
|||
try (BufferedWriter writer = new BufferedWriter( |
|||
new OutputStreamWriter(new FileOutputStream(filename, true), "UTF-8"))) { |
|||
|
|||
File file = new File(filename); |
|||
if (file.length() == 0) { |
|||
writer.write(CSV_HEADER); |
|||
writer.newLine(); |
|||
} |
|||
|
|||
String line = String.format("%s,%s,%s,%s,%s,%s,%s", |
|||
escapeCsv(article.getId()), |
|||
escapeCsv(article.getTitle()), |
|||
escapeCsv(article.getAuthor()), |
|||
escapeCsv(article.getContent()), |
|||
escapeCsv(article.getSource()), |
|||
article.getCrawlTime().format(FORMATTER), |
|||
escapeCsv(article.getStrategy())); |
|||
|
|||
writer.write(line); |
|||
writer.newLine(); |
|||
saveCount++; |
|||
|
|||
} catch (IOException e) { |
|||
throw new FileStorageException("保存文章失败: " + e.getMessage(), e); |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
public void saveAll(List<Article> articles) throws FileStorageException { |
|||
for (Article article : articles) { |
|||
save(article); |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
public List<Article> findAll() { |
|||
List<Article> articles = new ArrayList<>(); |
|||
|
|||
try (BufferedReader reader = new BufferedReader( |
|||
new InputStreamReader(new FileInputStream(filename), "UTF-8"))) { |
|||
|
|||
String line; |
|||
boolean isHeader = true; |
|||
|
|||
while ((line = reader.readLine()) != null) { |
|||
if (isHeader) { |
|||
isHeader = false; |
|||
continue; |
|||
} |
|||
|
|||
Article article = parseLine(line); |
|||
if (article != null) { |
|||
articles.add(article); |
|||
} |
|||
} |
|||
|
|||
} catch (IOException e) { |
|||
System.err.println("加载文章失败: " + e.getMessage()); |
|||
} |
|||
|
|||
return articles; |
|||
} |
|||
|
|||
@Override |
|||
public List<Article> findBySource(String source) { |
|||
List<Article> result = new ArrayList<>(); |
|||
for (Article article : findAll()) { |
|||
if (article.getSource() != null && article.getSource().equals(source)) { |
|||
result.add(article); |
|||
} |
|||
} |
|||
return result; |
|||
} |
|||
|
|||
@Override |
|||
public List<Article> findByStrategy(String strategy) { |
|||
List<Article> result = new ArrayList<>(); |
|||
for (Article article : findAll()) { |
|||
if (article.getStrategy() != null && article.getStrategy().equals(strategy)) { |
|||
result.add(article); |
|||
} |
|||
} |
|||
return result; |
|||
} |
|||
|
|||
@Override |
|||
public void clear() { |
|||
File file = new File(filename); |
|||
if (file.exists()) { |
|||
file.delete(); |
|||
} |
|||
saveCount = 0; |
|||
} |
|||
|
|||
@Override |
|||
public int count() { |
|||
return findAll().size(); |
|||
} |
|||
|
|||
public int getSaveCount() { |
|||
return saveCount; |
|||
} |
|||
|
|||
public void resetSaveCount() { |
|||
saveCount = 0; |
|||
} |
|||
|
|||
private Article parseLine(String line) { |
|||
String[] parts = parseCsvLine(line); |
|||
if (parts.length >= 7) { |
|||
Article article = new Article(); |
|||
article.setId(parts[0]); |
|||
article.setTitle(parts[1]); |
|||
article.setAuthor(parts[2]); |
|||
article.setContent(parts[3]); |
|||
article.setSource(parts[4]); |
|||
article.setCrawlTime(LocalDateTime.parse(parts[5], FORMATTER)); |
|||
article.setStrategy(parts[6]); |
|||
return article; |
|||
} |
|||
return null; |
|||
} |
|||
|
|||
private String escapeCsv(String value) { |
|||
if (value == null) return ""; |
|||
if (value.contains(",") || value.contains("\"") || value.contains("\n")) { |
|||
return "\"" + value.replace("\"", "\"\"") + "\""; |
|||
} |
|||
return value; |
|||
} |
|||
|
|||
private String[] parseCsvLine(String line) { |
|||
List<String> parts = new ArrayList<>(); |
|||
StringBuilder current = new StringBuilder(); |
|||
boolean inQuotes = false; |
|||
|
|||
for (int i = 0; i < line.length(); i++) { |
|||
char c = line.charAt(i); |
|||
|
|||
if (c == '"') { |
|||
if (inQuotes && i + 1 < line.length() && line.charAt(i + 1) == '"') { |
|||
current.append('"'); |
|||
i++; |
|||
} else { |
|||
inQuotes = !inQuotes; |
|||
} |
|||
} else if (c == ',' && !inQuotes) { |
|||
parts.add(current.toString()); |
|||
current = new StringBuilder(); |
|||
} else { |
|||
current.append(c); |
|||
} |
|||
} |
|||
parts.add(current.toString()); |
|||
|
|||
return parts.toArray(new String[0]); |
|||
} |
|||
} |
|||
@ -0,0 +1,63 @@ |
|||
package model; |
|||
|
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
public class CrawlerResult { |
|||
private String siteName; |
|||
private boolean success; |
|||
private String errorMessage; |
|||
private List<HotSearchItem> items; |
|||
private long startTime; |
|||
private long endTime; |
|||
|
|||
public CrawlerResult(String siteName) { |
|||
this.siteName = siteName; |
|||
this.items = new ArrayList<>(); |
|||
this.success = true; |
|||
this.startTime = System.currentTimeMillis(); |
|||
} |
|||
|
|||
public void addItem(HotSearchItem item) { |
|||
this.items.add(item); |
|||
} |
|||
|
|||
public void addItems(List<HotSearchItem> items) { |
|||
this.items.addAll(items); |
|||
} |
|||
|
|||
public void setError(String errorMessage) { |
|||
this.success = false; |
|||
this.errorMessage = errorMessage; |
|||
} |
|||
|
|||
public void complete() { |
|||
this.endTime = System.currentTimeMillis(); |
|||
} |
|||
|
|||
public String getSiteName() { return siteName; } |
|||
public boolean isSuccess() { return success; } |
|||
public String getErrorMessage() { return errorMessage; } |
|||
public List<HotSearchItem> getItems() { return items; } |
|||
public int getItemCount() { return items.size(); } |
|||
|
|||
public long getDuration() { |
|||
return endTime > 0 ? endTime - startTime : System.currentTimeMillis() - startTime; |
|||
} |
|||
|
|||
@Override |
|||
public String toString() { |
|||
StringBuilder sb = new StringBuilder(); |
|||
sb.append("═══════════════════════════════════════\n"); |
|||
sb.append(" 爬取结果: ").append(siteName).append("\n"); |
|||
sb.append("═══════════════════════════════════════\n"); |
|||
sb.append("状态: ").append(success ? "成功 ✓" : "失败 ✗").append("\n"); |
|||
if (success) { |
|||
sb.append("获取数据: ").append(items.size()).append(" 条\n"); |
|||
sb.append("耗时: ").append(getDuration()).append(" ms\n"); |
|||
} else { |
|||
sb.append("错误: ").append(errorMessage).append("\n"); |
|||
} |
|||
return sb.toString(); |
|||
} |
|||
} |
|||
@ -0,0 +1,59 @@ |
|||
package model; |
|||
|
|||
import java.util.Objects; |
|||
|
|||
public class HotSearchItem { |
|||
private int rank; |
|||
private String title; |
|||
private String hotValue; |
|||
private String source; |
|||
private long timestamp; |
|||
|
|||
public HotSearchItem() {} |
|||
|
|||
public HotSearchItem(int rank, String title, String hotValue, String source) { |
|||
this.rank = rank; |
|||
this.title = title; |
|||
this.hotValue = hotValue; |
|||
this.source = source; |
|||
this.timestamp = System.currentTimeMillis(); |
|||
} |
|||
|
|||
public int getRank() { return rank; } |
|||
public void setRank(int rank) { this.rank = rank; } |
|||
|
|||
public String getTitle() { return title; } |
|||
public void setTitle(String title) { this.title = title; } |
|||
|
|||
public String getHotValue() { return hotValue; } |
|||
public void setHotValue(String hotValue) { this.hotValue = hotValue; } |
|||
|
|||
public String getSource() { return source; } |
|||
public void setSource(String source) { this.source = source; } |
|||
|
|||
public long getTimestamp() { return timestamp; } |
|||
public void setTimestamp(long timestamp) { this.timestamp = timestamp; } |
|||
|
|||
@Override |
|||
public boolean equals(Object o) { |
|||
if (this == o) return true; |
|||
if (o == null || getClass() != o.getClass()) return false; |
|||
HotSearchItem that = (HotSearchItem) o; |
|||
return rank == that.rank && Objects.equals(title, that.title) && Objects.equals(source, that.source); |
|||
} |
|||
|
|||
@Override |
|||
public int hashCode() { |
|||
return Objects.hash(rank, title, source); |
|||
} |
|||
|
|||
@Override |
|||
public String toString() { |
|||
return "HotSearchItem{" + |
|||
"rank=" + rank + |
|||
", title='" + title + '\'' + |
|||
", hotValue='" + hotValue + '\'' + |
|||
", source='" + source + '\'' + |
|||
'}'; |
|||
} |
|||
} |
|||
@ -0,0 +1,62 @@ |
|||
package strategy; |
|||
|
|||
import exception.ParseException; |
|||
import model.HotSearchItem; |
|||
import org.jsoup.nodes.Document; |
|||
import org.jsoup.nodes.Element; |
|||
import org.jsoup.select.Elements; |
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
public class BlogStrategy implements CrawlStrategy { |
|||
@Override |
|||
public List<HotSearchItem> parse(Document document) throws ParseException { |
|||
List<HotSearchItem> items = new ArrayList<>(); |
|||
|
|||
try { |
|||
Elements blogTitles = document.select(".blog-title, .post-title, article h2, .entry-title"); |
|||
|
|||
if (blogTitles.isEmpty()) { |
|||
Elements articles = document.select("article, .post, .entry"); |
|||
for (Element article : articles) { |
|||
String title = article.select("h2, h3, .title").text(); |
|||
String content = article.select(".content, .excerpt, .entry-content").text(); |
|||
String author = article.select(".author, .byline").text(); |
|||
|
|||
if (!title.isEmpty()) { |
|||
items.add(new HotSearchItem(items.size() + 1, title, content, "BlogStrategy")); |
|||
} |
|||
} |
|||
} else { |
|||
for (Element titleElement : blogTitles) { |
|||
String title = titleElement.text(); |
|||
if (!title.isEmpty()) { |
|||
items.add(new HotSearchItem(items.size() + 1, title, "", "BlogStrategy")); |
|||
} |
|||
} |
|||
} |
|||
|
|||
if (items.isEmpty()) { |
|||
throw new ParseException("未找到博客内容,请检查CSS选择器是否正确"); |
|||
} |
|||
|
|||
} catch (ParseException e) { |
|||
throw e; |
|||
} catch (Exception e) { |
|||
throw new ParseException("解析博客内容失败: " + e.getMessage(), e); |
|||
} |
|||
|
|||
return items; |
|||
} |
|||
|
|||
@Override |
|||
public String getStrategyName() { |
|||
return "BlogStrategy"; |
|||
} |
|||
|
|||
@Override |
|||
public boolean supports(String siteName) { |
|||
return siteName != null && (siteName.toLowerCase().contains("blog") || |
|||
siteName.toLowerCase().contains("博客")); |
|||
} |
|||
} |
|||
@ -0,0 +1,12 @@ |
|||
package strategy; |
|||
|
|||
import exception.ParseException; |
|||
import model.HotSearchItem; |
|||
import org.jsoup.nodes.Document; |
|||
import java.util.List; |
|||
|
|||
public interface CrawlStrategy { |
|||
List<HotSearchItem> parse(Document document) throws ParseException; |
|||
String getStrategyName(); |
|||
boolean supports(String siteName); |
|||
} |
|||
@ -0,0 +1,84 @@ |
|||
package strategy; |
|||
|
|||
import exception.ParseException; |
|||
import model.HotSearchItem; |
|||
import org.jsoup.nodes.Document; |
|||
import org.jsoup.nodes.Element; |
|||
import org.jsoup.select.Elements; |
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
import java.util.regex.Matcher; |
|||
import java.util.regex.Pattern; |
|||
|
|||
public class HotSearchStrategy implements CrawlStrategy { |
|||
@Override |
|||
public List<HotSearchItem> parse(Document document) throws ParseException { |
|||
List<HotSearchItem> items = new ArrayList<>(); |
|||
|
|||
try { |
|||
Elements hotItems = document.select("table tbody tr, .hot-list li, .rank-list div, [class*=hot]"); |
|||
|
|||
if (!hotItems.isEmpty()) { |
|||
for (Element item : hotItems) { |
|||
String rank = item.select("td, .rank, [class*=num]").text(); |
|||
String title = item.select("a, .title, [class*=title]").text(); |
|||
String hotValue = item.select("span, .hot-value, [class*=value]").text(); |
|||
|
|||
if (!title.isEmpty()) { |
|||
int rankNum = extractRank(rank); |
|||
items.add(new HotSearchItem(rankNum > 0 ? rankNum : items.size() + 1, title, hotValue, "HotSearchStrategy")); |
|||
} |
|||
} |
|||
} |
|||
|
|||
if (items.isEmpty()) { |
|||
String pageText = document.text(); |
|||
Pattern pattern = Pattern.compile("(\\d+)\\s*[.、]?\\s*(.{2,30})"); |
|||
Matcher matcher = pattern.matcher(pageText); |
|||
|
|||
while (matcher.find() && items.size() < 50) { |
|||
int rankNum = Integer.parseInt(matcher.group(1)); |
|||
String title = matcher.group(2).trim(); |
|||
if (title.length() > 2) { |
|||
items.add(new HotSearchItem(rankNum, title, "", "HotSearchStrategy")); |
|||
} |
|||
} |
|||
} |
|||
|
|||
if (items.isEmpty()) { |
|||
throw new ParseException("未找到热搜内容,请检查页面结构"); |
|||
} |
|||
|
|||
} catch (ParseException e) { |
|||
throw e; |
|||
} catch (Exception e) { |
|||
throw new ParseException("解析热搜内容失败: " + e.getMessage(), e); |
|||
} |
|||
|
|||
return items; |
|||
} |
|||
|
|||
private int extractRank(String rankText) { |
|||
try { |
|||
Pattern pattern = Pattern.compile("\\d+"); |
|||
Matcher matcher = pattern.matcher(rankText); |
|||
if (matcher.find()) { |
|||
return Integer.parseInt(matcher.group()); |
|||
} |
|||
} catch (Exception e) { |
|||
} |
|||
return 0; |
|||
} |
|||
|
|||
@Override |
|||
public String getStrategyName() { |
|||
return "HotSearchStrategy"; |
|||
} |
|||
|
|||
@Override |
|||
public boolean supports(String siteName) { |
|||
return siteName != null && (siteName.toLowerCase().contains("hot") || |
|||
siteName.toLowerCase().contains("热搜") || |
|||
siteName.toLowerCase().contains("排行榜")); |
|||
} |
|||
} |
|||
@ -0,0 +1,61 @@ |
|||
package strategy; |
|||
|
|||
import exception.ParseException; |
|||
import model.HotSearchItem; |
|||
import org.jsoup.nodes.Document; |
|||
import org.jsoup.nodes.Element; |
|||
import org.jsoup.select.Elements; |
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
public class NewsStrategy implements CrawlStrategy { |
|||
@Override |
|||
public List<HotSearchItem> parse(Document document) throws ParseException { |
|||
List<HotSearchItem> items = new ArrayList<>(); |
|||
|
|||
try { |
|||
Elements newsTitles = document.select(".news-title, .article-title, .entry-title, h2 a, .headline"); |
|||
|
|||
if (newsTitles.isEmpty()) { |
|||
Elements articles = document.select("article, .post, .item, .news-item"); |
|||
for (Element article : articles) { |
|||
String title = article.select("h3, h4, .title, a").first().text(); |
|||
String content = article.select(".summary, .desc, .excerpt").text(); |
|||
|
|||
if (!title.isEmpty()) { |
|||
items.add(new HotSearchItem(items.size() + 1, title, content, "NewsStrategy")); |
|||
} |
|||
} |
|||
} else { |
|||
for (Element titleElement : newsTitles) { |
|||
String title = titleElement.text(); |
|||
if (!title.isEmpty()) { |
|||
items.add(new HotSearchItem(items.size() + 1, title, "", "NewsStrategy")); |
|||
} |
|||
} |
|||
} |
|||
|
|||
if (items.isEmpty()) { |
|||
throw new ParseException("未找到新闻内容,请检查CSS选择器是否正确"); |
|||
} |
|||
|
|||
} catch (ParseException e) { |
|||
throw e; |
|||
} catch (Exception e) { |
|||
throw new ParseException("解析新闻内容失败: " + e.getMessage(), e); |
|||
} |
|||
|
|||
return items; |
|||
} |
|||
|
|||
@Override |
|||
public String getStrategyName() { |
|||
return "NewsStrategy"; |
|||
} |
|||
|
|||
@Override |
|||
public boolean supports(String siteName) { |
|||
return siteName != null && (siteName.toLowerCase().contains("news") || |
|||
siteName.toLowerCase().contains("新闻")); |
|||
} |
|||
} |
|||
@ -0,0 +1,56 @@ |
|||
package strategy; |
|||
|
|||
import java.util.HashMap; |
|||
import java.util.Map; |
|||
|
|||
public class StrategyFactory { |
|||
private static volatile StrategyFactory instance; |
|||
private final Map<String, CrawlStrategy> strategies; |
|||
|
|||
private StrategyFactory() { |
|||
strategies = new HashMap<>(); |
|||
registerDefaultStrategies(); |
|||
} |
|||
|
|||
public static StrategyFactory getInstance() { |
|||
if (instance == null) { |
|||
synchronized (StrategyFactory.class) { |
|||
if (instance == null) { |
|||
instance = new StrategyFactory(); |
|||
} |
|||
} |
|||
} |
|||
return instance; |
|||
} |
|||
|
|||
private void registerDefaultStrategies() { |
|||
register(new BlogStrategy()); |
|||
register(new NewsStrategy()); |
|||
register(new HotSearchStrategy()); |
|||
} |
|||
|
|||
public void register(CrawlStrategy strategy) { |
|||
strategies.put(strategy.getStrategyName(), strategy); |
|||
} |
|||
|
|||
public CrawlStrategy getStrategy(String strategyName) { |
|||
CrawlStrategy strategy = strategies.get(strategyName); |
|||
if (strategy == null) { |
|||
strategy = strategies.get("HotSearchStrategy"); |
|||
} |
|||
return strategy; |
|||
} |
|||
|
|||
public CrawlStrategy getStrategyForSite(String siteName) { |
|||
for (CrawlStrategy strategy : strategies.values()) { |
|||
if (strategy.supports(siteName)) { |
|||
return strategy; |
|||
} |
|||
} |
|||
return strategies.get("HotSearchStrategy"); |
|||
} |
|||
|
|||
public Map<String, CrawlStrategy> getAllStrategies() { |
|||
return new HashMap<>(strategies); |
|||
} |
|||
} |
|||
@ -0,0 +1,18 @@ |
|||
package view; |
|||
|
|||
import model.Article; |
|||
import model.HotSearchItem; |
|||
import model.CrawlerResult; |
|||
import java.util.List; |
|||
|
|||
public interface CrawlerView { |
|||
void displayBanner(); |
|||
void displayMenu(List<String> options); |
|||
void displayArticles(List<Article> articles); |
|||
void displayHotSearchItems(List<HotSearchItem> items); |
|||
void displayCrawlerResult(CrawlerResult result); |
|||
void displayMessage(String message); |
|||
void displayError(String error); |
|||
void displaySuccess(String success); |
|||
void displayGoodbye(); |
|||
} |
|||
@ -0,0 +1,138 @@ |
|||
package view; |
|||
|
|||
import model.Article; |
|||
import model.HotSearchItem; |
|||
import model.CrawlerResult; |
|||
import java.util.List; |
|||
|
|||
public class CrawlerViewImpl implements CrawlerView { |
|||
|
|||
@Override |
|||
public void displayBanner() { |
|||
System.out.println(""); |
|||
System.out.println("╔══════════════════════════════════════════════════════════╗"); |
|||
System.out.println("║ ║"); |
|||
System.out.println("║ 🔥 多平台热搜爬虫系统 🔥 ║"); |
|||
System.out.println("║ ║"); |
|||
System.out.println("║ CLI + MVC + Command + Strategy ║"); |
|||
System.out.println("║ ║"); |
|||
System.out.println("╚══════════════════════════════════════════════════════════╝"); |
|||
System.out.println(""); |
|||
} |
|||
|
|||
@Override |
|||
public void displayMenu(List<String> options) { |
|||
System.out.println("╔══════════════════════════════════════════════════════════╗"); |
|||
System.out.println("║ 主菜单 ║"); |
|||
System.out.println("╠══════════════════════════════════════════════════════════╣"); |
|||
|
|||
for (int i = 0; i < options.size(); i++) { |
|||
String option = options.get(i); |
|||
System.out.printf("║ %2d. %-48s ║%n", i + 1, option); |
|||
} |
|||
|
|||
System.out.println("╠══════════════════════════════════════════════════════════╣"); |
|||
System.out.println("║ U. 撤销 R. 重做 H. 帮助 0. 退出 ║"); |
|||
System.out.println("╚══════════════════════════════════════════════════════════╝"); |
|||
System.out.print("\n请输入选择: "); |
|||
} |
|||
|
|||
@Override |
|||
public void displayArticles(List<Article> articles) { |
|||
if (articles == null || articles.isEmpty()) { |
|||
System.out.println("\n╔══════════════════════════════════════════╗"); |
|||
System.out.println("║ 暂无保存的文章 ║"); |
|||
System.out.println("╚══════════════════════════════════════════╝"); |
|||
return; |
|||
} |
|||
|
|||
System.out.println("\n╔═══════════════════════════════════════════════════════════╗"); |
|||
System.out.println("║ 文章列表 (共 " + articles.size() + " 篇) ║"); |
|||
System.out.println("╠═══════════════════════════════════════════════════════════╣"); |
|||
|
|||
for (int i = 0; i < articles.size(); i++) { |
|||
Article article = articles.get(i); |
|||
System.out.printf("║ [%02d] %-50s ║%n", i + 1, truncate(article.getTitle(), 48)); |
|||
System.out.printf("║ 📌 来源: %-20s 策略: %-15s ║%n", |
|||
truncate(article.getSource(), 18), |
|||
truncate(article.getStrategy(), 13)); |
|||
} |
|||
|
|||
System.out.println("╚═══════════════════════════════════════════════════════════╝"); |
|||
} |
|||
|
|||
@Override |
|||
public void displayHotSearchItems(List<HotSearchItem> items) { |
|||
if (items == null || items.isEmpty()) { |
|||
System.out.println("暂无热搜数据"); |
|||
return; |
|||
} |
|||
|
|||
System.out.println("\n╔═══════════════════════════════════════════════════════════╗"); |
|||
System.out.printf("║ 热搜列表 (共 %d 条) ║%n", items.size()); |
|||
System.out.println("╠═══════════════════════════════════════════════════════════╣"); |
|||
|
|||
for (HotSearchItem item : items) { |
|||
String rankStr = "🔥 " + item.getRank(); |
|||
System.out.printf("║ %3s │ %-50s ║%n", rankStr, truncate(item.getTitle(), 48)); |
|||
} |
|||
|
|||
System.out.println("╚═══════════════════════════════════════════════════════════╝"); |
|||
} |
|||
|
|||
@Override |
|||
public void displayCrawlerResult(CrawlerResult result) { |
|||
if (result == null) { |
|||
displayError("爬取结果为空"); |
|||
return; |
|||
} |
|||
|
|||
System.out.println("\n╔═══════════════════════════════════════════════════════════╗"); |
|||
System.out.printf("║ 爬取结果: %-30s ║%n", result.getSiteName()); |
|||
System.out.println("╠═══════════════════════════════════════════════════════════╣"); |
|||
|
|||
if (result.isSuccess()) { |
|||
System.out.println("║ 状态: ✅ 成功 ║"); |
|||
System.out.printf("║ 数据: %d 条 ║%n", result.getItemCount()); |
|||
System.out.printf("║ 耗时: %d ms ║%n", result.getDuration()); |
|||
} else { |
|||
System.out.println("║ 状态: ❌ 失败 ║"); |
|||
System.out.printf("║ 原因: %-45s ║%n", truncate(result.getErrorMessage(), 43)); |
|||
} |
|||
|
|||
System.out.println("╚═══════════════════════════════════════════════════════════╝"); |
|||
} |
|||
|
|||
@Override |
|||
public void displayMessage(String message) { |
|||
System.out.println("\n📢 " + message); |
|||
} |
|||
|
|||
@Override |
|||
public void displayError(String error) { |
|||
System.err.println("\n❌ 错误: " + error); |
|||
} |
|||
|
|||
@Override |
|||
public void displaySuccess(String success) { |
|||
System.out.println("\n✅ " + success); |
|||
} |
|||
|
|||
@Override |
|||
public void displayGoodbye() { |
|||
System.out.println(""); |
|||
System.out.println("╔══════════════════════════════════════════════════════════╗"); |
|||
System.out.println("║ ║"); |
|||
System.out.println("║ 感谢使用热搜爬虫系统! ║"); |
|||
System.out.println("║ ║"); |
|||
System.out.println("║ 再见!👋 ║"); |
|||
System.out.println("║ ║"); |
|||
System.out.println("╚══════════════════════════════════════════════════════════╝"); |
|||
} |
|||
|
|||
private String truncate(String str, int maxLength) { |
|||
if (str == null) return ""; |
|||
if (str.length() <= maxLength) return str; |
|||
return str.substring(0, maxLength - 3) + "..."; |
|||
} |
|||
} |
|||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files changed in this diff
Loading…
Reference in new issue