124 changed files with 4495 additions and 0 deletions
@ -0,0 +1,4 @@ |
|||||
|
*.jar |
||||
|
*.jar |
||||
|
*.class |
||||
|
*.log |
||||
@ -0,0 +1,4 @@ |
|||||
|
{ |
||||
|
"git.ignoreLimitWarning": true, |
||||
|
"java.configuration.updateBuildConfiguration": "interactive" |
||||
|
} |
||||
@ -0,0 +1,17 @@ |
|||||
|
# DataCollect 教学项目 — 最小可运行版本 |
||||
|
|
||||
|
这是一个最小可用的 Java CLI 演示工程:启动后进入交互式命令行,输入 `help` 可查看可用命令,用于验证运行环境。
||||
|
|
||||
|
构建: |
||||
|
```bash |
||||
|
mvn -q package |
||||
|
``` |
||||
|
|
||||
|
运行(示例): |
||||
|
```bash |
||||
|
java -jar target/datacollect-cli-0.1.0-jar-with-dependencies.jar --help |
||||
|
``` |
||||
|
|
||||
|
项目结构(最小): |
||||
|
- `src/main/java/com/example/datacollect/Main.java` — CLI 入口,启动交互式命令循环(输入 `help` 打印帮助)
||||
|
- `pom.xml` — Maven 构建配置,生成可执行 jar |
||||
@ -0,0 +1,71 @@ |
|||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0" |
||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd"> |
||||
|
<modelVersion>4.0.0</modelVersion> |
||||
|
<groupId>com.example</groupId> |
||||
|
<artifactId>datacollect-cli</artifactId> |
||||
|
<version>0.1.0</version> |
||||
|
<properties> |
||||
|
<maven.compiler.source>11</maven.compiler.source> |
||||
|
<maven.compiler.target>11</maven.compiler.target> |
||||
|
<slf4j.version>2.0.9</slf4j.version> |
||||
|
<logback.version>1.4.11</logback.version> |
||||
|
<jsoup.version>1.17.2</jsoup.version> |
||||
|
<okhttp.version>4.12.0</okhttp.version> |
||||
|
</properties> |
||||
|
<dependencies> |
||||
|
<dependency> |
||||
|
<groupId>org.slf4j</groupId> |
||||
|
<artifactId>slf4j-api</artifactId> |
||||
|
<version>${slf4j.version}</version> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>ch.qos.logback</groupId> |
||||
|
<artifactId>logback-classic</artifactId> |
||||
|
<version>${logback.version}</version> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.jsoup</groupId> |
||||
|
<artifactId>jsoup</artifactId> |
||||
|
<version>${jsoup.version}</version> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>com.squareup.okhttp3</groupId> |
||||
|
<artifactId>okhttp</artifactId> |
||||
|
<version>${okhttp.version}</version> |
||||
|
</dependency> |
||||
|
</dependencies> |
||||
|
<build> |
||||
|
<plugins> |
||||
|
<plugin> |
||||
|
<groupId>org.apache.maven.plugins</groupId> |
||||
|
<artifactId>maven-compiler-plugin</artifactId> |
||||
|
<version>3.8.1</version> |
||||
|
</plugin> |
||||
|
<plugin> |
||||
|
<groupId>org.apache.maven.plugins</groupId> |
||||
|
<artifactId>maven-assembly-plugin</artifactId> |
||||
|
<version>3.3.0</version> |
||||
|
<configuration> |
||||
|
<archive> |
||||
|
<manifest> |
||||
|
<mainClass>com.example.datacollect.Main</mainClass> |
||||
|
</manifest> |
||||
|
</archive> |
||||
|
<descriptorRefs> |
||||
|
<descriptorRef>jar-with-dependencies</descriptorRef> |
||||
|
</descriptorRefs> |
||||
|
</configuration> |
||||
|
<executions> |
||||
|
<execution> |
||||
|
<id>make-assembly</id> |
||||
|
<phase>package</phase> |
||||
|
<goals> |
||||
|
<goal>single</goal> |
||||
|
</goals> |
||||
|
</execution> |
||||
|
</executions> |
||||
|
</plugin> |
||||
|
</plugins> |
||||
|
</build> |
||||
|
</project> |
||||
@ -0,0 +1,19 @@ |
|||||
|
package com.example.datacollect; |
||||
|
|
||||
|
import com.example.datacollect.controller.CrawlerController; |
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
|
||||
|
public class Main { |
||||
|
|
||||
|
public static void main(String[] args) { |
||||
|
ConsoleView view = new ConsoleView(); |
||||
|
ArticleRepository repository = new ArticleRepository(); |
||||
|
CrawlerController controller = new CrawlerController(view, repository); |
||||
|
|
||||
|
view.printSuccess("Welcome to CLI Crawler (W10)! Type help for commands."); |
||||
|
while (true) { |
||||
|
controller.handle(view.readLine()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,6 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
public interface Command { |
||||
|
String getName(); |
||||
|
void execute(String[] args, CommandContext context); |
||||
|
} |
||||
@ -0,0 +1,42 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
|
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class CommandContext { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(CommandContext.class); |
||||
|
private final ArticleRepository repository; |
||||
|
private final List<String> history; |
||||
|
|
||||
|
public CommandContext(ArticleRepository repository) { |
||||
|
this.repository = repository; |
||||
|
this.history = new ArrayList<>(); |
||||
|
logger.debug("CommandContext initialized"); |
||||
|
} |
||||
|
|
||||
|
public ArticleRepository getRepository() { |
||||
|
return repository; |
||||
|
} |
||||
|
|
||||
|
public List<String> getHistory() { |
||||
|
return new ArrayList<>(history); |
||||
|
} |
||||
|
|
||||
|
public void addToHistory(String command) { |
||||
|
history.add(command); |
||||
|
logger.trace("Command added to history: {}", command); |
||||
|
} |
||||
|
|
||||
|
public int getHistorySize() { |
||||
|
return history.size(); |
||||
|
} |
||||
|
|
||||
|
public void clearHistory() { |
||||
|
history.clear(); |
||||
|
logger.debug("Command history cleared"); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,77 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.exception.CrawlerException; |
||||
|
import com.example.datacollect.model.Article; |
||||
|
import com.example.datacollect.strategy.CrawlStrategy; |
||||
|
import com.example.datacollect.strategy.StrategyFactory; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
|
||||
|
import java.util.List; |
||||
|
|
||||
|
public class CrawlCommand implements Command { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(CrawlCommand.class); |
||||
|
private static final int MAX_RETRIES = 3; |
||||
|
private static final long RETRY_DELAY_MS = 1000; |
||||
|
private final ConsoleView view; |
||||
|
|
||||
|
public CrawlCommand(ConsoleView view) { |
||||
|
this.view = view; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "crawl"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, CommandContext context) { |
||||
|
if (args.length < 3) { |
||||
|
view.printError("Usage: crawl <type> <url>"); |
||||
|
view.printError("Supported types: " + StrategyFactory.getSupportedTypes()); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
String type = args[1]; |
||||
|
String url = args[2]; |
||||
|
|
||||
|
if (!StrategyFactory.hasStrategy(type)) { |
||||
|
view.printError("Unknown strategy type: " + type); |
||||
|
view.printError("Supported types: " + StrategyFactory.getSupportedTypes()); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
int attempt = 0; |
||||
|
Exception lastException = null; |
||||
|
|
||||
|
while (attempt < MAX_RETRIES) { |
||||
|
try { |
||||
|
attempt++; |
||||
|
logger.info("Crawl attempt {} of {} for: {}", attempt, MAX_RETRIES, url); |
||||
|
CrawlStrategy strategy = StrategyFactory.getStrategy(type); |
||||
|
List<Article> articles = strategy.crawl(url); |
||||
|
context.getRepository().addAll(articles); |
||||
|
view.printSuccess("Crawled " + articles.size() + " articles using " + type + " strategy"); |
||||
|
return; |
||||
|
} catch (CrawlerException e) { |
||||
|
lastException = e; |
||||
|
logger.warn("Crawl attempt {} failed: {}", attempt, e.getMessage()); |
||||
|
if (attempt < MAX_RETRIES) { |
||||
|
try { |
||||
|
Thread.sleep(RETRY_DELAY_MS); |
||||
|
} catch (InterruptedException ie) { |
||||
|
Thread.currentThread().interrupt(); |
||||
|
break; |
||||
|
} |
||||
|
} |
||||
|
} catch (Exception e) { |
||||
|
lastException = e; |
||||
|
logger.error("Crawl failed with unexpected error: {}", e.getMessage()); |
||||
|
break; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
view.printError("Crawl failed after " + attempt + " attempts: " + (lastException != null ? lastException.getMessage() : "Unknown error")); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,26 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
|
||||
|
public class ExitCommand implements Command { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(ExitCommand.class); |
||||
|
private final ConsoleView view; |
||||
|
|
||||
|
public ExitCommand(ConsoleView view) { |
||||
|
this.view = view; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "exit"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, CommandContext context) { |
||||
|
logger.info("Exit command executed"); |
||||
|
view.printSuccess("Bye!"); |
||||
|
System.exit(0); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,34 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
|
||||
|
public class HelpCommand implements Command { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(HelpCommand.class); |
||||
|
private final ConsoleView view; |
||||
|
|
||||
|
public HelpCommand(ConsoleView view) { |
||||
|
this.view = view; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "help"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, CommandContext context) { |
||||
|
logger.debug("Help command executed"); |
||||
|
view.printInfo("Commands:"); |
||||
|
view.printInfo(" crawl <type> <url> - Crawl articles"); |
||||
|
view.printInfo(" Types:"); |
||||
|
view.printInfo(" blog - 模拟博客爬取(演示用)"); |
||||
|
view.printInfo(" news - 模拟新闻爬取(演示用)"); |
||||
|
view.printInfo(" real - 真实网页爬取(从目标网站获取真实内容)"); |
||||
|
view.printInfo(" list - List all articles"); |
||||
|
view.printInfo(" history - Show command history"); |
||||
|
view.printInfo(" help - Show this help"); |
||||
|
view.printInfo(" exit - Exit program"); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,37 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
|
||||
|
import java.util.List; |
||||
|
|
||||
|
public class HistoryCommand implements Command { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(HistoryCommand.class); |
||||
|
private final ConsoleView view; |
||||
|
|
||||
|
public HistoryCommand(ConsoleView view) { |
||||
|
this.view = view; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "history"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, CommandContext context) { |
||||
|
List<String> history = context.getHistory(); |
||||
|
logger.debug("History command executed, {} entries", history.size()); |
||||
|
|
||||
|
if (history.isEmpty()) { |
||||
|
view.printInfo("暂无命令历史"); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
view.printInfo("命令历史:"); |
||||
|
for (int i = 0; i < history.size(); i++) { |
||||
|
System.out.println((i + 1) + ". " + history.get(i)); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,25 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
|
||||
|
public class ListCommand implements Command { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(ListCommand.class); |
||||
|
private final ConsoleView view; |
||||
|
|
||||
|
public ListCommand(ConsoleView view) { |
||||
|
this.view = view; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "list"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, CommandContext context) { |
||||
|
logger.debug("List command executed"); |
||||
|
view.display(context.getRepository().findAll()); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,59 @@ |
|||||
|
package com.example.datacollect.controller; |
||||
|
|
||||
|
import com.example.datacollect.command.Command; |
||||
|
import com.example.datacollect.command.CommandContext; |
||||
|
import com.example.datacollect.command.CrawlCommand; |
||||
|
import com.example.datacollect.command.ExitCommand; |
||||
|
import com.example.datacollect.command.HelpCommand; |
||||
|
import com.example.datacollect.command.HistoryCommand; |
||||
|
import com.example.datacollect.command.ListCommand; |
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
|
||||
|
import java.util.HashMap; |
||||
|
import java.util.Map; |
||||
|
|
||||
|
public class CrawlerController { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(CrawlerController.class); |
||||
|
private final Map<String, Command> commands = new HashMap<>(); |
||||
|
private final ConsoleView view; |
||||
|
private final CommandContext context; |
||||
|
|
||||
|
public CrawlerController(ConsoleView view, ArticleRepository repository) { |
||||
|
this.view = view; |
||||
|
this.context = new CommandContext(repository); |
||||
|
register(new HelpCommand(view)); |
||||
|
register(new ListCommand(view)); |
||||
|
register(new CrawlCommand(view)); |
||||
|
register(new ExitCommand(view)); |
||||
|
register(new HistoryCommand(view)); |
||||
|
logger.info("CrawlerController initialized with {} commands", commands.size()); |
||||
|
} |
||||
|
|
||||
|
private void register(Command command) { |
||||
|
commands.put(command.getName(), command); |
||||
|
logger.debug("Registered command: {}", command.getName()); |
||||
|
} |
||||
|
|
||||
|
public void handle(String input) { |
||||
|
String text = input == null ? "" : input.trim(); |
||||
|
if (text.isEmpty()) { |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
context.addToHistory(text); |
||||
|
|
||||
|
String[] args = text.split("\\s+"); |
||||
|
String cmdName = args[0].toLowerCase(); |
||||
|
Command command = commands.get(cmdName); |
||||
|
if (command == null) { |
||||
|
logger.warn("Unknown command received: {}", cmdName); |
||||
|
view.printError("Unknown command: " + cmdName); |
||||
|
return; |
||||
|
} |
||||
|
logger.debug("Executing command: {}", cmdName); |
||||
|
command.execute(args, context); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,11 @@ |
|||||
|
package com.example.datacollect.exception; |
||||
|
|
||||
|
/**
 * Checked base exception for failures that occur while crawling.
 */
public class CrawlerException extends Exception {

    /**
     * Creates an exception without a cause.
     *
     * @param message description of the failure
     */
    public CrawlerException(String message) {
        super(message);
    }

    /**
     * Creates an exception that wraps an underlying cause.
     *
     * @param message description of the failure
     * @param cause   the exception that triggered this one
     */
    public CrawlerException(String message, Throwable cause) {
        super(message, cause);
    }
}
||||
@ -0,0 +1,11 @@ |
|||||
|
package com.example.datacollect.exception; |
||||
|
|
||||
|
public class NetworkException extends CrawlerException { |
||||
|
public NetworkException(String message) { |
||||
|
super(message); |
||||
|
} |
||||
|
|
||||
|
public NetworkException(String message, Throwable cause) { |
||||
|
super(message, cause); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,11 @@ |
|||||
|
package com.example.datacollect.exception; |
||||
|
|
||||
|
public class ParseException extends CrawlerException { |
||||
|
public ParseException(String message) { |
||||
|
super(message); |
||||
|
} |
||||
|
|
||||
|
public ParseException(String message, Throwable cause) { |
||||
|
super(message, cause); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,75 @@ |
|||||
|
package com.example.datacollect.model; |
||||
|
|
||||
|
import java.time.LocalDate; |
||||
|
|
||||
|
public class Article { |
||||
|
private String title; |
||||
|
private String url; |
||||
|
private String content; |
||||
|
private String author; |
||||
|
private LocalDate publishDate; |
||||
|
|
||||
|
public Article(String title, String url, String content) { |
||||
|
this.title = title; |
||||
|
this.url = url; |
||||
|
this.content = content; |
||||
|
} |
||||
|
|
||||
|
public Article(String title, String url, String content, String author, LocalDate publishDate) { |
||||
|
this.title = title; |
||||
|
this.url = url; |
||||
|
this.content = content; |
||||
|
this.author = author; |
||||
|
this.publishDate = publishDate; |
||||
|
} |
||||
|
|
||||
|
public String getTitle() { |
||||
|
return title; |
||||
|
} |
||||
|
|
||||
|
public void setTitle(String title) { |
||||
|
this.title = title; |
||||
|
} |
||||
|
|
||||
|
public String getUrl() { |
||||
|
return url; |
||||
|
} |
||||
|
|
||||
|
public void setUrl(String url) { |
||||
|
this.url = url; |
||||
|
} |
||||
|
|
||||
|
public String getContent() { |
||||
|
return content; |
||||
|
} |
||||
|
|
||||
|
public void setContent(String content) { |
||||
|
this.content = content; |
||||
|
} |
||||
|
|
||||
|
public String getAuthor() { |
||||
|
return author; |
||||
|
} |
||||
|
|
||||
|
public void setAuthor(String author) { |
||||
|
this.author = author; |
||||
|
} |
||||
|
|
||||
|
public LocalDate getPublishDate() { |
||||
|
return publishDate; |
||||
|
} |
||||
|
|
||||
|
public void setPublishDate(LocalDate publishDate) { |
||||
|
this.publishDate = publishDate; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String toString() { |
||||
|
return "Article{" |
||||
|
+ "title='" + title + '\'' |
||||
|
+ ", url='" + url + '\'' |
||||
|
+ ", author='" + author + '\'' |
||||
|
+ ", publishDate=" + publishDate |
||||
|
+ '}'; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,73 @@ |
|||||
|
package com.example.datacollect.repository; |
||||
|
|
||||
|
import com.example.datacollect.model.Article; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
|
||||
|
import java.util.ArrayList; |
||||
|
import java.util.Collections; |
||||
|
import java.util.List; |
||||
|
import java.util.Optional; |
||||
|
|
||||
|
public class ArticleRepository { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(ArticleRepository.class); |
||||
|
private final List<Article> articles = new ArrayList<>(); |
||||
|
|
||||
|
public void add(Article article) { |
||||
|
if (article == null) { |
||||
|
logger.warn("Attempted to add null article to repository"); |
||||
|
throw new IllegalArgumentException("Article cannot be null"); |
||||
|
} |
||||
|
if (article.getUrl() == null || article.getUrl().isEmpty()) { |
||||
|
logger.warn("Attempted to add article with null or empty URL"); |
||||
|
throw new IllegalArgumentException("Article URL cannot be null or empty"); |
||||
|
} |
||||
|
if (findByUrl(article.getUrl()).isPresent()) { |
||||
|
logger.debug("Article with URL {} already exists, skipping", article.getUrl()); |
||||
|
return; |
||||
|
} |
||||
|
articles.add(article); |
||||
|
logger.debug("Added article: {} ({})", article.getTitle(), article.getUrl()); |
||||
|
} |
||||
|
|
||||
|
public void addAll(List<Article> articleList) { |
||||
|
if (articleList == null) { |
||||
|
logger.warn("Attempted to add null article list to repository"); |
||||
|
throw new IllegalArgumentException("Article list cannot be null"); |
||||
|
} |
||||
|
int count = 0; |
||||
|
for (Article article : articleList) { |
||||
|
try { |
||||
|
add(article); |
||||
|
count++; |
||||
|
} catch (IllegalArgumentException e) { |
||||
|
logger.warn("Skipping invalid article: {}", e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
logger.info("Added {} articles to repository (total: {})", count, articles.size()); |
||||
|
} |
||||
|
|
||||
|
public List<Article> findAll() { |
||||
|
return Collections.unmodifiableList(new ArrayList<>(articles)); |
||||
|
} |
||||
|
|
||||
|
public Optional<Article> findByUrl(String url) { |
||||
|
if (url == null || url.isEmpty()) { |
||||
|
logger.warn("findByUrl called with null or empty URL"); |
||||
|
return Optional.empty(); |
||||
|
} |
||||
|
return articles.stream() |
||||
|
.filter(a -> a.getUrl().equals(url)) |
||||
|
.findFirst(); |
||||
|
} |
||||
|
|
||||
|
public int count() { |
||||
|
return articles.size(); |
||||
|
} |
||||
|
|
||||
|
public void clear() { |
||||
|
int size = articles.size(); |
||||
|
articles.clear(); |
||||
|
logger.info("Cleared {} articles from repository", size); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,33 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
import com.example.datacollect.exception.CrawlerException; |
||||
|
import com.example.datacollect.exception.ParseException; |
||||
|
import com.example.datacollect.model.Article; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
|
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class BlogStrategy implements CrawlStrategy { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(BlogStrategy.class); |
||||
|
|
||||
|
@Override |
||||
|
public String getType() { |
||||
|
return "blog"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public List<Article> crawl(String url) throws CrawlerException { |
||||
|
logger.info("Starting blog crawl for: {}", url); |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
articles.add(new Article("Java编程入门教程", "https://www.oracle.com/java/technologies/get-started/", "Oracle官方Java入门教程,涵盖Java基础知识和开发环境配置")); |
||||
|
articles.add(new Article("Java最佳实践指南", "https://www.baeldung.com/java-best-practices", "Baeldung提供的Java编程最佳实践,包括代码规范、性能优化等")); |
||||
|
logger.info("Successfully crawled {} articles from {}", articles.size(), url); |
||||
|
return articles; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void parse(String html, String url) throws ParseException { |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,13 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
import com.example.datacollect.exception.CrawlerException; |
||||
|
import com.example.datacollect.exception.ParseException; |
||||
|
import com.example.datacollect.model.Article; |
||||
|
|
||||
|
import java.util.List; |
||||
|
|
||||
|
public interface CrawlStrategy { |
||||
|
String getType(); |
||||
|
List<Article> crawl(String url) throws CrawlerException; |
||||
|
void parse(String html, String url) throws ParseException; |
||||
|
} |
||||
@ -0,0 +1,56 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
import com.example.datacollect.exception.CrawlerException; |
||||
|
import com.example.datacollect.exception.ParseException; |
||||
|
import com.example.datacollect.model.Article; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
|
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class NewsStrategy implements CrawlStrategy { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(NewsStrategy.class); |
||||
|
|
||||
|
@Override |
||||
|
public String getType() { |
||||
|
return "news"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public List<Article> crawl(String url) throws CrawlerException { |
||||
|
logger.info("Starting news crawl for: {}", url); |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
|
||||
|
String baseUrl = url.replaceAll("/$", ""); |
||||
|
|
||||
|
if (url.contains("toutiao.com")) { |
||||
|
articles.add(new Article("今日头条 - 热点新闻", "https://www.toutiao.com/", "今日头条热点新闻聚合平台")); |
||||
|
articles.add(new Article("今日头条科技", "https://www.toutiao.com/c/user/token/MS4wLjABAAAAlS0q8OYF0X0Kf2dJ7w0wFg/", "科技资讯频道")); |
||||
|
articles.add(new Article("今日头条娱乐", "https://www.toutiao.com/c/user/token/MS4wLjABAAAAj80G9D28h2a8q9F9x9x9x9/", "娱乐新闻频道")); |
||||
|
} else if (url.contains("sina.com") || url.contains("sina.cn")) { |
||||
|
articles.add(new Article("新浪新闻 - 国内新闻", "https://news.sina.com.cn/", "新浪国内新闻频道")); |
||||
|
articles.add(new Article("新浪财经", "https://finance.sina.com.cn/", "财经资讯")); |
||||
|
articles.add(new Article("新浪体育", "https://sports.sina.com.cn/", "体育新闻")); |
||||
|
} else if (url.contains("qq.com")) { |
||||
|
articles.add(new Article("腾讯新闻", "https://news.qq.com/", "腾讯新闻频道")); |
||||
|
articles.add(new Article("腾讯财经", "https://finance.qq.com/", "财经资讯")); |
||||
|
articles.add(new Article("腾讯科技", "https://tech.qq.com/", "科技新闻")); |
||||
|
} else if (url.contains("163.com")) { |
||||
|
articles.add(new Article("网易新闻", "https://news.163.com/", "网易新闻频道")); |
||||
|
articles.add(new Article("网易财经", "https://money.163.com/", "财经资讯")); |
||||
|
articles.add(new Article("网易科技", "https://tech.163.com/", "科技新闻")); |
||||
|
} else { |
||||
|
articles.add(new Article("科技新闻 - TechCrunch", "https://techcrunch.com/", "TechCrunch提供最新的科技新闻")); |
||||
|
articles.add(new Article("国际新闻 - BBC News", "https://www.bbc.com/news", "BBC News全球新闻")); |
||||
|
articles.add(new Article("商业新闻 - Reuters", "https://www.reuters.com/", "路透社商业新闻")); |
||||
|
} |
||||
|
|
||||
|
logger.info("Successfully crawled {} articles from {}", articles.size(), url); |
||||
|
return articles; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void parse(String html, String url) throws ParseException { |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,178 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
import com.example.datacollect.exception.CrawlerException; |
||||
|
import com.example.datacollect.exception.NetworkException; |
||||
|
import com.example.datacollect.exception.ParseException; |
||||
|
import com.example.datacollect.model.Article; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
|
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
import java.util.Set; |
||||
|
import java.util.HashSet; |
||||
|
|
||||
|
public class RealCrawlStrategy implements CrawlStrategy { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(RealCrawlStrategy.class); |
||||
|
|
||||
|
private static final Set<String> NEWS_KEYWORDS = Set.of("news", "article", "post", "story", "report", "blog"); |
||||
|
|
||||
|
@Override |
||||
|
public String getType() { |
||||
|
return "real"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public List<Article> crawl(String url) throws CrawlerException { |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
Set<String> visitedUrls = new HashSet<>(); |
||||
|
|
||||
|
try { |
||||
|
logger.info("Starting real crawl for: {}", url); |
||||
|
|
||||
|
Document doc = WebCrawler.fetchDocument(url); |
||||
|
String pageTitle = WebCrawler.extractTitle(doc); |
||||
|
String pageContent = WebCrawler.extractContent(doc); |
||||
|
|
||||
|
logger.debug("Page title extracted: {}", pageTitle); |
||||
|
articles.add(new Article(pageTitle, url, pageContent)); |
||||
|
visitedUrls.add(url); |
||||
|
|
||||
|
List<String> links = WebCrawler.extractLinks(doc, url); |
||||
|
logger.debug("Found {} links on page", links.size()); |
||||
|
|
||||
|
if (links.size() > 0) { |
||||
|
logger.debug("First 5 links:"); |
||||
|
for (int i = 0; i < Math.min(5, links.size()); i++) { |
||||
|
logger.debug(" {}: {} (isArticle: {})", i + 1, links.get(i), isArticleLink(links.get(i))); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
int count = 0; |
||||
|
for (String link : links) { |
||||
|
if (count >= 5) break; |
||||
|
if (visitedUrls.contains(link)) continue; |
||||
|
if (!isArticleLink(link)) continue; |
||||
|
|
||||
|
try { |
||||
|
Document articleDoc = WebCrawler.fetchDocument(link); |
||||
|
String articleTitle = WebCrawler.extractTitle(articleDoc); |
||||
|
String articleContent = WebCrawler.extractContent(articleDoc); |
||||
|
|
||||
|
articles.add(new Article(articleTitle, link, articleContent)); |
||||
|
visitedUrls.add(link); |
||||
|
count++; |
||||
|
|
||||
|
logger.debug("Crawled article: {} - {}", articleTitle, link); |
||||
|
} catch (NetworkException e) { |
||||
|
logger.warn("Failed to crawl article: {} - {}", link, e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
if (articles.size() == 1 && "No Title".equals(pageTitle)) { |
||||
|
logger.warn("Only 1 article found with no title, trying alternative extraction"); |
||||
|
articles = extractArticlesFromPage(doc, url); |
||||
|
} |
||||
|
|
||||
|
logger.info("Successfully crawled {} articles from {}", articles.size(), url); |
||||
|
|
||||
|
} catch (NetworkException e) { |
||||
|
logger.error("Failed to crawl {}: {}", url, e.getMessage()); |
||||
|
throw e; |
||||
|
} |
||||
|
|
||||
|
return articles; |
||||
|
} |
||||
|
|
||||
|
private List<Article> extractArticlesFromPage(Document doc, String baseUrl) { |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
|
||||
|
articles.add(new Article(doc.title(), baseUrl, WebCrawler.extractContent(doc))); |
||||
|
|
||||
|
return articles; |
||||
|
} |
||||
|
|
||||
|
private boolean isArticleLink(String url) { |
||||
|
String lowerUrl = url.toLowerCase(); |
||||
|
|
||||
|
// 排除非文章链接
|
||||
|
if (lowerUrl.contains("/login") || |
||||
|
lowerUrl.contains("/register") || |
||||
|
lowerUrl.contains("/logout") || |
||||
|
lowerUrl.contains("/search") || |
||||
|
lowerUrl.contains("/about") || |
||||
|
lowerUrl.contains("/contact") || |
||||
|
lowerUrl.contains("/terms") || |
||||
|
lowerUrl.contains("/privacy") || |
||||
|
lowerUrl.contains("/sitemap") || |
||||
|
lowerUrl.contains("/feed") || |
||||
|
lowerUrl.contains("?") || |
||||
|
lowerUrl.contains(".json") || |
||||
|
lowerUrl.contains(".xml") || |
||||
|
lowerUrl.contains("#") || |
||||
|
lowerUrl.contains("/courses/") || |
||||
|
lowerUrl.endsWith("/") || |
||||
|
lowerUrl.endsWith("/start-here") || |
||||
|
lowerUrl.endsWith("/home") || |
||||
|
lowerUrl.endsWith("/index")) { |
||||
|
return false; |
||||
|
} |
||||
|
|
||||
|
// 包含文章相关关键词的链接
|
||||
|
if (lowerUrl.contains("/article/") || |
||||
|
lowerUrl.contains("/posts/") || |
||||
|
lowerUrl.contains("/blog/") || |
||||
|
lowerUrl.contains("/news/") || |
||||
|
lowerUrl.contains("/story/") || |
||||
|
lowerUrl.contains("/post/") || |
||||
|
lowerUrl.contains("/tutorial/") || |
||||
|
lowerUrl.contains("/guide/") || |
||||
|
lowerUrl.contains("/learn/") || |
||||
|
lowerUrl.contains("/articles/") || |
||||
|
lowerUrl.contains("/java-") || |
||||
|
lowerUrl.contains("/spring-") || |
||||
|
lowerUrl.contains("/kotlin-") || |
||||
|
lowerUrl.contains("/maven-") || |
||||
|
lowerUrl.contains("/gradle-") || |
||||
|
lowerUrl.contains("/junit-") || |
||||
|
lowerUrl.contains("/hibernate-") || |
||||
|
lowerUrl.contains("/jdbc-") || |
||||
|
lowerUrl.contains("/concurrent-") || |
||||
|
lowerUrl.contains("/stream-") || |
||||
|
lowerUrl.contains("/regex-") || |
||||
|
lowerUrl.contains("/json-") || |
||||
|
lowerUrl.contains("/xml-") || |
||||
|
lowerUrl.contains("/security-") || |
||||
|
lowerUrl.contains("/test-")) { |
||||
|
return true; |
||||
|
} |
||||
|
|
||||
|
// 带数字ID或日期格式的链接
|
||||
|
if (lowerUrl.matches(".*/\\d+\\.html?$") || |
||||
|
lowerUrl.matches(".*/[\\w-]+\\.html?$") || |
||||
|
lowerUrl.matches(".*/\\d+/\\d+/\\d+/.*") || |
||||
|
lowerUrl.matches(".*/\\d{4}/\\d{2}/.*") || |
||||
|
lowerUrl.matches(".*/\\d{4}/\\d{2}/\\d{2}/.*") || |
||||
|
lowerUrl.matches(".*/\\d{2}-\\d{2}-.*")) { |
||||
|
return true; |
||||
|
} |
||||
|
|
||||
|
// 对于 baeldung 这类技术博客,识别 /java-something 形式的链接
|
||||
|
if (lowerUrl.matches(".*/java-[\\w-]+$") || |
||||
|
lowerUrl.matches(".*/spring-[\\w-]+$") || |
||||
|
lowerUrl.matches(".*/kotlin-[\\w-]+$")) { |
||||
|
return true; |
||||
|
} |
||||
|
|
||||
|
return false; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void parse(String html, String url) throws ParseException { |
||||
|
if (html == null || html.isEmpty()) { |
||||
|
throw new ParseException("HTML content is null or empty for URL: " + url); |
||||
|
} |
||||
|
logger.debug("Parsing HTML content for URL: {}", url); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,46 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
|
||||
|
import java.util.HashMap; |
||||
|
import java.util.Map; |
||||
|
import java.util.ServiceLoader; |
||||
|
|
||||
|
public class StrategyFactory { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(StrategyFactory.class); |
||||
|
private static final Map<String, CrawlStrategy> strategies = new HashMap<>(); |
||||
|
|
||||
|
static { |
||||
|
loadStrategies(); |
||||
|
} |
||||
|
|
||||
|
private static synchronized void loadStrategies() { |
||||
|
if (!strategies.isEmpty()) { |
||||
|
return; |
||||
|
} |
||||
|
ServiceLoader<CrawlStrategy> loader = ServiceLoader.load(CrawlStrategy.class); |
||||
|
for (CrawlStrategy strategy : loader) { |
||||
|
strategies.put(strategy.getType().toLowerCase(), strategy); |
||||
|
logger.debug("Loaded strategy: {}", strategy.getType()); |
||||
|
} |
||||
|
logger.info("Loaded {} strategies", strategies.size()); |
||||
|
} |
||||
|
|
||||
|
public static CrawlStrategy getStrategy(String type) { |
||||
|
CrawlStrategy strategy = strategies.get(type.toLowerCase()); |
||||
|
if (strategy == null) { |
||||
|
logger.error("Unknown strategy type requested: {}", type); |
||||
|
throw new IllegalArgumentException("Unknown strategy type: " + type); |
||||
|
} |
||||
|
return strategy; |
||||
|
} |
||||
|
|
||||
|
public static boolean hasStrategy(String type) { |
||||
|
return strategies.containsKey(type.toLowerCase()); |
||||
|
} |
||||
|
|
||||
|
public static String getSupportedTypes() { |
||||
|
return String.join(", ", strategies.keySet()); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,244 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
import com.example.datacollect.exception.NetworkException; |
||||
|
import okhttp3.OkHttpClient; |
||||
|
import okhttp3.Request; |
||||
|
import okhttp3.Response; |
||||
|
import okhttp3.ResponseBody; |
||||
|
import org.jsoup.Jsoup; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.jsoup.nodes.Element; |
||||
|
import org.jsoup.select.Elements; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
|
||||
|
import java.io.IOException; |
||||
|
import java.nio.charset.StandardCharsets; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.HashSet; |
||||
|
import java.util.List; |
||||
|
import java.util.Set; |
||||
|
import java.util.regex.Pattern; |
||||
|
|
||||
|
public class WebCrawler { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(WebCrawler.class); |
||||
|
|
||||
|
private static final OkHttpClient client = new OkHttpClient.Builder() |
||||
|
.connectTimeout(java.time.Duration.ofSeconds(30)) |
||||
|
.readTimeout(java.time.Duration.ofSeconds(30)) |
||||
|
.followRedirects(true) |
||||
|
.followSslRedirects(true) |
||||
|
.build(); |
||||
|
|
||||
|
private static final Pattern URL_PATTERN = Pattern.compile("^https?://"); |
||||
|
|
||||
|
private static final Set<String> VALID_EXTENSIONS = Set.of( |
||||
|
".html", ".htm", ".php", ".asp", ".aspx", ".jsp", "" |
||||
|
); |
||||
|
|
||||
|
private static final int MAX_RETRIES = 3; |
||||
|
private static final long RETRY_DELAY_MS = 1000; |
||||
|
|
||||
|
public static Document fetchDocument(String url) throws NetworkException { |
||||
|
int attempt = 0; |
||||
|
IOException lastException = null; |
||||
|
|
||||
|
while (attempt < MAX_RETRIES) { |
||||
|
attempt++; |
||||
|
try { |
||||
|
logger.debug("Fetching document attempt {} of {} for URL: {}", attempt, MAX_RETRIES, url); |
||||
|
return doFetchDocument(url); |
||||
|
} catch (IOException e) { |
||||
|
lastException = e; |
||||
|
logger.warn("Fetch attempt {} failed for URL {}: {}", attempt, url, e.getMessage()); |
||||
|
if (attempt < MAX_RETRIES) { |
||||
|
try { |
||||
|
Thread.sleep(RETRY_DELAY_MS); |
||||
|
} catch (InterruptedException ie) { |
||||
|
Thread.currentThread().interrupt(); |
||||
|
throw new NetworkException("Fetching interrupted for URL: " + url, ie); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
throw new NetworkException("Failed to fetch document after " + MAX_RETRIES + " attempts for URL: " + url, lastException); |
||||
|
} |
||||
|
|
||||
|
private static Document doFetchDocument(String url) throws IOException { |
||||
|
Request request = new Request.Builder() |
||||
|
.url(url) |
||||
|
.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36") |
||||
|
.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8") |
||||
|
.header("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8") |
||||
|
.header("Connection", "keep-alive") |
||||
|
.get() |
||||
|
.build(); |
||||
|
|
||||
|
try (Response response = client.newCall(request).execute()) { |
||||
|
if (!response.isSuccessful()) { |
||||
|
throw new IOException("HTTP request failed with code: " + response.code()); |
||||
|
} |
||||
|
|
||||
|
String contentType = response.header("Content-Type"); |
||||
|
logger.debug("Content-Type: {}", contentType); |
||||
|
|
||||
|
if (contentType != null && !contentType.contains("text/html")) { |
||||
|
throw new IOException("Not an HTML document: " + contentType); |
||||
|
} |
||||
|
|
||||
|
ResponseBody body = response.body(); |
||||
|
if (body == null) { |
||||
|
throw new IOException("Response body is null"); |
||||
|
} |
||||
|
|
||||
|
byte[] bytes = body.bytes(); |
||||
|
String bodyString = new String(bytes, StandardCharsets.UTF_8); |
||||
|
logger.debug("Fetched HTML content length: {} bytes, {} characters", bytes.length, bodyString.length()); |
||||
|
|
||||
|
if (bodyString.length() < 100) { |
||||
|
logger.warn("HTML content is very short"); |
||||
|
} |
||||
|
|
||||
|
Document doc = Jsoup.parse(bodyString, url); |
||||
|
logger.debug("Parsed document title: '{}'", doc.title()); |
||||
|
logger.debug("Number of elements in body: {}", doc.body() != null ? doc.body().getAllElements().size() : 0); |
||||
|
|
||||
|
return doc; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
public static List<String> extractLinks(Document doc, String baseUrl) { |
||||
|
List<String> links = new ArrayList<>(); |
||||
|
Set<String> seen = new HashSet<>(); |
||||
|
|
||||
|
Elements anchorTags = doc.select("a"); |
||||
|
logger.debug("Found {} anchor tags", anchorTags.size()); |
||||
|
|
||||
|
for (Element anchor : anchorTags) { |
||||
|
String href = anchor.attr("abs:href"); |
||||
|
|
||||
|
if (href == null || href.isEmpty()) { |
||||
|
href = anchor.attr("href"); |
||||
|
if (href != null && !href.isEmpty() && !href.startsWith("http")) { |
||||
|
href = resolveUrl(baseUrl, href); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
if (href != null && isValidUrl(href) && !seen.contains(href)) { |
||||
|
seen.add(href); |
||||
|
links.add(href); |
||||
|
logger.trace("Added link: {}", href); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
logger.debug("Extracted {} valid links from page", links.size()); |
||||
|
return links; |
||||
|
} |
||||
|
|
||||
|
private static String resolveUrl(String baseUrl, String relativeUrl) { |
||||
|
try { |
||||
|
java.net.URL base = new java.net.URL(baseUrl); |
||||
|
return new java.net.URL(base, relativeUrl).toString(); |
||||
|
} catch (java.net.MalformedURLException e) { |
||||
|
logger.warn("Failed to resolve URL: {} + {}", baseUrl, relativeUrl); |
||||
|
return relativeUrl; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private static boolean isValidUrl(String url) { |
||||
|
if (url == null || url.isEmpty()) { |
||||
|
return false; |
||||
|
} |
||||
|
|
||||
|
if (!URL_PATTERN.matcher(url).find()) { |
||||
|
return false; |
||||
|
} |
||||
|
|
||||
|
if (url.contains("#")) { |
||||
|
url = url.substring(0, url.indexOf("#")); |
||||
|
} |
||||
|
|
||||
|
String lowerUrl = url.toLowerCase(); |
||||
|
|
||||
|
// 排除常见的非HTML文件类型
|
||||
|
if (lowerUrl.endsWith(".pdf") || lowerUrl.endsWith(".doc") || |
||||
|
lowerUrl.endsWith(".docx") || lowerUrl.endsWith(".xls") || |
||||
|
lowerUrl.endsWith(".xlsx") || lowerUrl.endsWith(".zip") || |
||||
|
lowerUrl.endsWith(".rar") || lowerUrl.endsWith(".exe") || |
||||
|
lowerUrl.endsWith(".jpg") || lowerUrl.endsWith(".jpeg") || |
||||
|
lowerUrl.endsWith(".png") || lowerUrl.endsWith(".gif") || |
||||
|
lowerUrl.endsWith(".svg") || lowerUrl.endsWith(".css") || |
||||
|
lowerUrl.endsWith(".js") || lowerUrl.endsWith(".json") || |
||||
|
lowerUrl.endsWith(".xml") || lowerUrl.endsWith(".txt")) { |
||||
|
return false; |
||||
|
} |
||||
|
|
||||
|
// 允许所有其他类型的URL(包括无扩展名的URL)
|
||||
|
return true; |
||||
|
} |
||||
|
|
||||
|
private static String getExtension(String url) { |
||||
|
int lastDot = url.lastIndexOf('.'); |
||||
|
int lastSlash = url.lastIndexOf('/'); |
||||
|
if (lastDot > lastSlash) { |
||||
|
int queryIndex = url.indexOf('?'); |
||||
|
if (queryIndex > lastDot) { |
||||
|
return url.substring(lastDot, queryIndex); |
||||
|
} |
||||
|
return url.substring(lastDot); |
||||
|
} |
||||
|
return ""; |
||||
|
} |
||||
|
|
||||
|
public static String extractTitle(Document doc) { |
||||
|
// 尝试获取title标签
|
||||
|
Element titleElement = doc.selectFirst("title"); |
||||
|
if (titleElement != null && !titleElement.text().trim().isEmpty()) { |
||||
|
return titleElement.text().trim(); |
||||
|
} |
||||
|
|
||||
|
// 尝试获取h1标签
|
||||
|
Element h1Element = doc.selectFirst("h1"); |
||||
|
if (h1Element != null && !h1Element.text().trim().isEmpty()) { |
||||
|
return h1Element.text().trim(); |
||||
|
} |
||||
|
|
||||
|
// 尝试获取文章标题类
|
||||
|
Elements titleClasses = doc.select(".title, .post-title, .article-title, .entry-title, [class*=title]"); |
||||
|
if (!titleClasses.isEmpty() && !titleClasses.first().text().trim().isEmpty()) { |
||||
|
return titleClasses.first().text().trim(); |
||||
|
} |
||||
|
|
||||
|
// 尝试获取meta title
|
||||
|
Element metaTitle = doc.selectFirst("meta[property=og:title], meta[name=title]"); |
||||
|
if (metaTitle != null && !metaTitle.attr("content").trim().isEmpty()) { |
||||
|
return metaTitle.attr("content").trim(); |
||||
|
} |
||||
|
|
||||
|
return "No Title"; |
||||
|
} |
||||
|
|
||||
|
public static String extractMetaDescription(Document doc) { |
||||
|
Element metaDesc = doc.selectFirst("meta[name=description]"); |
||||
|
if (metaDesc != null) { |
||||
|
return metaDesc.attr("content").trim(); |
||||
|
} |
||||
|
return ""; |
||||
|
} |
||||
|
|
||||
|
public static String extractContent(Document doc) { |
||||
|
Elements contentSelectors = doc.select("article, .article, .content, .post-content, .entry-content, main"); |
||||
|
|
||||
|
if (!contentSelectors.isEmpty()) { |
||||
|
return contentSelectors.first().text().trim(); |
||||
|
} |
||||
|
|
||||
|
Element body = doc.body(); |
||||
|
if (body != null) { |
||||
|
return body.text().trim().substring(0, Math.min(body.text().length(), 500)) + "..."; |
||||
|
} |
||||
|
|
||||
|
return ""; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,118 @@ |
|||||
|
package com.example.datacollect.test; |
||||
|
|
||||
|
import com.example.datacollect.command.CommandContext; |
||||
|
import com.example.datacollect.command.CrawlCommand; |
||||
|
import com.example.datacollect.command.ListCommand; |
||||
|
import com.example.datacollect.command.HelpCommand; |
||||
|
import com.example.datacollect.command.HistoryCommand; |
||||
|
import com.example.datacollect.model.Article; |
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.strategy.CrawlStrategy; |
||||
|
import com.example.datacollect.strategy.StrategyFactory; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
|
||||
|
import java.util.List; |
||||
|
|
||||
|
public class CrawlerDemo { |
||||
|
|
||||
|
public static void main(String[] args) { |
||||
|
ConsoleView view = new ConsoleView(); |
||||
|
ArticleRepository repository = new ArticleRepository(); |
||||
|
CommandContext context = new CommandContext(repository); |
||||
|
|
||||
|
System.out.println("========================================"); |
||||
|
System.out.println(" 爬虫功能演示"); |
||||
|
System.out.println("========================================\n"); |
||||
|
|
||||
|
demo1_StrategyFactory(view); |
||||
|
demo2_BlogCrawl(view, repository, context); |
||||
|
demo3_NewsCrawl(view, repository, context); |
||||
|
demo4_ListArticles(view, repository, context); |
||||
|
demo5_CommandHistory(view, context); |
||||
|
demo6_RepositoryFeatures(repository); |
||||
|
|
||||
|
System.out.println("\n========================================"); |
||||
|
System.out.println(" 演示完成!"); |
||||
|
System.out.println("========================================"); |
||||
|
} |
||||
|
|
||||
|
private static void demo1_StrategyFactory(ConsoleView view) { |
||||
|
System.out.println("【演示1】策略工厂功能"); |
||||
|
System.out.println("-".repeat(40)); |
||||
|
|
||||
|
System.out.println("支持的策略类型: " + StrategyFactory.getSupportedTypes()); |
||||
|
|
||||
|
CrawlStrategy blogStrategy = StrategyFactory.getStrategy("blog"); |
||||
|
System.out.println("Blog策略类型: " + blogStrategy.getType()); |
||||
|
|
||||
|
CrawlStrategy newsStrategy = StrategyFactory.getStrategy("news"); |
||||
|
System.out.println("News策略类型: " + newsStrategy.getType()); |
||||
|
|
||||
|
try { |
||||
|
StrategyFactory.getStrategy("unknown"); |
||||
|
} catch (IllegalArgumentException e) { |
||||
|
System.out.println("未知策略测试: " + e.getMessage()); |
||||
|
} |
||||
|
System.out.println(); |
||||
|
} |
||||
|
|
||||
|
private static void demo2_BlogCrawl(ConsoleView view, ArticleRepository repository, CommandContext context) { |
||||
|
System.out.println("【演示2】博客爬取"); |
||||
|
System.out.println("-".repeat(40)); |
||||
|
|
||||
|
CrawlCommand crawlCommand = new CrawlCommand(view); |
||||
|
crawlCommand.execute(new String[]{"crawl", "blog", "http://example.com"}, context); |
||||
|
|
||||
|
System.out.println("仓库中文章数量: " + repository.count()); |
||||
|
System.out.println(); |
||||
|
} |
||||
|
|
||||
|
private static void demo3_NewsCrawl(ConsoleView view, ArticleRepository repository, CommandContext context) { |
||||
|
System.out.println("【演示3】新闻爬取"); |
||||
|
System.out.println("-".repeat(40)); |
||||
|
|
||||
|
CrawlCommand crawlCommand = new CrawlCommand(view); |
||||
|
crawlCommand.execute(new String[]{"crawl", "news", "http://news.com"}, context); |
||||
|
|
||||
|
System.out.println("仓库中文章总数: " + repository.count()); |
||||
|
System.out.println(); |
||||
|
} |
||||
|
|
||||
|
private static void demo4_ListArticles(ConsoleView view, ArticleRepository repository, CommandContext context) { |
||||
|
System.out.println("【演示4】列出所有文章"); |
||||
|
System.out.println("-".repeat(40)); |
||||
|
|
||||
|
ListCommand listCommand = new ListCommand(view); |
||||
|
listCommand.execute(new String[]{"list"}, context); |
||||
|
System.out.println(); |
||||
|
} |
||||
|
|
||||
|
private static void demo5_CommandHistory(ConsoleView view, CommandContext context) { |
||||
|
System.out.println("【演示5】命令历史"); |
||||
|
System.out.println("-".repeat(40)); |
||||
|
|
||||
|
HistoryCommand historyCommand = new HistoryCommand(view); |
||||
|
historyCommand.execute(new String[]{"history"}, context); |
||||
|
System.out.println(); |
||||
|
} |
||||
|
|
||||
|
private static void demo6_RepositoryFeatures(ArticleRepository repository) { |
||||
|
System.out.println("【演示6】仓库功能"); |
||||
|
System.out.println("-".repeat(40)); |
||||
|
|
||||
|
System.out.println("文章总数: " + repository.count()); |
||||
|
|
||||
|
Article firstArticle = repository.findAll().get(0); |
||||
|
System.out.println("第一篇文章标题: " + firstArticle.getTitle()); |
||||
|
|
||||
|
String url = firstArticle.getUrl(); |
||||
|
repository.findByUrl(url).ifPresent(article -> |
||||
|
System.out.println("按URL查找成功: " + article.getTitle()) |
||||
|
); |
||||
|
|
||||
|
System.out.println("清空前文章数: " + repository.count()); |
||||
|
repository.clear(); |
||||
|
System.out.println("清空后文章数: " + repository.count()); |
||||
|
System.out.println(); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,83 @@ |
|||||
|
package com.example.datacollect.test; |
||||
|
|
||||
|
import com.example.datacollect.command.CommandContext; |
||||
|
import com.example.datacollect.command.CrawlCommand; |
||||
|
import com.example.datacollect.command.ListCommand; |
||||
|
import com.example.datacollect.model.Article; |
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.strategy.CrawlStrategy; |
||||
|
import com.example.datacollect.strategy.StrategyFactory; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
|
||||
|
import java.util.List; |
||||
|
|
||||
|
public class CrawlerTest { |
||||
|
|
||||
|
public static void main(String[] args) { |
||||
|
System.out.println("=== Crawler Test Suite ==="); |
||||
|
|
||||
|
testStrategyFactory(); |
||||
|
testArticleRepositoryImmutable(); |
||||
|
testCrawlToListFlow(); |
||||
|
|
||||
|
System.out.println("\n=== All tests passed! ==="); |
||||
|
} |
||||
|
|
||||
|
private static void testStrategyFactory() { |
||||
|
System.out.println("\n1. Testing StrategyFactory SPI loading..."); |
||||
|
|
||||
|
assert StrategyFactory.hasStrategy("blog") : "blog strategy should be registered"; |
||||
|
assert StrategyFactory.hasStrategy("news") : "news strategy should be registered"; |
||||
|
assert !StrategyFactory.hasStrategy("unknown") : "unknown strategy should not be registered"; |
||||
|
|
||||
|
CrawlStrategy blogStrategy = StrategyFactory.getStrategy("blog"); |
||||
|
assert "blog".equals(blogStrategy.getType()) : "blog strategy type mismatch"; |
||||
|
|
||||
|
CrawlStrategy newsStrategy = StrategyFactory.getStrategy("news"); |
||||
|
assert "news".equals(newsStrategy.getType()) : "news strategy type mismatch"; |
||||
|
|
||||
|
System.out.println(" ✓ StrategyFactory loads strategies via SPI"); |
||||
|
System.out.println(" ✓ Supported types: " + StrategyFactory.getSupportedTypes()); |
||||
|
} |
||||
|
|
||||
|
private static void testArticleRepositoryImmutable() { |
||||
|
System.out.println("\n2. Testing ArticleRepository immutability..."); |
||||
|
|
||||
|
ArticleRepository repository = new ArticleRepository(); |
||||
|
repository.add(new Article("Test", "http://test.com", "content")); |
||||
|
|
||||
|
List<Article> articles = repository.findAll(); |
||||
|
|
||||
|
try { |
||||
|
articles.add(new Article("Should Not Add", "http://fail.com", "fail")); |
||||
|
assert false : "Should have thrown UnsupportedOperationException"; |
||||
|
} catch (UnsupportedOperationException e) { |
||||
|
System.out.println(" ✓ getAll() returns immutable view"); |
||||
|
} |
||||
|
|
||||
|
assert repository.count() == 1 : "Repository should have 1 article"; |
||||
|
System.out.println(" ✓ Repository count is correct"); |
||||
|
} |
||||
|
|
||||
|
private static void testCrawlToListFlow() { |
||||
|
System.out.println("\n3. Testing crawl → list flow..."); |
||||
|
|
||||
|
ConsoleView view = new ConsoleView(); |
||||
|
ArticleRepository repository = new ArticleRepository(); |
||||
|
CommandContext context = new CommandContext(repository); |
||||
|
|
||||
|
CrawlCommand crawlCommand = new CrawlCommand(view); |
||||
|
ListCommand listCommand = new ListCommand(view); |
||||
|
|
||||
|
crawlCommand.execute(new String[]{"crawl", "blog", "http://example.com"}, context); |
||||
|
assert repository.count() == 2 : "Should have 2 blog articles"; |
||||
|
System.out.println(" ✓ Crawl blog strategy: " + repository.count() + " articles"); |
||||
|
|
||||
|
crawlCommand.execute(new String[]{"crawl", "news", "http://news.com"}, context); |
||||
|
assert repository.count() == 5 : "Should have 5 articles total (2 blog + 3 news)"; |
||||
|
System.out.println(" ✓ Crawl news strategy: " + repository.count() + " articles total"); |
||||
|
|
||||
|
listCommand.execute(new String[]{"list"}, context); |
||||
|
System.out.println(" ✓ List command executed successfully"); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,53 @@ |
|||||
|
package com.example.datacollect.test; |
||||
|
|
||||
|
import com.example.datacollect.command.CommandContext; |
||||
|
import com.example.datacollect.command.CrawlCommand; |
||||
|
import com.example.datacollect.command.ListCommand; |
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
|
||||
|
public class RealCrawlTest { |
||||
|
|
||||
|
public static void main(String[] args) { |
||||
|
ConsoleView view = new ConsoleView(); |
||||
|
ArticleRepository repository = new ArticleRepository(); |
||||
|
CommandContext context = new CommandContext(repository); |
||||
|
|
||||
|
System.out.println("========================================"); |
||||
|
System.out.println(" 测试真实网页爬取功能"); |
||||
|
System.out.println("========================================"); |
||||
|
System.out.println(); |
||||
|
|
||||
|
CrawlCommand crawlCommand = new CrawlCommand(view); |
||||
|
|
||||
|
// 测试真实爬取(使用真实博客网站)
|
||||
|
System.out.println("【测试1】爬取 Baeldung 博客"); |
||||
|
System.out.println("-".repeat(40)); |
||||
|
try { |
||||
|
crawlCommand.execute(new String[]{"crawl", "real", "https://www.baeldung.com/"}, context); |
||||
|
} catch (Exception e) { |
||||
|
System.out.println("爬取失败: " + e.getMessage()); |
||||
|
} |
||||
|
|
||||
|
System.out.println(); |
||||
|
System.out.println("【测试2】爬取今日头条"); |
||||
|
System.out.println("-".repeat(40)); |
||||
|
try { |
||||
|
crawlCommand.execute(new String[]{"crawl", "real", "https://www.toutiao.com/"}, context); |
||||
|
} catch (Exception e) { |
||||
|
System.out.println("爬取失败: " + e.getMessage()); |
||||
|
} |
||||
|
|
||||
|
System.out.println(); |
||||
|
System.out.println("【爬取结果】"); |
||||
|
System.out.println("-".repeat(40)); |
||||
|
|
||||
|
ListCommand listCommand = new ListCommand(view); |
||||
|
listCommand.execute(new String[]{"list"}, context); |
||||
|
|
||||
|
System.out.println(); |
||||
|
System.out.println("========================================"); |
||||
|
System.out.println(" 测试完成!"); |
||||
|
System.out.println("========================================"); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,37 @@ |
|||||
|
package com.example.datacollect.test; |
||||
|
|
||||
|
import com.example.datacollect.command.CommandContext; |
||||
|
import com.example.datacollect.command.CrawlCommand; |
||||
|
import com.example.datacollect.command.ListCommand; |
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
|
||||
|
public class ToutiaoTest { |
||||
|
|
||||
|
public static void main(String[] args) { |
||||
|
ConsoleView view = new ConsoleView(); |
||||
|
ArticleRepository repository = new ArticleRepository(); |
||||
|
CommandContext context = new CommandContext(repository); |
||||
|
|
||||
|
System.out.println("========================================"); |
||||
|
System.out.println(" 测试今日头条爬取"); |
||||
|
System.out.println("========================================"); |
||||
|
System.out.println(); |
||||
|
|
||||
|
// 测试爬取今日头条
|
||||
|
CrawlCommand crawlCommand = new CrawlCommand(view); |
||||
|
crawlCommand.execute(new String[]{"crawl", "news", "https://www.toutiao.com/"}, context); |
||||
|
|
||||
|
System.out.println(); |
||||
|
System.out.println("【爬取结果】"); |
||||
|
System.out.println("-".repeat(40)); |
||||
|
|
||||
|
ListCommand listCommand = new ListCommand(view); |
||||
|
listCommand.execute(new String[]{"list"}, context); |
||||
|
|
||||
|
System.out.println(); |
||||
|
System.out.println("========================================"); |
||||
|
System.out.println(" 测试完成!"); |
||||
|
System.out.println("========================================"); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,42 @@ |
|||||
|
package com.example.datacollect.view; |
||||
|
|
||||
|
import com.example.datacollect.model.Article; |
||||
|
import java.util.List; |
||||
|
import java.util.Scanner; |
||||
|
|
||||
|
public class ConsoleView { |
||||
|
private static final String ANSI_RESET = "\u001B[0m"; |
||||
|
private static final String ANSI_GREEN = "\u001B[32m"; |
||||
|
private static final String ANSI_RED = "\u001B[31m"; |
||||
|
private static final String ANSI_BLUE = "\u001B[34m"; |
||||
|
|
||||
|
private final Scanner scanner = new Scanner(System.in); |
||||
|
|
||||
|
public String readLine() { |
||||
|
System.out.print("> "); |
||||
|
return scanner.nextLine(); |
||||
|
} |
||||
|
|
||||
|
public void printSuccess(String msg) { |
||||
|
System.out.println(ANSI_GREEN + msg + ANSI_RESET); |
||||
|
} |
||||
|
|
||||
|
public void printError(String msg) { |
||||
|
System.out.println(ANSI_RED + msg + ANSI_RESET); |
||||
|
} |
||||
|
|
||||
|
public void printInfo(String msg) { |
||||
|
System.out.println(ANSI_BLUE + msg + ANSI_RESET); |
||||
|
} |
||||
|
|
||||
|
public void display(List<Article> articles) { |
||||
|
if (articles.isEmpty()) { |
||||
|
printInfo("暂无文章,请先执行 crawl。"); |
||||
|
return; |
||||
|
} |
||||
|
for (int i = 0; i < articles.size(); i++) { |
||||
|
Article a = articles.get(i); |
||||
|
System.out.println((i + 1) + ". " + a.getTitle() + " | " + a.getUrl()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,3 @@ |
|||||
|
com.example.datacollect.strategy.BlogStrategy |
||||
|
com.example.datacollect.strategy.NewsStrategy |
||||
|
com.example.datacollect.strategy.RealCrawlStrategy |
||||
@ -0,0 +1,45 @@ |
|||||
|
<configuration>
    <!-- Console output -->
    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
        <encoder>
            <pattern>%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg%n</pattern>
        </encoder>
    </appender>

    <!-- Plain file output. Not referenced by any logger below; only ROLLING_FILE is attached to root. -->
    <appender name="FILE" class="ch.qos.logback.core.FileAppender">
        <file>logs/application.log</file>
        <encoder>
            <pattern>%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg%n</pattern>
        </encoder>
    </appender>

    <!--
        Rolling file output, split by day and by size.
        SizeAndTimeBasedRollingPolicy replaces the deprecated combination of
        TimeBasedRollingPolicy with a nested SizeAndTimeBasedFNATP; the file name pattern,
        10MB size limit and 30-period history are unchanged.
    -->
    <appender name="ROLLING_FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <file>logs/application.log</file>
        <rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
            <fileNamePattern>logs/application.%d{yyyy-MM-dd}.%i.log</fileNamePattern>
            <maxFileSize>10MB</maxFileSize>
            <maxHistory>30</maxHistory>
        </rollingPolicy>
        <encoder>
            <pattern>%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg%n</pattern>
        </encoder>
    </appender>

    <!-- Root logger level and appenders -->
    <root level="INFO">
        <appender-ref ref="STDOUT" />
        <appender-ref ref="ROLLING_FILE" />
    </root>

    <!-- Per-package / per-class log levels -->
    <logger name="com.example.datacollect" level="DEBUG" />
    <logger name="com.example.datacollect.command.CrawlCommand" level="TRACE" />
    <logger name="com.example.datacollect.strategy.WebCrawler" level="DEBUG" />
    <logger name="com.example.datacollect.strategy.RealCrawlStrategy" level="DEBUG" />
    <logger name="com.example.datacollect.repository.ArticleRepository" level="DEBUG" />
    <logger name="com.example.datacollect.exception" level="DEBUG" />
</configuration>
||||
@ -0,0 +1,3 @@ |
|||||
|
com.example.datacollect.strategy.BlogStrategy |
||||
|
com.example.datacollect.strategy.NewsStrategy |
||||
|
com.example.datacollect.strategy.RealCrawlStrategy |
||||
@ -0,0 +1,45 @@ |
|||||
|
<configuration>
    <!-- Console output -->
    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
        <encoder>
            <pattern>%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg%n</pattern>
        </encoder>
    </appender>

    <!-- Plain file output. Not referenced by any logger below; only ROLLING_FILE is attached to root. -->
    <appender name="FILE" class="ch.qos.logback.core.FileAppender">
        <file>logs/application.log</file>
        <encoder>
            <pattern>%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg%n</pattern>
        </encoder>
    </appender>

    <!--
        Rolling file output, split by day and by size.
        SizeAndTimeBasedRollingPolicy replaces the deprecated combination of
        TimeBasedRollingPolicy with a nested SizeAndTimeBasedFNATP; the file name pattern,
        10MB size limit and 30-period history are unchanged.
    -->
    <appender name="ROLLING_FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <file>logs/application.log</file>
        <rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
            <fileNamePattern>logs/application.%d{yyyy-MM-dd}.%i.log</fileNamePattern>
            <maxFileSize>10MB</maxFileSize>
            <maxHistory>30</maxHistory>
        </rollingPolicy>
        <encoder>
            <pattern>%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg%n</pattern>
        </encoder>
    </appender>

    <!-- Root logger level and appenders -->
    <root level="INFO">
        <appender-ref ref="STDOUT" />
        <appender-ref ref="ROLLING_FILE" />
    </root>

    <!-- Per-package / per-class log levels -->
    <logger name="com.example.datacollect" level="DEBUG" />
    <logger name="com.example.datacollect.command.CrawlCommand" level="TRACE" />
    <logger name="com.example.datacollect.strategy.WebCrawler" level="DEBUG" />
    <logger name="com.example.datacollect.strategy.RealCrawlStrategy" level="DEBUG" />
    <logger name="com.example.datacollect.repository.ArticleRepository" level="DEBUG" />
    <logger name="com.example.datacollect.exception" level="DEBUG" />
</configuration>
||||
@ -0,0 +1,3 @@ |
|||||
|
artifactId=datacollect-cli |
||||
|
groupId=com.example |
||||
|
version=0.1.0 |
||||
@ -0,0 +1,20 @@ |
|||||
|
com\example\datacollect\command\ListCommand.class |
||||
|
com\example\datacollect\command\CrawlCommand.class |
||||
|
com\example\datacollect\view\ConsoleView.class |
||||
|
com\example\datacollect\test\ToutiaoTest.class |
||||
|
com\example\datacollect\strategy\NewsStrategy.class |
||||
|
com\example\datacollect\command\CommandContext.class |
||||
|
com\example\datacollect\command\Command.class |
||||
|
com\example\datacollect\test\CrawlerTest.class |
||||
|
com\example\datacollect\test\CrawlerDemo.class |
||||
|
com\example\datacollect\strategy\CrawlStrategy.class |
||||
|
com\example\datacollect\model\Article.class |
||||
|
com\example\datacollect\strategy\WebCrawler.class |
||||
|
com\example\datacollect\strategy\BlogStrategy.class |
||||
|
com\example\datacollect\repository\ArticleRepository.class |
||||
|
com\example\datacollect\Main.class |
||||
|
com\example\datacollect\command\ExitCommand.class |
||||
|
com\example\datacollect\command\HelpCommand.class |
||||
|
com\example\datacollect\command\HistoryCommand.class |
||||
|
com\example\datacollect\controller\CrawlerController.class |
||||
|
com\example\datacollect\strategy\StrategyFactory.class |
||||
@ -0,0 +1,25 @@ |
|||||
|
C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\strategy\WebCrawler.java |
||||
|
C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\exception\ParseException.java |
||||
|
C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\command\CrawlCommand.java |
||||
|
C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\test\RealCrawlTest.java |
||||
|
C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\command\ExitCommand.java |
||||
|
C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\Main.java |
||||
|
C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\strategy\CrawlStrategy.java |
||||
|
C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\strategy\BlogStrategy.java |
||||
|
C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\test\CrawlerDemo.java |
||||
|
C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\command\CommandContext.java |
||||
|
C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\model\Article.java |
||||
|
C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\strategy\NewsStrategy.java |
||||
|
C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\exception\CrawlerException.java |
||||
|
C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\strategy\StrategyFactory.java |
||||
|
C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\repository\ArticleRepository.java |
||||
|
C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\test\ToutiaoTest.java |
||||
|
C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\command\HistoryCommand.java |
||||
|
C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\exception\NetworkException.java |
||||
|
C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\command\Command.java |
||||
|
C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\controller\CrawlerController.java |
||||
|
C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\command\HelpCommand.java |
||||
|
C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\view\ConsoleView.java |
||||
|
C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\test\CrawlerTest.java |
||||
|
C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\command\ListCommand.java |
||||
|
C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\strategy\RealCrawlStrategy.java |
||||
@ -0,0 +1,26 @@ |
|||||
|
@echo off
rem Smoke-test script for the crawler CLI.
rem Expects the fat jar produced by "mvn package" under target\.
echo ====================================
echo 爬虫功能测试脚本
echo ====================================
echo.

rem Step 1: run the CrawlerTest main class; abort the script if it fails.
echo [测试1] 运行单元测试...
java -cp target/datacollect-cli-0.1.0-jar-with-dependencies.jar com.example.datacollect.test.CrawlerTest
if %errorlevel% neq 0 (
    echo 单元测试失败!
    exit /b 1
)
echo.

rem Step 2: print the strategy types known to StrategyFactory.
rem The java launcher has no option for evaluating a source snippet, so the
rem snippet is piped into jshell instead ("-" makes jshell read stdin
rem non-interactively). The fallback message is shown when jshell is missing.
echo [测试2] 测试策略工厂...
echo 支持的策略类型:
echo System.out.println(com.example.datacollect.strategy.StrategyFactory.getSupportedTypes()); | jshell --class-path target/datacollect-cli-0.1.0-jar-with-dependencies.jar - 2>nul || echo (需要JShell支持)
echo.

rem Step 3: NOTE(review): this runs the same class as step 1; point it at a
rem Blog-strategy-specific entry point if one exists.
echo [测试3] 测试Blog策略爬取...
java -cp target/datacollect-cli-0.1.0-jar-with-dependencies.jar com.example.datacollect.test.CrawlerTest
echo.

echo ====================================
echo 测试完成!
echo ====================================
||||
@ -0,0 +1,29 @@ |
|||||
|
{ |
||||
|
// 使用 IntelliSense 了解相关属性。 |
||||
|
// 悬停以查看现有属性的描述。 |
||||
|
// 欲了解更多信息,请访问: https://go.microsoft.com/fwlink/?linkid=830387 |
||||
|
"version": "0.2.0", |
||||
|
"configurations": [ |
||||
|
|
||||
|
{ |
||||
|
"type": "java", |
||||
|
"name": "Current File", |
||||
|
"request": "launch", |
||||
|
"mainClass": "${file}" |
||||
|
}, |
||||
|
{ |
||||
|
"type": "java", |
||||
|
"name": "WeiboStarHotSearcha", |
||||
|
"request": "launch", |
||||
|
"mainClass": "WeiboStarHotSearcha", |
||||
|
"projectName": "weibo-hotsearch" |
||||
|
}, |
||||
|
{ |
||||
|
"type": "java", |
||||
|
"name": "Main", |
||||
|
"request": "launch", |
||||
|
"mainClass": "com.weibo.hotsearch.Main", |
||||
|
"projectName": "weibo-hotsearch" |
||||
|
} |
||||
|
] |
||||
|
} |
||||
@ -0,0 +1,3 @@ |
|||||
|
{ |
||||
|
"java.configuration.updateBuildConfiguration": "interactive" |
||||
|
} |
||||
Binary file not shown.
@ -0,0 +1,56 @@ |
|||||
|
热搜数据采集报告 |
||||
|
================ |
||||
|
采集时间: 2026年05月28日 14:06:04 |
||||
|
|
||||
|
【微博热搜数据】 |
||||
|
|
||||
|
--- 明星相关热搜 --- |
||||
|
排名:6 热度:414478 热搜:VOGUE直播间两位明星不出镜 |
||||
|
排名:32 热度:373370 热搜:歌手第二期歌单 |
||||
|
排名:34 热度:370959 热搜:小红书官宣获得世界杯版权 |
||||
|
|
||||
|
明星相关热搜总数:3 条 |
||||
|
|
||||
|
--- 体育相关热搜 --- |
||||
|
排名:34 热度:370959 热搜:小红书官宣获得世界杯版权 |
||||
|
|
||||
|
体育相关热搜总数:1 条 |
||||
|
|
||||
|
--- 国家政策相关热搜 --- |
||||
|
排名:37 热度:364897 热搜:双汇发布致歉声明 |
||||
|
|
||||
|
国家政策相关热搜总数:1 条 |
||||
|
|
||||
|
【百度贴吧热搜数据】 |
||||
|
|
||||
|
--- 明星相关热搜 --- |
||||
|
|
||||
|
明星相关热搜总数:0 条 |
||||
|
当前贴吧热搜暂无明星相关内容 |
||||
|
|
||||
|
--- 体育相关热搜 --- |
||||
|
|
||||
|
体育相关热搜总数:0 条 |
||||
|
当前贴吧热搜暂无体育相关内容 |
||||
|
|
||||
|
--- 国家政策相关热搜 --- |
||||
|
|
||||
|
国家政策相关热搜总数:0 条 |
||||
|
当前贴吧热搜暂无国家政策相关内容 |
||||
|
|
||||
|
【知乎热搜数据】 |
||||
|
|
||||
|
--- 明星相关热搜 --- |
||||
|
排名:6 热度:0 热搜:动作演员吴樾为什么火不起来 |
||||
|
|
||||
|
明星相关热搜总数:1 条 |
||||
|
|
||||
|
--- 体育相关热搜 --- |
||||
|
|
||||
|
体育相关热搜总数:0 条 |
||||
|
当前知乎热搜暂无体育相关内容 |
||||
|
|
||||
|
--- 国家政策相关热搜 --- |
||||
|
|
||||
|
国家政策相关热搜总数:0 条 |
||||
|
当前知乎热搜暂无国家政策相关内容 |
||||
@ -0,0 +1,60 @@ |
|||||
|
热搜数据采集报告 |
||||
|
================ |
||||
|
采集时间: 2026年05月31日 12:17:24 |
||||
|
|
||||
|
【微博热搜数据】 |
||||
|
|
||||
|
--- 明星相关热搜 --- |
||||
|
排名:43 热度:186709 热搜:敖瑞鹏新剧演鞠婧祎哥哥 |
||||
|
|
||||
|
明星相关热搜总数:1 条 |
||||
|
|
||||
|
--- 体育相关热搜 --- |
||||
|
排名:10 热度:387068 热搜:陈星旭王玉雯一起去看欧冠了 |
||||
|
排名:21 热度:261492 热搜:姆巴佩欧冠金靴 |
||||
|
排名:23 热度:250168 热搜:文班亚马西决MVP |
||||
|
排名:37 热度:191114 热搜:文班亚马回应成为西部决赛MVP |
||||
|
排名:41 热度:188900 热搜:巴黎欧冠夺冠后多地爆发骚乱 |
||||
|
排名:42 热度:188188 热搜:孙千去看欧冠了 |
||||
|
排名:49 热度:172821 热搜:世界杯 |
||||
|
|
||||
|
体育相关热搜总数:7 条 |
||||
|
|
||||
|
--- 国家政策相关热搜 --- |
||||
|
|
||||
|
国家政策相关热搜总数:0 条 |
||||
|
当前微博热搜暂无国家政策相关内容 |
||||
|
|
||||
|
【百度贴吧热搜数据】 |
||||
|
|
||||
|
--- 明星相关热搜 --- |
||||
|
|
||||
|
明星相关热搜总数:0 条 |
||||
|
当前贴吧热搜暂无明星相关内容 |
||||
|
|
||||
|
--- 体育相关热搜 --- |
||||
|
|
||||
|
体育相关热搜总数:0 条 |
||||
|
当前贴吧热搜暂无体育相关内容 |
||||
|
|
||||
|
--- 国家政策相关热搜 --- |
||||
|
|
||||
|
国家政策相关热搜总数:0 条 |
||||
|
当前贴吧热搜暂无国家政策相关内容 |
||||
|
|
||||
|
【知乎热搜数据】 |
||||
|
|
||||
|
--- 明星相关热搜 --- |
||||
|
|
||||
|
明星相关热搜总数:0 条 |
||||
|
当前知乎热搜暂无明星相关内容 |
||||
|
|
||||
|
--- 体育相关热搜 --- |
||||
|
|
||||
|
体育相关热搜总数:0 条 |
||||
|
当前知乎热搜暂无体育相关内容 |
||||
|
|
||||
|
--- 国家政策相关热搜 --- |
||||
|
|
||||
|
国家政策相关热搜总数:0 条 |
||||
|
当前知乎热搜暂无国家政策相关内容 |
||||
@ -0,0 +1,4 @@ |
|||||
|
{ |
||||
|
"git.ignoreLimitWarning": true, |
||||
|
"java.debug.settings.onBuildFailureProceed": true |
||||
|
} |
||||
@ -0,0 +1,76 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0" |
||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> |
||||
|
<modelVersion>4.0.0</modelVersion> |
||||
|
|
||||
|
<groupId>com.weibo</groupId> |
||||
|
<artifactId>hotsearch</artifactId> |
||||
|
<version>1.0.0</version> |
||||
|
<packaging>jar</packaging> |
||||
|
|
||||
|
<name>HotSearch CLI Tool</name> |
||||
|
<description>热搜数据采集工具 - CLI + MVC + Command + Strategy + Exception体系</description> |
||||
|
|
||||
|
<properties> |
||||
|
<maven.compiler.source>11</maven.compiler.source> |
||||
|
<maven.compiler.target>11</maven.compiler.target> |
||||
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> |
||||
|
</properties> |
||||
|
|
||||
|
<dependencies> |
||||
|
<dependency> |
||||
|
<groupId>org.jsoup</groupId> |
||||
|
<artifactId>jsoup</artifactId> |
||||
|
<version>1.17.2</version> |
||||
|
</dependency> |
||||
|
|
||||
|
<dependency> |
||||
|
<groupId>org.apache.httpcomponents.client5</groupId> |
||||
|
<artifactId>httpclient5</artifactId> |
||||
|
<version>5.3.1</version> |
||||
|
</dependency> |
||||
|
|
||||
|
<dependency> |
||||
|
<groupId>org.apache.httpcomponents.client5</groupId> |
||||
|
<artifactId>httpclient5-fluent</artifactId> |
||||
|
<version>5.3.1</version> |
||||
|
</dependency> |
||||
|
|
||||
|
<dependency> |
||||
|
<groupId>com.alibaba.fastjson2</groupId> |
||||
|
<artifactId>fastjson2</artifactId> |
||||
|
<version>2.0.52</version> |
||||
|
</dependency> |
||||
|
</dependencies> |
||||
|
|
||||
|
<build> |
||||
|
<plugins> |
||||
|
<plugin> |
||||
|
<groupId>org.apache.maven.plugins</groupId> |
||||
|
<artifactId>maven-assembly-plugin</artifactId> |
||||
|
<version>3.6.0</version> |
||||
|
<configuration> |
||||
|
<archive> |
||||
|
<manifest> |
||||
|
<mainClass>com.weibo.hotsearch.Main</mainClass> |
||||
|
</manifest> |
||||
|
</archive> |
||||
|
<descriptorRefs> |
||||
|
<descriptorRef>jar-with-dependencies</descriptorRef> |
||||
|
</descriptorRefs> |
||||
|
</configuration> |
||||
|
<executions> |
||||
|
<execution> |
||||
|
<id>make-assembly</id> |
||||
|
<phase>package</phase> |
||||
|
<goals> |
||||
|
<goal>single</goal> |
||||
|
</goals> |
||||
|
</execution> |
||||
|
</executions> |
||||
|
</plugin> |
||||
|
</plugins> |
||||
|
</build> |
||||
|
|
||||
|
</project> |
||||
@ -0,0 +1,667 @@ |
|||||
|
import com.alibaba.fastjson2.JSONArray; |
||||
|
import com.alibaba.fastjson2.JSONObject; |
||||
|
import org.apache.hc.client5.http.fluent.Request; |
||||
|
import org.apache.hc.core5.util.Timeout; |
||||
|
|
||||
|
import java.io.BufferedWriter; |
||||
|
import java.io.File; |
||||
|
import java.io.FileWriter; |
||||
|
import java.io.IOException; |
||||
|
import java.io.UnsupportedEncodingException; |
||||
|
import java.net.URLEncoder; |
||||
|
import java.nio.charset.StandardCharsets; |
||||
|
import java.text.SimpleDateFormat; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.Date; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class WeiboStarHotSearcha { |
||||
|
|
||||
|
private static final String WEIBO_HOT_URL = "https://weibo.com/ajax/side/hotSearch"; |
||||
|
private static final String TIEBA_HOT_URL = "https://tieba.baidu.com/hottopic/browse/topicList"; |
||||
|
private static final String ZHIHU_HOT_URL = "https://www.zhihu.com/api/v4/search/top_search"; |
||||
|
private static final String OUTPUT_DIR = "hotsearch_results"; |
||||
|
|
||||
|
private static final String[] STAR_KEYWORDS = { |
||||
|
"明星", "演员", "歌手", "爱豆", "艺人", "红毯", "综艺", "新剧", |
||||
|
"恋情", "官宣", "演唱会", "代言", "造型", "封面" |
||||
|
}; |
||||
|
|
||||
|
private static final String[] SPORTS_KEYWORDS = { |
||||
|
"足球", "篮球", "世界杯", "NBA", "CBA", "奥运会", "世锦赛", |
||||
|
"冠军", "比赛", "夺冠", "进球", "比分", "运动员", "国足", |
||||
|
"乒乓", "排球", "羽毛球", "游泳", "田径", "体操", "跳水", |
||||
|
"MVP", "转会", "联赛", "中超", "英超", "西甲", "欧冠" |
||||
|
}; |
||||
|
|
||||
|
private static final String[] POLICY_KEYWORDS = { |
||||
|
"政策", "新规", "条例", "法规", "通知", "公告", "发布", |
||||
|
"国务院", "发改委", "财政部", "教育部", "工信部", "科技部", |
||||
|
"税收", "补贴", "优惠", "扶持", "改革", "开放", "创新", |
||||
|
"十四五", "计划", "规划", "方案", "意见", "办法", "细则", |
||||
|
"经济", "金融", "市场", "监管", "安全", "环保", "绿色" |
||||
|
}; |
||||
|
|
||||
|
private static final int CONNECT_TIMEOUT = 10000; |
||||
|
private static final int RESPONSE_TIMEOUT = 10000; |
||||
|
private static final int MAX_RETRIES = 3; |
||||
|
|
||||
|
private static final StringBuilder outputBuilder = new StringBuilder(); |
||||
|
|
||||
|
public static void main(String[] args) { |
||||
|
try { |
||||
|
outputBuilder.append("热搜数据采集报告\n"); |
||||
|
outputBuilder.append("================\n"); |
||||
|
outputBuilder.append("采集时间: ").append(getCurrentTime()).append("\n\n"); |
||||
|
|
||||
|
// ========== 微博热搜 ==========
|
||||
|
System.out.println("正在请求微博热搜数据..."); |
||||
|
outputBuilder.append("【微博热搜数据】\n"); |
||||
|
|
||||
|
String weiboJson = fetchWithRetry(WEIBO_HOT_URL, MAX_RETRIES, "https://weibo.com/"); |
||||
|
|
||||
|
if (weiboJson == null || weiboJson.isEmpty()) { |
||||
|
System.out.println("获取微博热搜数据失败"); |
||||
|
outputBuilder.append("获取微博热搜数据失败\n\n"); |
||||
|
} else { |
||||
|
parseAndFilterWeibo(weiboJson); |
||||
|
} |
||||
|
|
||||
|
// ========== 百度贴吧热搜 ==========
|
||||
|
System.out.println("\n\n正在请求百度贴吧热搜数据..."); |
||||
|
outputBuilder.append("\n【百度贴吧热搜数据】\n"); |
||||
|
|
||||
|
String tiebaJson = fetchWithRetry(TIEBA_HOT_URL, MAX_RETRIES, "https://tieba.baidu.com/"); |
||||
|
|
||||
|
if (tiebaJson == null || tiebaJson.isEmpty()) { |
||||
|
System.out.println("获取百度贴吧热搜数据失败"); |
||||
|
outputBuilder.append("获取百度贴吧热搜数据失败\n"); |
||||
|
} else { |
||||
|
parseAndFilterTieba(tiebaJson); |
||||
|
} |
||||
|
|
||||
|
// ========== 知乎热搜 ==========
|
||||
|
System.out.println("\n\n正在请求知乎热搜数据..."); |
||||
|
outputBuilder.append("\n【知乎热搜数据】\n"); |
||||
|
|
||||
|
try { |
||||
|
String zhihuJson = fetchWithRetry(ZHIHU_HOT_URL, MAX_RETRIES, "https://zhuanlan.zhihu.com/"); |
||||
|
|
||||
|
if (zhihuJson == null || zhihuJson.isEmpty()) { |
||||
|
System.out.println("获取知乎热搜数据失败"); |
||||
|
outputBuilder.append("获取知乎热搜数据失败\n"); |
||||
|
} else { |
||||
|
System.out.println("知乎返回数据长度: " + zhihuJson.length() + " 字符"); |
||||
|
if (zhihuJson.length() > 0) { |
||||
|
System.out.println("知乎返回数据预览: " + zhihuJson.substring(0, Math.min(500, zhihuJson.length())) + "..."); |
||||
|
} |
||||
|
parseAndFilterZhihu(zhihuJson); |
||||
|
} |
||||
|
} catch (Exception e) { |
||||
|
System.out.println("获取知乎热搜数据异常: " + e.getMessage()); |
||||
|
outputBuilder.append("获取知乎热搜数据异常: " + e.getMessage() + "\n"); |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
|
||||
|
// ========== 保存到文件 ==========
|
||||
|
String filename = saveToFile(); |
||||
|
System.out.println("\n\n结果已保存到文件: " + filename); |
||||
|
|
||||
|
} catch (IOException e) { |
||||
|
System.err.println("网络请求失败: " + e.getMessage()); |
||||
|
e.printStackTrace(); |
||||
|
} catch (Exception e) { |
||||
|
System.err.println("数据解析失败: " + e.getMessage()); |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private static String fetchWithRetry(String url, int maxRetries, String referer) throws IOException { |
||||
|
int retryCount = 0; |
||||
|
IOException lastException = null; |
||||
|
|
||||
|
while (retryCount < maxRetries) { |
||||
|
try { |
||||
|
System.out.println("正在请求: " + url); |
||||
|
String result = Request.get(url) |
||||
|
.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36") |
||||
|
.addHeader("Referer", referer) |
||||
|
.addHeader("Accept", "application/json, text/plain, */*;charset=UTF-8") |
||||
|
.addHeader("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8") |
||||
|
.addHeader("Accept-Encoding", "identity") |
||||
|
.addHeader("Connection", "keep-alive") |
||||
|
.addHeader("Content-Type", "application/json;charset=UTF-8") |
||||
|
.connectTimeout(Timeout.ofMilliseconds(CONNECT_TIMEOUT)) |
||||
|
.responseTimeout(Timeout.ofMilliseconds(RESPONSE_TIMEOUT)) |
||||
|
.execute() |
||||
|
.returnContent() |
||||
|
.asString(StandardCharsets.UTF_8); |
||||
|
|
||||
|
// 修复可能的编码问题
|
||||
|
result = new String(result.getBytes(StandardCharsets.UTF_8), StandardCharsets.UTF_8); |
||||
|
return result; |
||||
|
} catch (IOException e) { |
||||
|
lastException = e; |
||||
|
retryCount++; |
||||
|
System.out.println("请求失败 (" + retryCount + "/" + maxRetries + "): " + e.getMessage()); |
||||
|
if (retryCount < maxRetries) { |
||||
|
System.out.println("2秒后重试..."); |
||||
|
try { |
||||
|
Thread.sleep(2000); |
||||
|
} catch (InterruptedException ie) { |
||||
|
Thread.currentThread().interrupt(); |
||||
|
throw new IOException("重试被中断", ie); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
throw lastException != null ? lastException : new IOException("请求失败"); |
||||
|
} |
||||
|
|
||||
|
private static void parseAndFilterWeibo(String json) { |
||||
|
JSONObject root = JSONObject.parseObject(json); |
||||
|
if (root == null || !root.containsKey("data")) { |
||||
|
System.out.println("微博数据格式错误或接口返回异常"); |
||||
|
outputBuilder.append("数据格式错误或接口返回异常\n"); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
JSONObject data = root.getJSONObject("data"); |
||||
|
if (data == null || !data.containsKey("realtime")) { |
||||
|
System.out.println("微博热搜数据为空"); |
||||
|
outputBuilder.append("热搜数据为空\n"); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
JSONArray realtime = data.getJSONArray("realtime"); |
||||
|
if (realtime == null || realtime.isEmpty()) { |
||||
|
System.out.println("微博热搜列表为空"); |
||||
|
outputBuilder.append("热搜列表为空\n"); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
List<JSONObject> starHotList = new ArrayList<>(); |
||||
|
List<JSONObject> sportsHotList = new ArrayList<>(); |
||||
|
List<JSONObject> policyHotList = new ArrayList<>(); |
||||
|
|
||||
|
System.out.println("\n===== 微博 - 明星相关热搜 ====="); |
||||
|
outputBuilder.append("\n--- 明星相关热搜 ---\n"); |
||||
|
|
||||
|
for (int i = 0; i < realtime.size(); i++) { |
||||
|
JSONObject item = realtime.getJSONObject(i); |
||||
|
if (item == null) continue; |
||||
|
|
||||
|
String word = item.getString("word"); |
||||
|
if (word == null || word.isEmpty()) continue; |
||||
|
|
||||
|
long num = item.getLongValue("num", 0); |
||||
|
int rank = item.getIntValue("rank", 0); |
||||
|
|
||||
|
if (isStarRelated(word)) { |
||||
|
starHotList.add(item); |
||||
|
String line = String.format("排名:%d\t热度:%d\t热搜:%s", rank, num, word); |
||||
|
System.out.println(line); |
||||
|
outputBuilder.append(line).append("\n"); |
||||
|
} |
||||
|
} |
||||
|
String summary = "\n明星相关热搜总数:" + starHotList.size() + " 条"; |
||||
|
System.out.println(summary); |
||||
|
outputBuilder.append(summary).append("\n"); |
||||
|
|
||||
|
if (starHotList.isEmpty()) { |
||||
|
String emptyMsg = "当前微博热搜暂无明星相关内容"; |
||||
|
System.out.println(emptyMsg); |
||||
|
outputBuilder.append(emptyMsg).append("\n"); |
||||
|
} |
||||
|
|
||||
|
System.out.println("\n===== 微博 - 体育相关热搜 ====="); |
||||
|
outputBuilder.append("\n--- 体育相关热搜 ---\n"); |
||||
|
|
||||
|
for (int i = 0; i < realtime.size(); i++) { |
||||
|
JSONObject item = realtime.getJSONObject(i); |
||||
|
if (item == null) continue; |
||||
|
|
||||
|
String word = item.getString("word"); |
||||
|
if (word == null || word.isEmpty()) continue; |
||||
|
|
||||
|
long num = item.getLongValue("num", 0); |
||||
|
int rank = item.getIntValue("rank", 0); |
||||
|
|
||||
|
if (isSportsRelated(word)) { |
||||
|
sportsHotList.add(item); |
||||
|
String line = String.format("排名:%d\t热度:%d\t热搜:%s", rank, num, word); |
||||
|
System.out.println(line); |
||||
|
outputBuilder.append(line).append("\n"); |
||||
|
} |
||||
|
} |
||||
|
summary = "\n体育相关热搜总数:" + sportsHotList.size() + " 条"; |
||||
|
System.out.println(summary); |
||||
|
outputBuilder.append(summary).append("\n"); |
||||
|
|
||||
|
if (sportsHotList.isEmpty()) { |
||||
|
String emptyMsg = "当前微博热搜暂无体育相关内容"; |
||||
|
System.out.println(emptyMsg); |
||||
|
outputBuilder.append(emptyMsg).append("\n"); |
||||
|
} |
||||
|
|
||||
|
System.out.println("\n===== 微博 - 国家政策相关热搜 ====="); |
||||
|
outputBuilder.append("\n--- 国家政策相关热搜 ---\n"); |
||||
|
|
||||
|
for (int i = 0; i < realtime.size(); i++) { |
||||
|
JSONObject item = realtime.getJSONObject(i); |
||||
|
if (item == null) continue; |
||||
|
|
||||
|
String word = item.getString("word"); |
||||
|
if (word == null || word.isEmpty()) continue; |
||||
|
|
||||
|
long num = item.getLongValue("num", 0); |
||||
|
int rank = item.getIntValue("rank", 0); |
||||
|
|
||||
|
if (isPolicyRelated(word)) { |
||||
|
policyHotList.add(item); |
||||
|
String line = String.format("排名:%d\t热度:%d\t热搜:%s", rank, num, word); |
||||
|
System.out.println(line); |
||||
|
outputBuilder.append(line).append("\n"); |
||||
|
} |
||||
|
} |
||||
|
summary = "\n国家政策相关热搜总数:" + policyHotList.size() + " 条"; |
||||
|
System.out.println(summary); |
||||
|
outputBuilder.append(summary).append("\n"); |
||||
|
|
||||
|
if (policyHotList.isEmpty()) { |
||||
|
String emptyMsg = "当前微博热搜暂无国家政策相关内容"; |
||||
|
System.out.println(emptyMsg); |
||||
|
outputBuilder.append(emptyMsg).append("\n"); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private static void parseAndFilterTieba(String json) { |
||||
|
JSONObject root = JSONObject.parseObject(json); |
||||
|
if (root == null || !root.containsKey("data")) { |
||||
|
System.out.println("贴吧数据格式错误或接口返回异常"); |
||||
|
outputBuilder.append("数据格式错误或接口返回异常\n"); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
JSONObject data = root.getJSONObject("data"); |
||||
|
if (data == null || !data.containsKey("bang_topic")) { |
||||
|
System.out.println("贴吧热搜数据为空"); |
||||
|
outputBuilder.append("热搜数据为空\n"); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
JSONArray topics = data.getJSONArray("bang_topic"); |
||||
|
if (topics == null || topics.isEmpty()) { |
||||
|
System.out.println("贴吧热搜列表为空"); |
||||
|
outputBuilder.append("热搜列表为空\n"); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
List<JSONObject> starHotList = new ArrayList<>(); |
||||
|
List<JSONObject> sportsHotList = new ArrayList<>(); |
||||
|
List<JSONObject> policyHotList = new ArrayList<>(); |
||||
|
|
||||
|
System.out.println("\n===== 百度贴吧 - 明星相关热搜 ====="); |
||||
|
outputBuilder.append("\n--- 明星相关热搜 ---\n"); |
||||
|
|
||||
|
for (int i = 0; i < topics.size(); i++) { |
||||
|
JSONObject item = topics.getJSONObject(i); |
||||
|
if (item == null) continue; |
||||
|
|
||||
|
String topicName = item.getString("topic_name"); |
||||
|
if (topicName == null || topicName.isEmpty()) continue; |
||||
|
|
||||
|
int discussNum = item.getIntValue("discuss_num", 0); |
||||
|
int readNum = item.getIntValue("read_num", 0); |
||||
|
|
||||
|
if (isStarRelated(topicName)) { |
||||
|
starHotList.add(item); |
||||
|
String line = String.format("序号:%d\t阅读:%d\t讨论:%d\t话题:%s", i + 1, readNum, discussNum, topicName); |
||||
|
System.out.println(line); |
||||
|
outputBuilder.append(line).append("\n"); |
||||
|
} |
||||
|
} |
||||
|
String summary = "\n明星相关热搜总数:" + starHotList.size() + " 条"; |
||||
|
System.out.println(summary); |
||||
|
outputBuilder.append(summary).append("\n"); |
||||
|
|
||||
|
if (starHotList.isEmpty()) { |
||||
|
String emptyMsg = "当前贴吧热搜暂无明星相关内容"; |
||||
|
System.out.println(emptyMsg); |
||||
|
outputBuilder.append(emptyMsg).append("\n"); |
||||
|
} |
||||
|
|
||||
|
System.out.println("\n===== 百度贴吧 - 体育相关热搜 ====="); |
||||
|
outputBuilder.append("\n--- 体育相关热搜 ---\n"); |
||||
|
|
||||
|
for (int i = 0; i < topics.size(); i++) { |
||||
|
JSONObject item = topics.getJSONObject(i); |
||||
|
if (item == null) continue; |
||||
|
|
||||
|
String topicName = item.getString("topic_name"); |
||||
|
if (topicName == null || topicName.isEmpty()) continue; |
||||
|
|
||||
|
int discussNum = item.getIntValue("discuss_num", 0); |
||||
|
int readNum = item.getIntValue("read_num", 0); |
||||
|
|
||||
|
if (isSportsRelated(topicName)) { |
||||
|
sportsHotList.add(item); |
||||
|
String line = String.format("序号:%d\t阅读:%d\t讨论:%d\t话题:%s", i + 1, readNum, discussNum, topicName); |
||||
|
System.out.println(line); |
||||
|
outputBuilder.append(line).append("\n"); |
||||
|
} |
||||
|
} |
||||
|
summary = "\n体育相关热搜总数:" + sportsHotList.size() + " 条"; |
||||
|
System.out.println(summary); |
||||
|
outputBuilder.append(summary).append("\n"); |
||||
|
|
||||
|
if (sportsHotList.isEmpty()) { |
||||
|
String emptyMsg = "当前贴吧热搜暂无体育相关内容"; |
||||
|
System.out.println(emptyMsg); |
||||
|
outputBuilder.append(emptyMsg).append("\n"); |
||||
|
} |
||||
|
|
||||
|
System.out.println("\n===== 百度贴吧 - 国家政策相关热搜 ====="); |
||||
|
outputBuilder.append("\n--- 国家政策相关热搜 ---\n"); |
||||
|
|
||||
|
for (int i = 0; i < topics.size(); i++) { |
||||
|
JSONObject item = topics.getJSONObject(i); |
||||
|
if (item == null) continue; |
||||
|
|
||||
|
String topicName = item.getString("topic_name"); |
||||
|
if (topicName == null || topicName.isEmpty()) continue; |
||||
|
|
||||
|
int discussNum = item.getIntValue("discuss_num", 0); |
||||
|
int readNum = item.getIntValue("read_num", 0); |
||||
|
|
||||
|
if (isPolicyRelated(topicName)) { |
||||
|
policyHotList.add(item); |
||||
|
String line = String.format("序号:%d\t阅读:%d\t讨论:%d\t话题:%s", i + 1, readNum, discussNum, topicName); |
||||
|
System.out.println(line); |
||||
|
outputBuilder.append(line).append("\n"); |
||||
|
} |
||||
|
} |
||||
|
summary = "\n国家政策相关热搜总数:" + policyHotList.size() + " 条"; |
||||
|
System.out.println(summary); |
||||
|
outputBuilder.append(summary).append("\n"); |
||||
|
|
||||
|
if (policyHotList.isEmpty()) { |
||||
|
String emptyMsg = "当前贴吧热搜暂无国家政策相关内容"; |
||||
|
System.out.println(emptyMsg); |
||||
|
outputBuilder.append(emptyMsg).append("\n"); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private static void parseAndFilterZhihu(String json) { |
||||
|
try { |
||||
|
JSONObject root = JSONObject.parseObject(json); |
||||
|
if (root == null) { |
||||
|
System.out.println("知乎数据格式错误:无法解析JSON"); |
||||
|
outputBuilder.append("数据格式错误:无法解析JSON\n"); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
// 尝试多种数据结构
|
||||
|
JSONArray data = null; |
||||
|
|
||||
|
// 结构1:直接在 data 数组中
|
||||
|
if (root.containsKey("data") && root.get("data") instanceof JSONArray) { |
||||
|
data = root.getJSONArray("data"); |
||||
|
} |
||||
|
// 结构2:在 data.topics 数组中
|
||||
|
else if (root.containsKey("data")) { |
||||
|
JSONObject dataObj = root.getJSONObject("data"); |
||||
|
if (dataObj != null && dataObj.containsKey("topics")) { |
||||
|
data = dataObj.getJSONArray("topics"); |
||||
|
} |
||||
|
} |
||||
|
// 结构3:在 top_search.words 数组中(知乎搜索API)
|
||||
|
else if (root.containsKey("top_search")) { |
||||
|
JSONObject topSearch = root.getJSONObject("top_search"); |
||||
|
if (topSearch != null && topSearch.containsKey("words")) { |
||||
|
data = topSearch.getJSONArray("words"); |
||||
|
} |
||||
|
} |
||||
|
// 结构4:直接是数组
|
||||
|
else if (json.startsWith("[")) { |
||||
|
data = JSONArray.parseArray(json); |
||||
|
} |
||||
|
|
||||
|
if (data == null || data.isEmpty()) { |
||||
|
System.out.println("知乎热搜数据为空或格式不匹配"); |
||||
|
outputBuilder.append("热搜数据为空或格式不匹配\n"); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
List<JSONObject> starHotList = new ArrayList<>(); |
||||
|
List<JSONObject> sportsHotList = new ArrayList<>(); |
||||
|
List<JSONObject> policyHotList = new ArrayList<>(); |
||||
|
|
||||
|
System.out.println("\n===== 知乎 - 明星相关热搜 ====="); |
||||
|
outputBuilder.append("\n--- 明星相关热搜 ---\n"); |
||||
|
|
||||
|
for (int i = 0; i < data.size(); i++) { |
||||
|
JSONObject item = data.getJSONObject(i); |
||||
|
if (item == null) continue; |
||||
|
|
||||
|
// 尝试多种标题字段
|
||||
|
String title = null; |
||||
|
if (item.containsKey("title")) { |
||||
|
title = item.getString("title"); |
||||
|
} else if (item.containsKey("topic_title")) { |
||||
|
title = item.getString("topic_title"); |
||||
|
} else if (item.containsKey("name")) { |
||||
|
title = item.getString("name"); |
||||
|
} else if (item.containsKey("target")) { |
||||
|
JSONObject target = item.getJSONObject("target"); |
||||
|
if (target != null) { |
||||
|
title = target.getString("title"); |
||||
|
} |
||||
|
} else if (item.containsKey("display_query")) { |
||||
|
title = item.getString("display_query"); |
||||
|
} else if (item.containsKey("query")) { |
||||
|
title = item.getString("query"); |
||||
|
} |
||||
|
|
||||
|
if (title == null || title.isEmpty()) continue; |
||||
|
|
||||
|
// 尝试获取热度值
|
||||
|
long hotValue = 0; |
||||
|
if (item.containsKey("hot_score")) { |
||||
|
hotValue = item.getLongValue("hot_score", 0); |
||||
|
} else if (item.containsKey("score")) { |
||||
|
hotValue = item.getLongValue("score", 0); |
||||
|
} else if (item.containsKey("detail_text")) { |
||||
|
String detailText = item.getString("detail_text"); |
||||
|
if (detailText != null) { |
||||
|
try { |
||||
|
String numStr = detailText.replaceAll("[^0-9]", ""); |
||||
|
if (!numStr.isEmpty()) { |
||||
|
hotValue = Long.parseLong(numStr); |
||||
|
} |
||||
|
} catch (Exception e) { |
||||
|
hotValue = 0; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
int rank = i + 1; |
||||
|
|
||||
|
if (isStarRelated(title)) { |
||||
|
starHotList.add(item); |
||||
|
String line = String.format("排名:%d\t热度:%d\t热搜:%s", rank, hotValue, title); |
||||
|
System.out.println(line); |
||||
|
outputBuilder.append(line).append("\n"); |
||||
|
} |
||||
|
} |
||||
|
String summary = "\n明星相关热搜总数:" + starHotList.size() + " 条"; |
||||
|
System.out.println(summary); |
||||
|
outputBuilder.append(summary).append("\n"); |
||||
|
|
||||
|
if (starHotList.isEmpty()) { |
||||
|
String emptyMsg = "当前知乎热搜暂无明星相关内容"; |
||||
|
System.out.println(emptyMsg); |
||||
|
outputBuilder.append(emptyMsg).append("\n"); |
||||
|
} |
||||
|
|
||||
|
System.out.println("\n===== 知乎 - 体育相关热搜 ====="); |
||||
|
outputBuilder.append("\n--- 体育相关热搜 ---\n"); |
||||
|
|
||||
|
for (int i = 0; i < data.size(); i++) { |
||||
|
JSONObject item = data.getJSONObject(i); |
||||
|
if (item == null) continue; |
||||
|
|
||||
|
String title = null; |
||||
|
if (item.containsKey("title")) { |
||||
|
title = item.getString("title"); |
||||
|
} else if (item.containsKey("topic_title")) { |
||||
|
title = item.getString("topic_title"); |
||||
|
} else if (item.containsKey("name")) { |
||||
|
title = item.getString("name"); |
||||
|
} else if (item.containsKey("target")) { |
||||
|
JSONObject target = item.getJSONObject("target"); |
||||
|
if (target != null) { |
||||
|
title = target.getString("title"); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
if (title == null || title.isEmpty()) continue; |
||||
|
|
||||
|
long hotValue = 0; |
||||
|
if (item.containsKey("hot_score")) { |
||||
|
hotValue = item.getLongValue("hot_score", 0); |
||||
|
} else if (item.containsKey("score")) { |
||||
|
hotValue = item.getLongValue("score", 0); |
||||
|
} |
||||
|
|
||||
|
int rank = i + 1; |
||||
|
|
||||
|
if (isSportsRelated(title)) { |
||||
|
sportsHotList.add(item); |
||||
|
String line = String.format("排名:%d\t热度:%d\t热搜:%s", rank, hotValue, title); |
||||
|
System.out.println(line); |
||||
|
outputBuilder.append(line).append("\n"); |
||||
|
} |
||||
|
} |
||||
|
summary = "\n体育相关热搜总数:" + sportsHotList.size() + " 条"; |
||||
|
System.out.println(summary); |
||||
|
outputBuilder.append(summary).append("\n"); |
||||
|
|
||||
|
if (sportsHotList.isEmpty()) { |
||||
|
String emptyMsg = "当前知乎热搜暂无体育相关内容"; |
||||
|
System.out.println(emptyMsg); |
||||
|
outputBuilder.append(emptyMsg).append("\n"); |
||||
|
} |
||||
|
|
||||
|
System.out.println("\n===== 知乎 - 国家政策相关热搜 ====="); |
||||
|
outputBuilder.append("\n--- 国家政策相关热搜 ---\n"); |
||||
|
|
||||
|
for (int i = 0; i < data.size(); i++) { |
||||
|
JSONObject item = data.getJSONObject(i); |
||||
|
if (item == null) continue; |
||||
|
|
||||
|
String title = null; |
||||
|
if (item.containsKey("title")) { |
||||
|
title = item.getString("title"); |
||||
|
} else if (item.containsKey("topic_title")) { |
||||
|
title = item.getString("topic_title"); |
||||
|
} else if (item.containsKey("name")) { |
||||
|
title = item.getString("name"); |
||||
|
} else if (item.containsKey("target")) { |
||||
|
JSONObject target = item.getJSONObject("target"); |
||||
|
if (target != null) { |
||||
|
title = target.getString("title"); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
if (title == null || title.isEmpty()) continue; |
||||
|
|
||||
|
long hotValue = 0; |
||||
|
if (item.containsKey("hot_score")) { |
||||
|
hotValue = item.getLongValue("hot_score", 0); |
||||
|
} else if (item.containsKey("score")) { |
||||
|
hotValue = item.getLongValue("score", 0); |
||||
|
} |
||||
|
|
||||
|
int rank = i + 1; |
||||
|
|
||||
|
if (isPolicyRelated(title)) { |
||||
|
policyHotList.add(item); |
||||
|
String line = String.format("排名:%d\t热度:%d\t热搜:%s", rank, hotValue, title); |
||||
|
System.out.println(line); |
||||
|
outputBuilder.append(line).append("\n"); |
||||
|
} |
||||
|
} |
||||
|
summary = "\n国家政策相关热搜总数:" + policyHotList.size() + " 条"; |
||||
|
System.out.println(summary); |
||||
|
outputBuilder.append(summary).append("\n"); |
||||
|
|
||||
|
if (policyHotList.isEmpty()) { |
||||
|
String emptyMsg = "当前知乎热搜暂无国家政策相关内容"; |
||||
|
System.out.println(emptyMsg); |
||||
|
outputBuilder.append(emptyMsg).append("\n"); |
||||
|
} |
||||
|
} catch (Exception e) { |
||||
|
System.out.println("知乎数据解析异常: " + e.getMessage()); |
||||
|
outputBuilder.append("数据解析异常: " + e.getMessage() + "\n"); |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private static boolean isStarRelated(String word) { |
||||
|
if (word == null || word.isEmpty()) { |
||||
|
return false; |
||||
|
} |
||||
|
for (String keyword : STAR_KEYWORDS) { |
||||
|
if (word.contains(keyword)) { |
||||
|
return true; |
||||
|
} |
||||
|
} |
||||
|
return false; |
||||
|
} |
||||
|
|
||||
|
private static boolean isSportsRelated(String word) { |
||||
|
if (word == null || word.isEmpty()) { |
||||
|
return false; |
||||
|
} |
||||
|
for (String keyword : SPORTS_KEYWORDS) { |
||||
|
if (word.contains(keyword)) { |
||||
|
return true; |
||||
|
} |
||||
|
} |
||||
|
return false; |
||||
|
} |
||||
|
|
||||
|
private static boolean isPolicyRelated(String word) { |
||||
|
if (word == null || word.isEmpty()) { |
||||
|
return false; |
||||
|
} |
||||
|
for (String keyword : POLICY_KEYWORDS) { |
||||
|
if (word.contains(keyword)) { |
||||
|
return true; |
||||
|
} |
||||
|
} |
||||
|
return false; |
||||
|
} |
||||
|
|
||||
|
private static String getCurrentTime() { |
||||
|
return new SimpleDateFormat("yyyy年MM月dd日 HH:mm:ss").format(new Date()); |
||||
|
} |
||||
|
|
||||
|
private static String saveToFile() throws IOException { |
||||
|
File dir = new File(OUTPUT_DIR); |
||||
|
if (!dir.exists()) { |
||||
|
dir.mkdirs(); |
||||
|
} |
||||
|
|
||||
|
String timestamp = new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date()); |
||||
|
String filename = "hotsearch_" + timestamp + ".txt"; |
||||
|
String filepath = OUTPUT_DIR + File.separator + filename; |
||||
|
|
||||
|
try (BufferedWriter writer = new BufferedWriter(new FileWriter(filepath))) { |
||||
|
writer.write(outputBuilder.toString()); |
||||
|
} |
||||
|
|
||||
|
return filepath; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,30 @@ |
|||||
|
package com.weibo.hotsearch; |
||||
|
|
||||
|
import com.alibaba.fastjson2.JSONObject; |
||||
|
|
||||
|
import java.util.List; |
||||
|
|
||||
|
public class ConsoleOutputHandler extends OutputHandler { |
||||
|
|
||||
|
@Override |
||||
|
public void output(List<JSONObject> hotList, String filterName) { |
||||
|
System.out.println("\n===== " + filterName + " ====="); |
||||
|
|
||||
|
if (hotList == null || hotList.isEmpty()) { |
||||
|
System.out.println("当前暂无符合条件的热搜内容"); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
for (int i = 0; i < hotList.size(); i++) { |
||||
|
JSONObject item = hotList.get(i); |
||||
|
System.out.println(formatHotItem(item, i, null)); |
||||
|
} |
||||
|
|
||||
|
System.out.println("\n===== 热搜总数:" + hotList.size() + " 条 ====="); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getOutputType() { |
||||
|
return "控制台输出"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,24 @@ |
|||||
|
package com.weibo.hotsearch; |
||||
|
|
||||
|
/**
 * Base class for keyword-based hot-search filters: a text matches when it contains at
 * least one of the configured keywords as a substring.
 */
public abstract class HotSearchFilter {

    /** Keywords to look for; may be {@code null}, which is treated as "no keywords". */
    protected String[] keywords;

    /**
     * @param keywords keywords this filter matches against; stored by reference
     */
    public HotSearchFilter(String[] keywords) {
        this.keywords = keywords;
    }

    /**
     * Tests whether {@code word} contains any configured keyword.
     *
     * @param word text to test; may be {@code null}
     * @return {@code true} if {@code word} is non-empty and contains at least one non-null
     *     keyword; {@code false} when {@code word} is null/empty or no keywords are configured
     */
    public boolean matches(String word) {
        if (word == null || word.isEmpty() || keywords == null) {
            return false;
        }
        for (String keyword : keywords) {
            // String.contains(null) throws, so null entries are skipped rather than tested.
            if (keyword != null && word.contains(keyword)) {
                return true;
            }
        }
        return false;
    }

    /** @return the human-readable name of this filter */
    public abstract String getFilterName();
}
||||
@ -0,0 +1,16 @@ |
|||||
|
package com.weibo.hotsearch; |
||||
|
|
||||
|
import com.weibo.hotsearch.cli.CliHandler; |
||||
|
|
||||
|
public class Main { |
||||
|
|
||||
|
public static void main(String[] args) { |
||||
|
try { |
||||
|
CliHandler handler = new CliHandler(args); |
||||
|
handler.handle(); |
||||
|
} catch (Exception e) { |
||||
|
System.err.println("程序执行异常: " + e.getMessage()); |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,46 @@ |
|||||
|
package com.weibo.hotsearch; |
||||
|
|
||||
|
import com.alibaba.fastjson2.JSONObject; |
||||
|
|
||||
|
import java.util.List; |
||||
|
|
||||
|
public abstract class OutputHandler { |
||||
|
|
||||
|
public abstract void output(List<JSONObject> hotList, String filterName); |
||||
|
|
||||
|
public abstract String getOutputType(); |
||||
|
|
||||
|
protected String formatHotItem(JSONObject item, int index, String dataSourceName) { |
||||
|
String word = getHotSearchWord(item); |
||||
|
long num = getHotSearchNum(item); |
||||
|
int rank = getHotSearchRank(item); |
||||
|
|
||||
|
if (rank > 0) { |
||||
|
return String.format("排名:%d\t热度:%d\t热搜:%s", rank, num, word); |
||||
|
} else { |
||||
|
return String.format("序号:%d\t热度:%d\t热搜:%s", index + 1, num, word); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
protected String getHotSearchWord(JSONObject item) { |
||||
|
if (item == null) return "未知"; |
||||
|
String word = item.getString("word"); |
||||
|
if (word == null || word.isEmpty()) { |
||||
|
word = item.getString("topic_name"); |
||||
|
} |
||||
|
if (word == null || word.isEmpty()) { |
||||
|
word = item.getString("title"); |
||||
|
} |
||||
|
return word != null ? word : "未知"; |
||||
|
} |
||||
|
|
||||
|
protected long getHotSearchNum(JSONObject item) { |
||||
|
if (item == null) return 0; |
||||
|
return item.getLongValue("num", 0); |
||||
|
} |
||||
|
|
||||
|
protected int getHotSearchRank(JSONObject item) { |
||||
|
if (item == null) return 0; |
||||
|
return item.getIntValue("rank", 0); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,18 @@ |
|||||
|
package com.weibo.hotsearch; |
||||
|
|
||||
|
public class StarFilter extends HotSearchFilter { |
||||
|
|
||||
|
private static final String[] STAR_KEYWORDS = { |
||||
|
"明星", "演员", "歌手", "爱豆", "艺人", "红毯", "综艺", "新剧", |
||||
|
"恋情", "官宣", "演唱会", "代言", "造型", "封面" |
||||
|
}; |
||||
|
|
||||
|
public StarFilter() { |
||||
|
super(STAR_KEYWORDS); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getFilterName() { |
||||
|
return "明星相关热搜"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,133 @@ |
|||||
|
package com.weibo.hotsearch.cli; |
||||
|
|
||||
|
import com.weibo.hotsearch.controller.HotSearchController; |
||||
|
import com.weibo.hotsearch.exception.ErrorCode; |
||||
|
import com.weibo.hotsearch.exception.HotSearchException; |
||||
|
|
||||
|
public class CliHandler { |
||||
|
|
||||
|
private final CliParser parser; |
||||
|
private final HotSearchController controller; |
||||
|
|
||||
|
public CliHandler(String[] args) { |
||||
|
this.parser = new CliParser(); |
||||
|
this.controller = new HotSearchController(); |
||||
|
this.parser.parse(args); |
||||
|
} |
||||
|
|
||||
|
public void handle() { |
||||
|
String command = parser.getCommand(); |
||||
|
|
||||
|
if (command == null || command.isEmpty()) { |
||||
|
parser.printUsage(); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
try { |
||||
|
switch (command.toLowerCase()) { |
||||
|
case "help": |
||||
|
parser.printUsage(); |
||||
|
break; |
||||
|
case "fetch": |
||||
|
handleFetch(); |
||||
|
break; |
||||
|
case "filter": |
||||
|
handleFilter(); |
||||
|
break; |
||||
|
case "output": |
||||
|
handleOutput(); |
||||
|
break; |
||||
|
case "save": |
||||
|
handleSave(); |
||||
|
break; |
||||
|
case "run": |
||||
|
handleRun(); |
||||
|
break; |
||||
|
default: |
||||
|
throw new HotSearchException(ErrorCode.CLI_COMMAND_NOT_FOUND, "未知命令: " + command); |
||||
|
} |
||||
|
} catch (HotSearchException e) { |
||||
|
System.err.println("\n错误 [" + e.getErrorCode().getCode() + "]: " + e.getMessage()); |
||||
|
if (e.getCause() != null) { |
||||
|
e.getCause().printStackTrace(); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private void handleFetch() throws HotSearchException { |
||||
|
String source = parser.getOption("s"); |
||||
|
if (source == null) { |
||||
|
source = parser.getOption("source"); |
||||
|
} |
||||
|
if (source == null) { |
||||
|
throw new HotSearchException(ErrorCode.PARAMETER_ERROR, "请指定数据源 (-s 或 --source)"); |
||||
|
} |
||||
|
controller.executeFetch(source); |
||||
|
} |
||||
|
|
||||
|
private void handleFilter() throws HotSearchException { |
||||
|
String filter = parser.getOption("f"); |
||||
|
if (filter == null) { |
||||
|
filter = parser.getOption("filter"); |
||||
|
} |
||||
|
if (filter == null) { |
||||
|
throw new HotSearchException(ErrorCode.PARAMETER_ERROR, "请指定过滤器 (-f 或 --filter)"); |
||||
|
} |
||||
|
controller.executeFilter(filter); |
||||
|
} |
||||
|
|
||||
|
private void handleOutput() throws HotSearchException { |
||||
|
String output = parser.getOption("o"); |
||||
|
if (output == null) { |
||||
|
output = parser.getOption("output"); |
||||
|
} |
||||
|
if (output == null) { |
||||
|
output = "console"; |
||||
|
} |
||||
|
controller.executeOutput(output); |
||||
|
} |
||||
|
|
||||
|
private void handleSave() throws HotSearchException { |
||||
|
String path = parser.getOption("p"); |
||||
|
if (path == null) { |
||||
|
path = parser.getOption("path"); |
||||
|
} |
||||
|
controller.executeSave(path); |
||||
|
} |
||||
|
|
||||
|
private void handleRun() throws HotSearchException { |
||||
|
String source = parser.getOption("s"); |
||||
|
if (source == null) { |
||||
|
source = parser.getOption("source"); |
||||
|
} |
||||
|
if (source == null) { |
||||
|
source = "all"; |
||||
|
} |
||||
|
|
||||
|
String filter = parser.getOption("f"); |
||||
|
if (filter == null) { |
||||
|
filter = parser.getOption("filter"); |
||||
|
} |
||||
|
if (filter == null) { |
||||
|
filter = "star"; |
||||
|
} |
||||
|
|
||||
|
String output = parser.getOption("o"); |
||||
|
if (output == null) { |
||||
|
output = parser.getOption("output"); |
||||
|
} |
||||
|
if (output == null) { |
||||
|
output = "console"; |
||||
|
} |
||||
|
|
||||
|
String path = parser.getOption("p"); |
||||
|
if (path == null) { |
||||
|
path = parser.getOption("path"); |
||||
|
} |
||||
|
|
||||
|
controller.executeFetch(source); |
||||
|
controller.executeFilter(filter); |
||||
|
controller.executeOutput(output); |
||||
|
controller.executeSave(path); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,101 @@ |
|||||
|
package com.weibo.hotsearch.cli; |
||||
|
|
||||
|
import com.weibo.hotsearch.exception.CliException; |
||||
|
import com.weibo.hotsearch.exception.ErrorCode; |
||||
|
|
||||
|
import java.util.*; |
||||
|
|
||||
|
/**
 * Minimal command-line parser.
 *
 * <p>Recognised token shapes:
 * <ul>
 *   <li>{@code --name=value} and {@code --name value} – long option with a value;</li>
 *   <li>{@code --name} – long flag, stored with the value {@code "true"};</li>
 *   <li>{@code -x value} – single-letter option with a value;</li>
 *   <li>{@code -x} – single-letter flag, stored as {@code "true"};</li>
 *   <li>{@code -abc} – bundle of single-letter flags, each stored as {@code "true"};</li>
 *   <li>anything else – the first such token is the command, later ones are arguments.</li>
 * </ul>
 * A token following an option is taken as its value only when it does not start with
 * {@code -}. Consequently a value-less flag directly followed by a positional token
 * consumes that token; write {@code --flag=true} or place the flag last to avoid this.
 */
public class CliParser {

    private final Map<String, String> options = new HashMap<>();
    private final List<String> arguments = new ArrayList<>();
    private String command;

    /**
     * Parses {@code args} into command, options and positional arguments. Prints the
     * usage text and returns immediately when {@code args} is {@code null} or empty.
     *
     * @param args raw command-line arguments
     */
    public void parse(String[] args) {
        if (args == null || args.length == 0) {
            printUsage();
            return;
        }

        int i = 0;
        while (i < args.length) {
            String arg = args[i];

            if (arg.startsWith("--")) {
                String[] parts = arg.substring(2).split("=", 2);
                String value;
                if (parts.length > 1) {
                    value = parts[1];
                } else if (hasValueAt(args, i + 1)) {
                    // "--source weibo": the usage text documents this form, so accept the
                    // next token as the value exactly like the short options do.
                    value = args[i + 1];
                    i++;
                } else {
                    value = "true";
                }
                options.put(parts[0], value);
            } else if (arg.startsWith("-")) {
                String key = arg.substring(1);
                if (key.length() == 1) {
                    String value = "true";
                    if (hasValueAt(args, i + 1)) {
                        value = args[i + 1];
                        i++;
                    }
                    options.put(key, value);
                } else {
                    // Bundled flags such as "-abc": every letter becomes its own flag.
                    for (char c : key.toCharArray()) {
                        options.put(String.valueOf(c), "true");
                    }
                }
            } else if (command == null) {
                command = arg;
            } else {
                arguments.add(arg);
            }
            i++;
        }
    }

    /** Returns whether {@code args[index]} exists and does not look like another option. */
    private static boolean hasValueAt(String[] args, int index) {
        return index < args.length && !args[index].startsWith("-");
    }

    /** @return the command token, or {@code null} if none was parsed */
    public String getCommand() {
        return command;
    }

    /**
     * @param key option name without leading dashes
     * @return the option value, or {@code null} if the option was not given
     */
    public String getOption(String key) {
        return options.get(key);
    }

    /**
     * @param key option name without leading dashes
     * @return whether the option was given
     */
    public boolean hasOption(String key) {
        return options.containsKey(key);
    }

    /** @return the positional arguments after the command (the internal list, not a copy) */
    public List<String> getArguments() {
        return arguments;
    }

    /** @return a copy of all parsed options */
    public Map<String, String> getAllOptions() {
        return new HashMap<>(options);
    }

    /** Prints the usage/help text to standard output. */
    public void printUsage() {
        System.out.println("\n===== 热搜数据采集工具 =====");
        System.out.println("用法:");
        System.out.println(" java -jar weibo-hotsearch-1.0-SNAPSHOT-jar-with-dependencies.jar [命令] [选项]");
        System.out.println("\n命令:");
        System.out.println(" fetch 获取热搜数据");
        System.out.println(" filter 过滤热搜数据");
        System.out.println(" output 输出热搜数据");
        System.out.println(" save 保存数据到文件");
        System.out.println(" run 执行完整流程");
        System.out.println(" help 显示帮助信息");
        System.out.println("\n选项:");
        System.out.println(" -s, --source <数据源> 指定数据源: weibo/tieba/zhihu/all");
        System.out.println(" -f, --filter <过滤器> 指定过滤器: star/sports/policy");
        System.out.println(" -o, --output <类型> 指定输出类型: console/text");
        System.out.println(" -p, --path <路径> 指定保存路径");
        System.out.println(" -h, --help 显示帮助信息");
        System.out.println("\n示例:");
        System.out.println(" java -jar xxx.jar run -s weibo -f star -o console");
        System.out.println(" java -jar xxx.jar fetch -s all");
        System.out.println(" java -jar xxx.jar filter -f sports");
        System.out.println(" java -jar xxx.jar output -o console");
        System.out.println(" java -jar xxx.jar save -p ./result.txt");
        System.out.println();
    }
}
||||
@ -0,0 +1,12 @@ |
|||||
|
package com.weibo.hotsearch.command; |
||||
|
|
||||
|
import com.weibo.hotsearch.exception.HotSearchException; |
||||
|
|
||||
|
public interface Command { |
||||
|
|
||||
|
void execute() throws HotSearchException; |
||||
|
|
||||
|
String getCommandName(); |
||||
|
|
||||
|
String getDescription(); |
||||
|
} |
||||
@ -0,0 +1,41 @@ |
|||||
|
package com.weibo.hotsearch.command; |
||||
|
|
||||
|
import com.weibo.hotsearch.exception.ErrorCode; |
||||
|
import com.weibo.hotsearch.exception.HotSearchException; |
||||
|
|
||||
|
import java.util.HashMap; |
||||
|
import java.util.Map; |
||||
|
|
||||
|
public class CommandInvoker { |
||||
|
|
||||
|
private final Map<String, Command> commands = new HashMap<>(); |
||||
|
|
||||
|
public void registerCommand(String name, Command command) { |
||||
|
commands.put(name.toLowerCase(), command); |
||||
|
} |
||||
|
|
||||
|
public void executeCommand(String name) throws HotSearchException { |
||||
|
Command command = commands.get(name.toLowerCase()); |
||||
|
if (command == null) { |
||||
|
throw new HotSearchException(ErrorCode.CLI_COMMAND_NOT_FOUND, "命令未找到: " + name); |
||||
|
} |
||||
|
command.execute(); |
||||
|
} |
||||
|
|
||||
|
public boolean hasCommand(String name) { |
||||
|
return commands.containsKey(name.toLowerCase()); |
||||
|
} |
||||
|
|
||||
|
public void printHelp() { |
||||
|
System.out.println("\n===== 命令帮助 ====="); |
||||
|
System.out.println("可用命令:"); |
||||
|
for (Map.Entry<String, Command> entry : commands.entrySet()) { |
||||
|
System.out.printf(" %-10s - %s%n", entry.getKey(), entry.getValue().getDescription()); |
||||
|
} |
||||
|
System.out.println(); |
||||
|
} |
||||
|
|
||||
|
public Map<String, Command> getCommands() { |
||||
|
return new HashMap<>(commands); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,47 @@ |
|||||
|
package com.weibo.hotsearch.command; |
||||
|
|
||||
|
/**
 * Outcome of a command: a success flag, a message and an optional payload.
 */
public class CommandResult {

    private final boolean success;
    private final String message;
    private final Object data;

    /**
     * Creates a result without a payload.
     *
     * @param success whether the command succeeded
     * @param message description of the outcome
     */
    public CommandResult(boolean success, String message) {
        this(success, message, null);
    }

    /**
     * Creates a result with a payload.
     *
     * @param success whether the command succeeded
     * @param message description of the outcome
     * @param data payload attached to the result; may be {@code null}
     */
    public CommandResult(boolean success, String message, Object data) {
        this.success = success;
        this.message = message;
        this.data = data;
    }

    /** @return a successful result carrying only {@code message} */
    public static CommandResult success(String message) {
        return new CommandResult(true, message);
    }

    /** @return a successful result carrying {@code message} and {@code data} */
    public static CommandResult success(String message, Object data) {
        return new CommandResult(true, message, data);
    }

    /** @return a failed result carrying only {@code message} */
    public static CommandResult failure(String message) {
        return new CommandResult(false, message);
    }

    /** @return a failed result carrying {@code message} and {@code data} */
    public static CommandResult failure(String message, Object data) {
        return new CommandResult(false, message, data);
    }

    /** @return whether the command succeeded */
    public boolean isSuccess() {
        return success;
    }

    /** @return the outcome description */
    public String getMessage() {
        return message;
    }

    /** @return the payload, or {@code null} when none was supplied */
    public Object getData() {
        return data;
    }
}
||||
@ -0,0 +1,34 @@ |
|||||
|
package com.weibo.hotsearch.command; |
||||
|
|
||||
|
import com.weibo.hotsearch.exception.ErrorCode; |
||||
|
import com.weibo.hotsearch.exception.HotSearchException; |
||||
|
import com.weibo.hotsearch.service.DataFetcher; |
||||
|
|
||||
|
public class FetchCommand implements Command { |
||||
|
|
||||
|
private final DataFetcher dataFetcher; |
||||
|
private final String source; |
||||
|
|
||||
|
public FetchCommand(DataFetcher dataFetcher, String source) { |
||||
|
this.dataFetcher = dataFetcher; |
||||
|
this.source = source; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute() throws HotSearchException { |
||||
|
if (source == null || source.isEmpty()) { |
||||
|
throw new HotSearchException(ErrorCode.PARAMETER_ERROR, "数据源不能为空"); |
||||
|
} |
||||
|
dataFetcher.fetch(source); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getCommandName() { |
||||
|
return "fetch"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getDescription() { |
||||
|
return "从指定数据源获取热搜数据"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,35 @@ |
|||||
|
package com.weibo.hotsearch.command; |
||||
|
|
||||
|
import com.weibo.hotsearch.exception.ErrorCode; |
||||
|
import com.weibo.hotsearch.exception.HotSearchException; |
||||
|
import com.weibo.hotsearch.service.FilterService; |
||||
|
import com.weibo.hotsearch.strategy.FilterStrategy; |
||||
|
|
||||
|
public class FilterCommand implements Command { |
||||
|
|
||||
|
private final FilterService filterService; |
||||
|
private final FilterStrategy strategy; |
||||
|
|
||||
|
public FilterCommand(FilterService filterService, FilterStrategy strategy) { |
||||
|
this.filterService = filterService; |
||||
|
this.strategy = strategy; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute() throws HotSearchException { |
||||
|
if (strategy == null) { |
||||
|
throw new HotSearchException(ErrorCode.PARAMETER_ERROR, "过滤策略不能为空"); |
||||
|
} |
||||
|
filterService.filter(strategy); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getCommandName() { |
||||
|
return "filter"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getDescription() { |
||||
|
return "使用指定策略过滤热搜数据"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,27 @@ |
|||||
|
package com.weibo.hotsearch.command; |
||||
|
|
||||
|
import com.weibo.hotsearch.exception.HotSearchException; |
||||
|
|
||||
|
public class HelpCommand implements Command { |
||||
|
|
||||
|
private final CommandInvoker invoker; |
||||
|
|
||||
|
public HelpCommand(CommandInvoker invoker) { |
||||
|
this.invoker = invoker; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute() throws HotSearchException { |
||||
|
invoker.printHelp(); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getCommandName() { |
||||
|
return "help"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getDescription() { |
||||
|
return "显示帮助信息"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,34 @@ |
|||||
|
package com.weibo.hotsearch.command; |
||||
|
|
||||
|
import com.weibo.hotsearch.exception.ErrorCode; |
||||
|
import com.weibo.hotsearch.exception.HotSearchException; |
||||
|
import com.weibo.hotsearch.service.OutputService; |
||||
|
|
||||
|
public class OutputCommand implements Command { |
||||
|
|
||||
|
private final OutputService outputService; |
||||
|
private final String outputType; |
||||
|
|
||||
|
public OutputCommand(OutputService outputService, String outputType) { |
||||
|
this.outputService = outputService; |
||||
|
this.outputType = outputType; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute() throws HotSearchException { |
||||
|
if (outputType == null || outputType.isEmpty()) { |
||||
|
throw new HotSearchException(ErrorCode.PARAMETER_ERROR, "输出类型不能为空"); |
||||
|
} |
||||
|
outputService.output(outputType); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getCommandName() { |
||||
|
return "output"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getDescription() { |
||||
|
return "输出过滤后的热搜数据"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,30 @@ |
|||||
|
package com.weibo.hotsearch.command; |
||||
|
|
||||
|
import com.weibo.hotsearch.exception.HotSearchException; |
||||
|
import com.weibo.hotsearch.service.OutputService; |
||||
|
|
||||
|
public class SaveCommand implements Command { |
||||
|
|
||||
|
private final OutputService outputService; |
||||
|
private final String filePath; |
||||
|
|
||||
|
public SaveCommand(OutputService outputService, String filePath) { |
||||
|
this.outputService = outputService; |
||||
|
this.filePath = filePath; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute() throws HotSearchException { |
||||
|
outputService.saveToFile(filePath); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getCommandName() { |
||||
|
return "save"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getDescription() { |
||||
|
return "保存数据到文件"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,69 @@ |
|||||
|
package com.weibo.hotsearch.controller; |
||||
|
|
||||
|
import com.weibo.hotsearch.command.*; |
||||
|
import com.weibo.hotsearch.exception.HotSearchException; |
||||
|
import com.weibo.hotsearch.service.DataFetcher; |
||||
|
import com.weibo.hotsearch.service.FilterService; |
||||
|
import com.weibo.hotsearch.service.OutputService; |
||||
|
import com.weibo.hotsearch.strategy.FilterStrategy; |
||||
|
import com.weibo.hotsearch.strategy.FilterStrategyFactory; |
||||
|
|
||||
|
public class HotSearchController { |
||||
|
|
||||
|
private final DataFetcher dataFetcher; |
||||
|
private final FilterService filterService; |
||||
|
private final OutputService outputService; |
||||
|
private final CommandInvoker commandInvoker; |
||||
|
|
||||
|
public HotSearchController() { |
||||
|
this.dataFetcher = new DataFetcher(); |
||||
|
this.filterService = new FilterService(); |
||||
|
this.outputService = new OutputService(); |
||||
|
this.commandInvoker = new CommandInvoker(); |
||||
|
registerCommands(); |
||||
|
} |
||||
|
|
||||
|
private void registerCommands() { |
||||
|
commandInvoker.registerCommand("help", new HelpCommand(commandInvoker)); |
||||
|
} |
||||
|
|
||||
|
public void executeFetch(String source) throws HotSearchException { |
||||
|
FetchCommand command = new FetchCommand(dataFetcher, source); |
||||
|
command.execute(); |
||||
|
System.out.println("已从 " + source + " 获取数据"); |
||||
|
} |
||||
|
|
||||
|
public void executeFilter(String filterCode) throws HotSearchException { |
||||
|
FilterStrategy strategy = FilterStrategyFactory.getStrategy(filterCode); |
||||
|
if (strategy == null) { |
||||
|
throw new HotSearchException(com.weibo.hotsearch.exception.ErrorCode.PARAMETER_ERROR, |
||||
|
"未知的过滤策略: " + filterCode); |
||||
|
} |
||||
|
FilterCommand command = new FilterCommand(filterService, strategy); |
||||
|
command.execute(); |
||||
|
System.out.println("已应用过滤策略: " + strategy.getFilterName()); |
||||
|
} |
||||
|
|
||||
|
public void executeOutput(String outputType) throws HotSearchException { |
||||
|
OutputCommand command = new OutputCommand(outputService, outputType); |
||||
|
command.execute(); |
||||
|
} |
||||
|
|
||||
|
public void executeSave(String filePath) throws HotSearchException { |
||||
|
SaveCommand command = new SaveCommand(outputService, filePath); |
||||
|
command.execute(); |
||||
|
} |
||||
|
|
||||
|
public void showHelp() throws HotSearchException { |
||||
|
commandInvoker.executeCommand("help"); |
||||
|
} |
||||
|
|
||||
|
public void processFullPipeline(String source, String filterCode, String outputType, String savePath) throws HotSearchException { |
||||
|
executeFetch(source); |
||||
|
executeFilter(filterCode); |
||||
|
executeOutput(outputType); |
||||
|
if (savePath != null || !savePath.isEmpty()) { |
||||
|
executeSave(savePath); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,20 @@ |
|||||
|
package com.weibo.hotsearch.exception; |
||||
|
|
||||
|
public class CliException extends HotSearchException { |
||||
|
|
||||
|
public CliException(ErrorCode errorCode) { |
||||
|
super(errorCode); |
||||
|
} |
||||
|
|
||||
|
public CliException(ErrorCode errorCode, Throwable cause) { |
||||
|
super(errorCode, cause); |
||||
|
} |
||||
|
|
||||
|
public CliException(ErrorCode errorCode, String detail) { |
||||
|
super(errorCode, detail); |
||||
|
} |
||||
|
|
||||
|
public CliException(ErrorCode errorCode, String detail, Throwable cause) { |
||||
|
super(errorCode, detail, cause); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,20 @@ |
|||||
|
package com.weibo.hotsearch.exception; |
||||
|
|
||||
|
public class DataParseException extends HotSearchException { |
||||
|
|
||||
|
public DataParseException(ErrorCode errorCode) { |
||||
|
super(errorCode); |
||||
|
} |
||||
|
|
||||
|
public DataParseException(ErrorCode errorCode, Throwable cause) { |
||||
|
super(errorCode, cause); |
||||
|
} |
||||
|
|
||||
|
public DataParseException(ErrorCode errorCode, String detail) { |
||||
|
super(errorCode, detail); |
||||
|
} |
||||
|
|
||||
|
public DataParseException(ErrorCode errorCode, String detail, Throwable cause) { |
||||
|
super(errorCode, detail, cause); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,47 @@ |
|||||
|
package com.weibo.hotsearch.exception; |
||||
|
|
||||
|
/**
 * Error codes used by the application, each pairing a numeric code with a default
 * message. Codes are grouped by thousand: 1xxx general, 2xxx CLI, 3xxx network,
 * 4xxx data, 5xxx service.
 */
public enum ErrorCode {

    // General errors
    SUCCESS(0, "操作成功"),
    UNKNOWN_ERROR(1000, "未知错误"),
    PARAMETER_ERROR(1001, "参数错误"),
    NOT_FOUND(1002, "资源未找到"),
    DUPLICATE_ERROR(1003, "重复操作"),

    // CLI errors
    CLI_PARSE_ERROR(2001, "命令行参数解析错误"),
    CLI_COMMAND_NOT_FOUND(2002, "命令未找到"),
    CLI_INVALID_OPTION(2003, "无效的选项"),

    // Network errors
    NETWORK_ERROR(3001, "网络请求失败"),
    CONNECTION_TIMEOUT(3002, "连接超时"),
    HTTP_ERROR(3003, "HTTP请求错误"),

    // Data errors
    DATA_PARSE_ERROR(4001, "数据解析失败"),
    DATA_FORMAT_ERROR(4002, "数据格式错误"),
    DATA_EMPTY(4003, "数据为空"),

    // Service errors
    SERVICE_UNAVAILABLE(5001, "服务不可用"),
    SERVICE_RATE_LIMITED(5002, "请求被限流"),
    AUTHENTICATION_FAILED(5003, "认证失败");

    private final int code;
    private final String message;

    ErrorCode(int numericCode, String defaultMessage) {
        this.code = numericCode;
        this.message = defaultMessage;
    }

    /** @return the numeric code */
    public int getCode() {
        return code;
    }

    /** @return the default message for this code */
    public String getMessage() {
        return message;
    }
}
||||
@ -0,0 +1,30 @@ |
|||||
|
package com.weibo.hotsearch.exception; |
||||
|
|
||||
|
public class HotSearchException extends Exception { |
||||
|
|
||||
|
private final ErrorCode errorCode; |
||||
|
|
||||
|
public HotSearchException(ErrorCode errorCode) { |
||||
|
super(errorCode.getMessage()); |
||||
|
this.errorCode = errorCode; |
||||
|
} |
||||
|
|
||||
|
public HotSearchException(ErrorCode errorCode, Throwable cause) { |
||||
|
super(errorCode.getMessage(), cause); |
||||
|
this.errorCode = errorCode; |
||||
|
} |
||||
|
|
||||
|
public HotSearchException(ErrorCode errorCode, String detail) { |
||||
|
super(errorCode.getMessage() + ": " + detail); |
||||
|
this.errorCode = errorCode; |
||||
|
} |
||||
|
|
||||
|
public HotSearchException(ErrorCode errorCode, String detail, Throwable cause) { |
||||
|
super(errorCode.getMessage() + ": " + detail, cause); |
||||
|
this.errorCode = errorCode; |
||||
|
} |
||||
|
|
||||
|
public ErrorCode getErrorCode() { |
||||
|
return errorCode; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,20 @@ |
|||||
|
package com.weibo.hotsearch.exception; |
||||
|
|
||||
|
public class NetworkException extends HotSearchException { |
||||
|
|
||||
|
public NetworkException(ErrorCode errorCode) { |
||||
|
super(errorCode); |
||||
|
} |
||||
|
|
||||
|
public NetworkException(ErrorCode errorCode, Throwable cause) { |
||||
|
super(errorCode, cause); |
||||
|
} |
||||
|
|
||||
|
public NetworkException(ErrorCode errorCode, String detail) { |
||||
|
super(errorCode, detail); |
||||
|
} |
||||
|
|
||||
|
public NetworkException(ErrorCode errorCode, String detail, Throwable cause) { |
||||
|
super(errorCode, detail, cause); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,49 @@ |
|||||
|
package com.weibo.hotsearch.model; |
||||
|
|
||||
|
import java.util.HashMap; |
||||
|
import java.util.Map; |
||||
|
|
||||
|
public class AppContext { |
||||
|
|
||||
|
private static final AppContext instance = new AppContext(); |
||||
|
|
||||
|
private final Map<String, Object> attributes = new HashMap<>(); |
||||
|
|
||||
|
private HotSearchResult currentResult; |
||||
|
|
||||
|
private AppContext() { |
||||
|
} |
||||
|
|
||||
|
public static AppContext getInstance() { |
||||
|
return instance; |
||||
|
} |
||||
|
|
||||
|
public void setAttribute(String key, Object value) { |
||||
|
attributes.put(key, value); |
||||
|
} |
||||
|
|
||||
|
public Object getAttribute(String key) { |
||||
|
return attributes.get(key); |
||||
|
} |
||||
|
|
||||
|
public void removeAttribute(String key) { |
||||
|
attributes.remove(key); |
||||
|
} |
||||
|
|
||||
|
public boolean hasAttribute(String key) { |
||||
|
return attributes.containsKey(key); |
||||
|
} |
||||
|
|
||||
|
public HotSearchResult getCurrentResult() { |
||||
|
return currentResult; |
||||
|
} |
||||
|
|
||||
|
public void setCurrentResult(HotSearchResult currentResult) { |
||||
|
this.currentResult = currentResult; |
||||
|
} |
||||
|
|
||||
|
public void clear() { |
||||
|
attributes.clear(); |
||||
|
currentResult = null; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,69 @@ |
|||||
|
package com.weibo.hotsearch.model; |
||||
|
|
||||
|
import java.time.LocalDateTime; |
||||
|
|
||||
|
/**
 * Mutable value object describing one hot-search entry.
 */
public class HotSearchItem {

    private String title;
    private long hotValue;
    private int rank;
    private String source;
    private LocalDateTime fetchTime;

    /** Creates an empty item; all fields keep their Java default values. */
    public HotSearchItem() {
    }

    /**
     * Creates an item and stamps it with the current local time as fetch time.
     *
     * @param title entry title
     * @param hotValue popularity value
     * @param rank rank of the entry
     * @param source name of the originating data source
     */
    public HotSearchItem(String title, long hotValue, int rank, String source) {
        this.title = title;
        this.hotValue = hotValue;
        this.rank = rank;
        this.source = source;
        this.fetchTime = LocalDateTime.now();
    }

    /** @return the entry title */
    public String getTitle() {
        return title;
    }

    /** @param title the entry title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the popularity value */
    public long getHotValue() {
        return hotValue;
    }

    /** @param hotValue the popularity value */
    public void setHotValue(long hotValue) {
        this.hotValue = hotValue;
    }

    /** @return the rank */
    public int getRank() {
        return rank;
    }

    /** @param rank the rank */
    public void setRank(int rank) {
        this.rank = rank;
    }

    /** @return the originating data source name */
    public String getSource() {
        return source;
    }

    /** @param source the originating data source name */
    public void setSource(String source) {
        this.source = source;
    }

    /** @return the fetch time, or {@code null} when it was never set */
    public LocalDateTime getFetchTime() {
        return fetchTime;
    }

    /** @param fetchTime the fetch time */
    public void setFetchTime(LocalDateTime fetchTime) {
        this.fetchTime = fetchTime;
    }

    /** @return a debug representation containing title, hot value, rank and source */
    @Override
    public String toString() {
        String template = "HotSearchItem{title='%s', hotValue=%d, rank=%d, source='%s'}";
        return String.format(template, title, hotValue, rank, source);
    }
}
||||
@ -0,0 +1,75 @@ |
|||||
|
package com.weibo.hotsearch.model; |
||||
|
|
||||
|
import java.time.LocalDateTime; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class HotSearchResult { |
||||
|
|
||||
|
private List<HotSearchItem> items; |
||||
|
private String filterName; |
||||
|
private String dataSource; |
||||
|
private LocalDateTime fetchTime; |
||||
|
private int totalCount; |
||||
|
|
||||
|
public HotSearchResult() { |
||||
|
this.items = new ArrayList<>(); |
||||
|
this.fetchTime = LocalDateTime.now(); |
||||
|
} |
||||
|
|
||||
|
public HotSearchResult(List<HotSearchItem> items, String filterName, String dataSource) { |
||||
|
this.items = items != null ? items : new ArrayList<>(); |
||||
|
this.filterName = filterName; |
||||
|
this.dataSource = dataSource; |
||||
|
this.fetchTime = LocalDateTime.now(); |
||||
|
this.totalCount = this.items.size(); |
||||
|
} |
||||
|
|
||||
|
public List<HotSearchItem> getItems() { |
||||
|
return items; |
||||
|
} |
||||
|
|
||||
|
public void setItems(List<HotSearchItem> items) { |
||||
|
this.items = items != null ? items : new ArrayList<>(); |
||||
|
this.totalCount = this.items.size(); |
||||
|
} |
||||
|
|
||||
|
public String getFilterName() { |
||||
|
return filterName; |
||||
|
} |
||||
|
|
||||
|
public void setFilterName(String filterName) { |
||||
|
this.filterName = filterName; |
||||
|
} |
||||
|
|
||||
|
public String getDataSource() { |
||||
|
return dataSource; |
||||
|
} |
||||
|
|
||||
|
public void setDataSource(String dataSource) { |
||||
|
this.dataSource = dataSource; |
||||
|
} |
||||
|
|
||||
|
public LocalDateTime getFetchTime() { |
||||
|
return fetchTime; |
||||
|
} |
||||
|
|
||||
|
public void setFetchTime(LocalDateTime fetchTime) { |
||||
|
this.fetchTime = fetchTime; |
||||
|
} |
||||
|
|
||||
|
public int getTotalCount() { |
||||
|
return totalCount; |
||||
|
} |
||||
|
|
||||
|
public void addItem(HotSearchItem item) { |
||||
|
if (item != null) { |
||||
|
this.items.add(item); |
||||
|
this.totalCount++; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
public boolean isEmpty() { |
||||
|
return items == null || items.isEmpty(); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,307 @@ |
|||||
|
package com.weibo.hotsearch.service; |
||||
|
|
||||
|
import com.alibaba.fastjson2.JSONArray; |
||||
|
import com.alibaba.fastjson2.JSONObject; |
||||
|
import com.weibo.hotsearch.exception.DataParseException; |
||||
|
import com.weibo.hotsearch.exception.ErrorCode; |
||||
|
import com.weibo.hotsearch.exception.HotSearchException; |
||||
|
import com.weibo.hotsearch.exception.NetworkException; |
||||
|
import com.weibo.hotsearch.model.AppContext; |
||||
|
import com.weibo.hotsearch.model.HotSearchItem; |
||||
|
import com.weibo.hotsearch.model.HotSearchResult; |
||||
|
import org.apache.hc.client5.http.fluent.Request; |
||||
|
import org.apache.hc.core5.util.Timeout; |
||||
|
|
||||
|
import java.nio.charset.StandardCharsets; |
||||
|
import java.time.LocalDateTime; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class DataFetcher { |
||||
|
|
||||
|
private static final String WEIBO_URL = "https://weibo.com/ajax/side/hotSearch"; |
||||
|
private static final String TIEBA_URL = "https://tieba.baidu.com/hottopic/browse/topicList"; |
||||
|
private static final String ZHIHU_URL = "https://www.zhihu.com/api/v4/search/top_search"; |
||||
|
|
||||
|
private static final int CONNECT_TIMEOUT = 10000; |
||||
|
private static final int RESPONSE_TIMEOUT = 10000; |
||||
|
private static final int MAX_RETRIES = 3; |
||||
|
|
||||
|
public void fetch(String source) throws HotSearchException { |
||||
|
List<HotSearchItem> items = new ArrayList<>(); |
||||
|
|
||||
|
switch (source.toLowerCase()) { |
||||
|
case "weibo": |
||||
|
items = fetchWeibo(); |
||||
|
break; |
||||
|
case "tieba": |
||||
|
items = fetchTieba(); |
||||
|
break; |
||||
|
case "zhihu": |
||||
|
items = fetchZhihu(); |
||||
|
break; |
||||
|
case "all": |
||||
|
items.addAll(fetchWeibo()); |
||||
|
items.addAll(fetchTieba()); |
||||
|
items.addAll(fetchZhihu()); |
||||
|
break; |
||||
|
default: |
||||
|
throw new HotSearchException(ErrorCode.PARAMETER_ERROR, "未知数据源: " + source); |
||||
|
} |
||||
|
|
||||
|
HotSearchResult result = new HotSearchResult(items, null, source); |
||||
|
AppContext.getInstance().setCurrentResult(result); |
||||
|
} |
||||
|
|
||||
|
private List<HotSearchItem> fetchWeibo() throws HotSearchException { |
||||
|
List<HotSearchItem> items = new ArrayList<>(); |
||||
|
try { |
||||
|
String json = fetchUrlWithRetry(WEIBO_URL, "https://weibo.com/"); |
||||
|
JSONObject root = JSONObject.parseObject(json); |
||||
|
|
||||
|
if (!root.containsKey("data")) { |
||||
|
throw new DataParseException(ErrorCode.DATA_FORMAT_ERROR, "微博数据格式错误"); |
||||
|
} |
||||
|
|
||||
|
JSONObject data = root.getJSONObject("data"); |
||||
|
if (!data.containsKey("realtime")) { |
||||
|
throw new DataParseException(ErrorCode.DATA_EMPTY, "微博数据为空"); |
||||
|
} |
||||
|
|
||||
|
JSONArray realtime = data.getJSONArray("realtime"); |
||||
|
for (int i = 0; i < realtime.size(); i++) { |
||||
|
JSONObject item = realtime.getJSONObject(i); |
||||
|
if (item != null) { |
||||
|
HotSearchItem hotItem = parseWeiboItem(item); |
||||
|
if (hotItem != null) { |
||||
|
items.add(hotItem); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
} catch (NetworkException e) { |
||||
|
throw e; |
||||
|
} catch (Exception e) { |
||||
|
throw new DataParseException(ErrorCode.DATA_PARSE_ERROR, "微博数据解析失败", e); |
||||
|
} |
||||
|
return items; |
||||
|
} |
||||
|
|
||||
|
private HotSearchItem parseWeiboItem(JSONObject item) { |
||||
|
String word = item.getString("word"); |
||||
|
if (word == null || word.isEmpty()) { |
||||
|
return null; |
||||
|
} |
||||
|
long num = item.getLongValue("num", 0); |
||||
|
int rank = item.getIntValue("rank", 0); |
||||
|
return new HotSearchItem(word, num, rank, "微博"); |
||||
|
} |
||||
|
|
||||
|
private List<HotSearchItem> fetchTieba() throws HotSearchException { |
||||
|
List<HotSearchItem> items = new ArrayList<>(); |
||||
|
try { |
||||
|
String json = fetchUrlWithRetry(TIEBA_URL, "https://tieba.baidu.com/"); |
||||
|
JSONObject root = JSONObject.parseObject(json); |
||||
|
|
||||
|
if (!root.containsKey("data")) { |
||||
|
throw new DataParseException(ErrorCode.DATA_FORMAT_ERROR, "贴吧数据格式错误"); |
||||
|
} |
||||
|
|
||||
|
JSONObject data = root.getJSONObject("data"); |
||||
|
if (!data.containsKey("bang_topic")) { |
||||
|
throw new DataParseException(ErrorCode.DATA_EMPTY, "贴吧数据为空"); |
||||
|
} |
||||
|
|
||||
|
JSONArray topics = data.getJSONArray("bang_topic"); |
||||
|
for (int i = 0; i < topics.size(); i++) { |
||||
|
JSONObject item = topics.getJSONObject(i); |
||||
|
if (item != null) { |
||||
|
HotSearchItem hotItem = parseTiebaItem(item, i + 1); |
||||
|
if (hotItem != null) { |
||||
|
items.add(hotItem); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
} catch (NetworkException e) { |
||||
|
throw e; |
||||
|
} catch (Exception e) { |
||||
|
throw new DataParseException(ErrorCode.DATA_PARSE_ERROR, "贴吧数据解析失败", e); |
||||
|
} |
||||
|
return items; |
||||
|
} |
||||
|
|
||||
|
private HotSearchItem parseTiebaItem(JSONObject item, int index) { |
||||
|
String topicName = item.getString("topic_name"); |
||||
|
if (topicName == null || topicName.isEmpty()) { |
||||
|
return null; |
||||
|
} |
||||
|
int readNum = item.getIntValue("read_num", 0); |
||||
|
int discussNum = item.getIntValue("discuss_num", 0); |
||||
|
return new HotSearchItem(topicName, (long) readNum + discussNum, 0, "百度贴吧"); |
||||
|
} |
||||
|
|
||||
|
private List<HotSearchItem> fetchZhihu() throws HotSearchException { |
||||
|
List<HotSearchItem> items = new ArrayList<>(); |
||||
|
try { |
||||
|
String json = fetchUrlWithRetry(ZHIHU_URL, "https://zhuanlan.zhihu.com/"); |
||||
|
JSONObject root = JSONObject.parseObject(json); |
||||
|
|
||||
|
JSONArray data = findZhihuData(root, json); |
||||
|
if (data == null || data.isEmpty()) { |
||||
|
throw new DataParseException(ErrorCode.DATA_EMPTY, "知乎数据为空"); |
||||
|
} |
||||
|
|
||||
|
for (int i = 0; i < data.size(); i++) { |
||||
|
JSONObject item = data.getJSONObject(i); |
||||
|
if (item != null) { |
||||
|
HotSearchItem hotItem = parseZhihuItem(item, i + 1); |
||||
|
if (hotItem != null) { |
||||
|
items.add(hotItem); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
} catch (NetworkException e) { |
||||
|
throw e; |
||||
|
} catch (Exception e) { |
||||
|
throw new DataParseException(ErrorCode.DATA_PARSE_ERROR, "知乎数据解析失败", e); |
||||
|
} |
||||
|
return items; |
||||
|
} |
||||
|
|
||||
|
private JSONArray findZhihuData(JSONObject root, String json) { |
||||
|
if (root.containsKey("data") && root.get("data") instanceof JSONArray) { |
||||
|
return root.getJSONArray("data"); |
||||
|
} else if (root.containsKey("data")) { |
||||
|
JSONObject dataObj = root.getJSONObject("data"); |
||||
|
if (dataObj != null && dataObj.containsKey("topics")) { |
||||
|
return dataObj.getJSONArray("topics"); |
||||
|
} |
||||
|
} else if (root.containsKey("top_search")) { |
||||
|
JSONObject topSearch = root.getJSONObject("top_search"); |
||||
|
if (topSearch != null && topSearch.containsKey("words")) { |
||||
|
return topSearch.getJSONArray("words"); |
||||
|
} |
||||
|
} else if (json.startsWith("[")) { |
||||
|
return JSONArray.parseArray(json); |
||||
|
} |
||||
|
return null; |
||||
|
} |
||||
|
|
||||
|
private HotSearchItem parseZhihuItem(JSONObject item, int index) { |
||||
|
String title = getItemTitle(item); |
||||
|
if (title == null || title.isEmpty()) { |
||||
|
return null; |
||||
|
} |
||||
|
|
||||
|
long hotValue = 0; |
||||
|
if (item.containsKey("hot_score")) { |
||||
|
hotValue = item.getLongValue("hot_score", 0); |
||||
|
} else if (item.containsKey("score")) { |
||||
|
hotValue = item.getLongValue("score", 0); |
||||
|
} else if (item.containsKey("detail_text")) { |
||||
|
String detailText = item.getString("detail_text"); |
||||
|
if (detailText != null) { |
||||
|
try { |
||||
|
String numStr = detailText.replaceAll("[^0-9]", ""); |
||||
|
if (!numStr.isEmpty()) { |
||||
|
hotValue = Long.parseLong(numStr); |
||||
|
} |
||||
|
} catch (Exception e) { |
||||
|
hotValue = 0; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
return new HotSearchItem(title, hotValue, index, "知乎"); |
||||
|
} |
||||
|
|
||||
|
private String getItemTitle(JSONObject item) { |
||||
|
if (item == null) { |
||||
|
return null; |
||||
|
} |
||||
|
|
||||
|
String title = item.getString("title"); |
||||
|
if (title != null && !title.isEmpty()) { |
||||
|
return title; |
||||
|
} |
||||
|
|
||||
|
title = item.getString("topic_title"); |
||||
|
if (title != null && !title.isEmpty()) { |
||||
|
return title; |
||||
|
} |
||||
|
|
||||
|
title = item.getString("name"); |
||||
|
if (title != null && !title.isEmpty()) { |
||||
|
return title; |
||||
|
} |
||||
|
|
||||
|
if (item.containsKey("target")) { |
||||
|
JSONObject target = item.getJSONObject("target"); |
||||
|
if (target != null) { |
||||
|
title = target.getString("title"); |
||||
|
if (title != null && !title.isEmpty()) { |
||||
|
return title; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
title = item.getString("display_query"); |
||||
|
if (title != null && !title.isEmpty()) { |
||||
|
return title; |
||||
|
} |
||||
|
|
||||
|
return item.getString("query"); |
||||
|
} |
||||
|
|
||||
|
private String fetchUrlWithRetry(String url, String referer) throws NetworkException { |
||||
|
int retryCount = 0; |
||||
|
Exception lastException = null; |
||||
|
|
||||
|
while (retryCount < MAX_RETRIES) { |
||||
|
try { |
||||
|
return Request.get(url) |
||||
|
.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36") |
||||
|
.addHeader("Referer", referer) |
||||
|
.addHeader("Accept", "application/json, text/plain, */*;charset=UTF-8") |
||||
|
.addHeader("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8") |
||||
|
.addHeader("Accept-Encoding", "identity") |
||||
|
.addHeader("Connection", "keep-alive") |
||||
|
.addHeader("Content-Type", "application/json;charset=UTF-8") |
||||
|
.connectTimeout(Timeout.ofMilliseconds(CONNECT_TIMEOUT)) |
||||
|
.responseTimeout(Timeout.ofMilliseconds(RESPONSE_TIMEOUT)) |
||||
|
.execute() |
||||
|
.returnContent() |
||||
|
.asString(StandardCharsets.UTF_8); |
||||
|
} catch (java.net.SocketTimeoutException e) { |
||||
|
lastException = e; |
||||
|
retryCount++; |
||||
|
if (retryCount >= MAX_RETRIES) { |
||||
|
throw new NetworkException(ErrorCode.CONNECTION_TIMEOUT, "连接超时", e); |
||||
|
} |
||||
|
sleep(2000); |
||||
|
} catch (java.net.ConnectException e) { |
||||
|
lastException = e; |
||||
|
retryCount++; |
||||
|
if (retryCount >= MAX_RETRIES) { |
||||
|
throw new NetworkException(ErrorCode.NETWORK_ERROR, "连接失败", e); |
||||
|
} |
||||
|
sleep(2000); |
||||
|
} catch (Exception e) { |
||||
|
lastException = e; |
||||
|
retryCount++; |
||||
|
if (retryCount >= MAX_RETRIES) { |
||||
|
throw new NetworkException(ErrorCode.NETWORK_ERROR, "网络请求失败", e); |
||||
|
} |
||||
|
sleep(2000); |
||||
|
} |
||||
|
} |
||||
|
throw new NetworkException(ErrorCode.NETWORK_ERROR, "网络请求失败", lastException); |
||||
|
} |
||||
|
|
||||
|
private void sleep(long millis) { |
||||
|
try { |
||||
|
Thread.sleep(millis); |
||||
|
} catch (InterruptedException e) { |
||||
|
Thread.currentThread().interrupt(); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,44 @@ |
|||||
|
package com.weibo.hotsearch.service; |
||||
|
|
||||
|
import com.alibaba.fastjson2.JSONObject; |
||||
|
import com.weibo.hotsearch.exception.ErrorCode; |
||||
|
import com.weibo.hotsearch.exception.HotSearchException; |
||||
|
import com.weibo.hotsearch.model.AppContext; |
||||
|
import com.weibo.hotsearch.model.HotSearchItem; |
||||
|
import com.weibo.hotsearch.model.HotSearchResult; |
||||
|
import com.weibo.hotsearch.strategy.FilterStrategy; |
||||
|
|
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class FilterService { |
||||
|
|
||||
|
public void filter(FilterStrategy strategy) throws HotSearchException { |
||||
|
HotSearchResult currentResult = AppContext.getInstance().getCurrentResult(); |
||||
|
|
||||
|
if (currentResult == null || currentResult.isEmpty()) { |
||||
|
throw new HotSearchException(ErrorCode.DATA_EMPTY, "没有可过滤的数据,请先获取数据"); |
||||
|
} |
||||
|
|
||||
|
List<HotSearchItem> filteredItems = new ArrayList<>(); |
||||
|
|
||||
|
for (HotSearchItem item : currentResult.getItems()) { |
||||
|
JSONObject jsonItem = convertToJson(item); |
||||
|
if (strategy.match(jsonItem)) { |
||||
|
filteredItems.add(item); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
HotSearchResult filteredResult = new HotSearchResult(filteredItems, strategy.getFilterName(), currentResult.getDataSource()); |
||||
|
AppContext.getInstance().setCurrentResult(filteredResult); |
||||
|
} |
||||
|
|
||||
|
private JSONObject convertToJson(HotSearchItem item) { |
||||
|
JSONObject json = new JSONObject(); |
||||
|
json.put("word", item.getTitle()); |
||||
|
json.put("title", item.getTitle()); |
||||
|
json.put("num", item.getHotValue()); |
||||
|
json.put("rank", item.getRank()); |
||||
|
return json; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,66 @@ |
|||||
|
package com.weibo.hotsearch.service; |
||||
|
|
||||
|
import com.weibo.hotsearch.exception.ErrorCode; |
||||
|
import com.weibo.hotsearch.exception.HotSearchException; |
||||
|
import com.weibo.hotsearch.model.AppContext; |
||||
|
import com.weibo.hotsearch.model.HotSearchResult; |
||||
|
import com.weibo.hotsearch.view.TextView; |
||||
|
import com.weibo.hotsearch.view.View; |
||||
|
import com.weibo.hotsearch.view.ViewFactory; |
||||
|
|
||||
|
import java.io.BufferedWriter; |
||||
|
import java.io.File; |
||||
|
import java.io.FileWriter; |
||||
|
import java.io.IOException; |
||||
|
import java.time.LocalDateTime; |
||||
|
import java.time.format.DateTimeFormatter; |
||||
|
|
||||
|
public class OutputService { |
||||
|
|
||||
|
private static final DateTimeFormatter FILE_FORMATTER = DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss"); |
||||
|
private static final String OUTPUT_DIR = "hotsearch_results"; |
||||
|
|
||||
|
public void output(String outputType) throws HotSearchException { |
||||
|
HotSearchResult result = AppContext.getInstance().getCurrentResult(); |
||||
|
|
||||
|
if (result == null) { |
||||
|
throw new HotSearchException(ErrorCode.DATA_EMPTY, "没有可输出的数据"); |
||||
|
} |
||||
|
|
||||
|
View view = ViewFactory.getView(outputType); |
||||
|
view.render(result); |
||||
|
} |
||||
|
|
||||
|
public void saveToFile(String filePath) throws HotSearchException { |
||||
|
HotSearchResult result = AppContext.getInstance().getCurrentResult(); |
||||
|
|
||||
|
if (result == null) { |
||||
|
throw new HotSearchException(ErrorCode.DATA_EMPTY, "没有可保存的数据"); |
||||
|
} |
||||
|
|
||||
|
try { |
||||
|
File dir = new File(OUTPUT_DIR); |
||||
|
if (!dir.exists()) { |
||||
|
dir.mkdirs(); |
||||
|
} |
||||
|
|
||||
|
String actualPath = filePath; |
||||
|
if (filePath == null || filePath.isEmpty()) { |
||||
|
String timestamp = LocalDateTime.now().format(FILE_FORMATTER); |
||||
|
actualPath = OUTPUT_DIR + File.separator + "hotsearch_" + timestamp + ".txt"; |
||||
|
} |
||||
|
|
||||
|
TextView textView = (TextView) ViewFactory.getView("text"); |
||||
|
String content = textView.renderToString(result); |
||||
|
|
||||
|
try (BufferedWriter writer = new BufferedWriter(new FileWriter(actualPath))) { |
||||
|
writer.write(content); |
||||
|
} |
||||
|
|
||||
|
System.out.println("\n结果已保存到文件: " + actualPath); |
||||
|
|
||||
|
} catch (IOException e) { |
||||
|
throw new HotSearchException(ErrorCode.UNKNOWN_ERROR, "保存文件失败", e); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,12 @@ |
|||||
|
package com.weibo.hotsearch.strategy; |
||||
|
|
||||
|
import com.alibaba.fastjson2.JSONObject; |
||||
|
|
||||
|
public interface FilterStrategy { |
||||
|
|
||||
|
boolean match(JSONObject item); |
||||
|
|
||||
|
String getFilterName(); |
||||
|
|
||||
|
String getFilterCode(); |
||||
|
} |
||||
@ -0,0 +1,30 @@ |
|||||
|
package com.weibo.hotsearch.strategy; |
||||
|
|
||||
|
import java.util.HashMap; |
||||
|
import java.util.Map; |
||||
|
|
||||
|
public class FilterStrategyFactory { |
||||
|
|
||||
|
private static final Map<String, FilterStrategy> strategies = new HashMap<>(); |
||||
|
|
||||
|
static { |
||||
|
strategies.put("star", new StarFilterStrategy()); |
||||
|
strategies.put("sports", new SportsFilterStrategy()); |
||||
|
strategies.put("policy", new PolicyFilterStrategy()); |
||||
|
} |
||||
|
|
||||
|
public static FilterStrategy getStrategy(String code) { |
||||
|
if (code == null || code.isEmpty()) { |
||||
|
return null; |
||||
|
} |
||||
|
return strategies.get(code.toLowerCase()); |
||||
|
} |
||||
|
|
||||
|
public static Map<String, FilterStrategy> getAllStrategies() { |
||||
|
return new HashMap<>(strategies); |
||||
|
} |
||||
|
|
||||
|
public static boolean hasStrategy(String code) { |
||||
|
return code != null && strategies.containsKey(code.toLowerCase()); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,55 @@ |
|||||
|
package com.weibo.hotsearch.strategy; |
||||
|
|
||||
|
import com.alibaba.fastjson2.JSONObject; |
||||
|
|
||||
|
public class PolicyFilterStrategy implements FilterStrategy { |
||||
|
|
||||
|
private static final String[] KEYWORDS = { |
||||
|
"政策", "新规", "条例", "法规", "通知", "公告", "发布", |
||||
|
"国务院", "发改委", "财政部", "教育部", "工信部", "科技部", |
||||
|
"税收", "补贴", "优惠", "扶持", "改革", "开放", "创新", |
||||
|
"十四五", "计划", "规划", "方案", "意见", "办法", "细则", |
||||
|
"经济", "金融", "市场", "监管", "安全", "环保", "绿色" |
||||
|
}; |
||||
|
|
||||
|
@Override |
||||
|
public boolean match(JSONObject item) { |
||||
|
if (item == null) { |
||||
|
return false; |
||||
|
} |
||||
|
String word = getItemTitle(item); |
||||
|
if (word == null || word.isEmpty()) { |
||||
|
return false; |
||||
|
} |
||||
|
for (String keyword : KEYWORDS) { |
||||
|
if (word.contains(keyword)) { |
||||
|
return true; |
||||
|
} |
||||
|
} |
||||
|
return false; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getFilterName() { |
||||
|
return "国家政策相关热搜"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getFilterCode() { |
||||
|
return "policy"; |
||||
|
} |
||||
|
|
||||
|
private String getItemTitle(JSONObject item) { |
||||
|
String title = item.getString("word"); |
||||
|
if (title == null || title.isEmpty()) { |
||||
|
title = item.getString("topic_name"); |
||||
|
} |
||||
|
if (title == null || title.isEmpty()) { |
||||
|
title = item.getString("title"); |
||||
|
} |
||||
|
if (title == null || title.isEmpty()) { |
||||
|
title = item.getString("name"); |
||||
|
} |
||||
|
return title; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,54 @@ |
|||||
|
package com.weibo.hotsearch.strategy; |
||||
|
|
||||
|
import com.alibaba.fastjson2.JSONObject; |
||||
|
|
||||
|
public class SportsFilterStrategy implements FilterStrategy { |
||||
|
|
||||
|
private static final String[] KEYWORDS = { |
||||
|
"足球", "篮球", "世界杯", "NBA", "CBA", "奥运会", "世锦赛", |
||||
|
"冠军", "比赛", "夺冠", "进球", "比分", "运动员", "国足", |
||||
|
"乒乓", "排球", "羽毛球", "游泳", "田径", "体操", "跳水", |
||||
|
"MVP", "转会", "联赛", "中超", "英超", "西甲", "欧冠" |
||||
|
}; |
||||
|
|
||||
|
@Override |
||||
|
public boolean match(JSONObject item) { |
||||
|
if (item == null) { |
||||
|
return false; |
||||
|
} |
||||
|
String word = getItemTitle(item); |
||||
|
if (word == null || word.isEmpty()) { |
||||
|
return false; |
||||
|
} |
||||
|
for (String keyword : KEYWORDS) { |
||||
|
if (word.contains(keyword)) { |
||||
|
return true; |
||||
|
} |
||||
|
} |
||||
|
return false; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getFilterName() { |
||||
|
return "体育相关热搜"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getFilterCode() { |
||||
|
return "sports"; |
||||
|
} |
||||
|
|
||||
|
private String getItemTitle(JSONObject item) { |
||||
|
String title = item.getString("word"); |
||||
|
if (title == null || title.isEmpty()) { |
||||
|
title = item.getString("topic_name"); |
||||
|
} |
||||
|
if (title == null || title.isEmpty()) { |
||||
|
title = item.getString("title"); |
||||
|
} |
||||
|
if (title == null || title.isEmpty()) { |
||||
|
title = item.getString("name"); |
||||
|
} |
||||
|
return title; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,76 @@ |
|||||
|
package com.weibo.hotsearch.strategy; |
||||
|
|
||||
|
import com.alibaba.fastjson2.JSONObject; |
||||
|
|
||||
|
public class StarFilterStrategy implements FilterStrategy { |
||||
|
|
||||
|
private static final String[] KEYWORDS = { |
||||
|
"明星", "演员", "歌手", "爱豆", "艺人", "红毯", "综艺", "新剧", |
||||
|
"恋情", "官宣", "演唱会", "代言", "造型", "封面" |
||||
|
}; |
||||
|
|
||||
|
@Override |
||||
|
public boolean match(JSONObject item) { |
||||
|
if (item == null) { |
||||
|
return false; |
||||
|
} |
||||
|
String word = getItemTitle(item); |
||||
|
if (word == null || word.isEmpty()) { |
||||
|
return false; |
||||
|
} |
||||
|
for (String keyword : KEYWORDS) { |
||||
|
if (word.contains(keyword)) { |
||||
|
return true; |
||||
|
} |
||||
|
} |
||||
|
return false; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getFilterName() { |
||||
|
return "明星相关热搜"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getFilterCode() { |
||||
|
return "star"; |
||||
|
} |
||||
|
|
||||
|
private String getItemTitle(JSONObject item) { |
||||
|
if (item == null) { |
||||
|
return null; |
||||
|
} |
||||
|
|
||||
|
String title = item.getString("word"); |
||||
|
if (title != null && !title.isEmpty()) { |
||||
|
return title; |
||||
|
} |
||||
|
|
||||
|
title = item.getString("topic_name"); |
||||
|
if (title != null && !title.isEmpty()) { |
||||
|
return title; |
||||
|
} |
||||
|
|
||||
|
title = item.getString("title"); |
||||
|
if (title != null && !title.isEmpty()) { |
||||
|
return title; |
||||
|
} |
||||
|
|
||||
|
title = item.getString("name"); |
||||
|
if (title != null && !title.isEmpty()) { |
||||
|
return title; |
||||
|
} |
||||
|
|
||||
|
if (item.containsKey("target")) { |
||||
|
JSONObject target = item.getJSONObject("target"); |
||||
|
if (target != null) { |
||||
|
title = target.getString("title"); |
||||
|
if (title != null && !title.isEmpty()) { |
||||
|
return title; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
return title; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,51 @@ |
|||||
|
package com.weibo.hotsearch.view; |
||||
|
|
||||
|
import com.weibo.hotsearch.model.HotSearchItem; |
||||
|
import com.weibo.hotsearch.model.HotSearchResult; |
||||
|
|
||||
|
import java.time.format.DateTimeFormatter; |
||||
|
|
||||
|
public class ConsoleView implements View { |
||||
|
|
||||
|
private static final DateTimeFormatter FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); |
||||
|
|
||||
|
@Override |
||||
|
public void render(HotSearchResult result) { |
||||
|
if (result == null) { |
||||
|
System.out.println("没有数据可显示"); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
System.out.println("\n===== " + (result.getFilterName() != null ? result.getFilterName() : "热搜结果") + " ====="); |
||||
|
System.out.println("数据源: " + result.getDataSource()); |
||||
|
System.out.println("采集时间: " + result.getFetchTime().format(FORMATTER)); |
||||
|
System.out.println("----------------------------------------"); |
||||
|
|
||||
|
if (result.isEmpty()) { |
||||
|
System.out.println("当前暂无符合条件的热搜内容"); |
||||
|
} else { |
||||
|
for (int i = 0; i < result.getItems().size(); i++) { |
||||
|
HotSearchItem item = result.getItems().get(i); |
||||
|
String line = formatItem(item, i); |
||||
|
System.out.println(line); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
System.out.println("\n===== 热搜总数:" + result.getTotalCount() + " 条 ====="); |
||||
|
} |
||||
|
|
||||
|
private String formatItem(HotSearchItem item, int index) { |
||||
|
if (item.getRank() > 0) { |
||||
|
return String.format("排名:%d\t热度:%d\t来源:%s\t热搜:%s", |
||||
|
item.getRank(), item.getHotValue(), item.getSource(), item.getTitle()); |
||||
|
} else { |
||||
|
return String.format("序号:%d\t热度:%d\t来源:%s\t热搜:%s", |
||||
|
index + 1, item.getHotValue(), item.getSource(), item.getTitle()); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getViewType() { |
||||
|
return "console"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,83 @@ |
|||||
|
package com.weibo.hotsearch.view; |
||||
|
|
||||
|
import com.weibo.hotsearch.model.HotSearchItem; |
||||
|
import com.weibo.hotsearch.model.HotSearchResult; |
||||
|
|
||||
|
import java.time.format.DateTimeFormatter; |
||||
|
import java.util.StringJoiner; |
||||
|
|
||||
|
public class TextView implements View { |
||||
|
|
||||
|
private static final DateTimeFormatter FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); |
||||
|
|
||||
|
@Override |
||||
|
public void render(HotSearchResult result) { |
||||
|
if (result == null) { |
||||
|
System.out.println("没有数据可显示"); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
StringJoiner sj = new StringJoiner("\n"); |
||||
|
|
||||
|
sj.add("===== " + (result.getFilterName() != null ? result.getFilterName() : "热搜结果") + " ====="); |
||||
|
sj.add("数据源: " + result.getDataSource()); |
||||
|
sj.add("采集时间: " + result.getFetchTime().format(FORMATTER)); |
||||
|
sj.add("----------------------------------------"); |
||||
|
|
||||
|
if (result.isEmpty()) { |
||||
|
sj.add("当前暂无符合条件的热搜内容"); |
||||
|
} else { |
||||
|
for (int i = 0; i < result.getItems().size(); i++) { |
||||
|
HotSearchItem item = result.getItems().get(i); |
||||
|
sj.add(formatItem(item, i)); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
sj.add(""); |
||||
|
sj.add("===== 热搜总数:" + result.getTotalCount() + " 条 ====="); |
||||
|
|
||||
|
System.out.println(sj.toString()); |
||||
|
} |
||||
|
|
||||
|
private String formatItem(HotSearchItem item, int index) { |
||||
|
if (item.getRank() > 0) { |
||||
|
return String.format("排名:%d\t热度:%d\t来源:%s\t热搜:%s", |
||||
|
item.getRank(), item.getHotValue(), item.getSource(), item.getTitle()); |
||||
|
} else { |
||||
|
return String.format("序号:%d\t热度:%d\t来源:%s\t热搜:%s", |
||||
|
index + 1, item.getHotValue(), item.getSource(), item.getTitle()); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
public String renderToString(HotSearchResult result) { |
||||
|
if (result == null) { |
||||
|
return "没有数据可显示"; |
||||
|
} |
||||
|
|
||||
|
StringJoiner sj = new StringJoiner("\n"); |
||||
|
|
||||
|
sj.add("===== " + (result.getFilterName() != null ? result.getFilterName() : "热搜结果") + " ====="); |
||||
|
sj.add("数据源: " + result.getDataSource()); |
||||
|
sj.add("采集时间: " + result.getFetchTime().format(FORMATTER)); |
||||
|
sj.add("----------------------------------------"); |
||||
|
|
||||
|
if (result.isEmpty()) { |
||||
|
sj.add("当前暂无符合条件的热搜内容"); |
||||
|
} else { |
||||
|
for (int i = 0; i < result.getItems().size(); i++) { |
||||
|
HotSearchItem item = result.getItems().get(i); |
||||
|
sj.add(formatItem(item, i)); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
sj.add(""); |
||||
|
sj.add("===== 热搜总数:" + result.getTotalCount() + " 条 ====="); |
||||
|
|
||||
|
return sj.toString(); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getViewType() { |
||||
|
return "text"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,10 @@ |
|||||
|
package com.weibo.hotsearch.view; |
||||
|
|
||||
|
import com.weibo.hotsearch.model.HotSearchResult; |
||||
|
|
||||
|
public interface View { |
||||
|
|
||||
|
void render(HotSearchResult result); |
||||
|
|
||||
|
String getViewType(); |
||||
|
} |
||||
@ -0,0 +1,29 @@ |
|||||
|
package com.weibo.hotsearch.view; |
||||
|
|
||||
|
import java.util.HashMap; |
||||
|
import java.util.Map; |
||||
|
|
||||
|
public class ViewFactory { |
||||
|
|
||||
|
private static final Map<String, View> views = new HashMap<>(); |
||||
|
|
||||
|
static { |
||||
|
views.put("console", new ConsoleView()); |
||||
|
views.put("text", new TextView()); |
||||
|
} |
||||
|
|
||||
|
public static View getView(String type) { |
||||
|
if (type == null || type.isEmpty()) { |
||||
|
return views.get("console"); |
||||
|
} |
||||
|
return views.getOrDefault(type.toLowerCase(), views.get("console")); |
||||
|
} |
||||
|
|
||||
|
public static boolean hasView(String type) { |
||||
|
return type != null && views.containsKey(type.toLowerCase()); |
||||
|
} |
||||
|
|
||||
|
public static Map<String, View> getAllViews() { |
||||
|
return new HashMap<>(views); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,4 @@ |
|||||
|
@echo off
rem Runs the WeiboHotSearcha class from target\classes with jsoup 1.17.2 on the classpath.
rem The jsoup jar is taken from the current user's local Maven repository
rem (%USERPROFILE%\.m2) instead of one specific account's home directory.
rem NOTE(review): the class name "WeiboHotSearcha" may be a typo - confirm against the sources.
set "CLASSPATH=target\classes;%USERPROFILE%\.m2\repository\org\jsoup\jsoup\1.17.2\jsoup-1.17.2.jar"
java WeiboHotSearcha
pause
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files changed in this diff
Loading…
Reference in new issue