diff --git a/w11/.gitignore b/w11/.gitignore new file mode 100644 index 0000000..0ebcf1a --- /dev/null +++ b/w11/.gitignore @@ -0,0 +1,4 @@ +*.jar +*.jar +*.class +*.log \ No newline at end of file diff --git a/w11/.vscode/settings.json b/w11/.vscode/settings.json new file mode 100644 index 0000000..192fc44 --- /dev/null +++ b/w11/.vscode/settings.json @@ -0,0 +1,4 @@ +{ + "git.ignoreLimitWarning": true, + "java.configuration.updateBuildConfiguration": "interactive" +} \ No newline at end of file diff --git a/w11/README.md b/w11/README.md new file mode 100644 index 0000000..3ea02ec --- /dev/null +++ b/w11/README.md @@ -0,0 +1,17 @@ +# DataCollect 教学项目 — 最小可运行版本 + +这是一个最小可用的 Java CLI 演示工程,目标:打印帮助信息以验证运行环境。 + +构建: +```bash +mvn -q package +``` + +运行(示例): +```bash +java -jar target/datacollect-cli-0.1.0-jar-with-dependencies.jar --help +``` + +项目结构(最小): +- `src/main/java/com/example/datacollect/Main.java` — CLI 入口,打印帮助 +- `pom.xml` — Maven 构建配置,生成可执行 jar diff --git a/w11/pom.xml b/w11/pom.xml new file mode 100644 index 0000000..a24f629 --- /dev/null +++ b/w11/pom.xml @@ -0,0 +1,71 @@ + + 4.0.0 + com.example + datacollect-cli + 0.1.0 + + 11 + 11 + 2.0.9 + 1.4.11 + 1.17.2 + 4.12.0 + + + + org.slf4j + slf4j-api + ${slf4j.version} + + + ch.qos.logback + logback-classic + ${logback.version} + + + org.jsoup + jsoup + ${jsoup.version} + + + com.squareup.okhttp3 + okhttp + ${okhttp.version} + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.8.1 + + + org.apache.maven.plugins + maven-assembly-plugin + 3.3.0 + + + + com.example.datacollect.Main + + + + jar-with-dependencies + + + + + make-assembly + package + + single + + + + + + + diff --git a/w11/src/main/java/com/example/datacollect/Main.java b/w11/src/main/java/com/example/datacollect/Main.java new file mode 100644 index 0000000..a136ba6 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/Main.java @@ -0,0 +1,19 @@ +package com.example.datacollect; + +import 
com.example.datacollect.controller.CrawlerController; +import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.view.ConsoleView; + +public class Main { + + public static void main(String[] args) { + ConsoleView view = new ConsoleView(); + ArticleRepository repository = new ArticleRepository(); + CrawlerController controller = new CrawlerController(view, repository); + + view.printSuccess("Welcome to CLI Crawler (W10)! Type help for commands."); + while (true) { + controller.handle(view.readLine()); + } + } +} diff --git a/w11/src/main/java/com/example/datacollect/command/Command.java b/w11/src/main/java/com/example/datacollect/command/Command.java new file mode 100644 index 0000000..4cdadd9 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/command/Command.java @@ -0,0 +1,6 @@ +package com.example.datacollect.command; + +public interface Command { + String getName(); + void execute(String[] args, CommandContext context); +} diff --git a/w11/src/main/java/com/example/datacollect/command/CommandContext.java b/w11/src/main/java/com/example/datacollect/command/CommandContext.java new file mode 100644 index 0000000..cb1c0db --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/command/CommandContext.java @@ -0,0 +1,42 @@ +package com.example.datacollect.command; + +import com.example.datacollect.repository.ArticleRepository; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; + +public class CommandContext { + private static final Logger logger = LoggerFactory.getLogger(CommandContext.class); + private final ArticleRepository repository; + private final List history; + + public CommandContext(ArticleRepository repository) { + this.repository = repository; + this.history = new ArrayList<>(); + logger.debug("CommandContext initialized"); + } + + public ArticleRepository getRepository() { + return repository; + } + + public List getHistory() { + return new 
ArrayList<>(history); + } + + public void addToHistory(String command) { + history.add(command); + logger.trace("Command added to history: {}", command); + } + + public int getHistorySize() { + return history.size(); + } + + public void clearHistory() { + history.clear(); + logger.debug("Command history cleared"); + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/command/CrawlCommand.java b/w11/src/main/java/com/example/datacollect/command/CrawlCommand.java new file mode 100644 index 0000000..fe02f56 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/command/CrawlCommand.java @@ -0,0 +1,77 @@ +package com.example.datacollect.command; + +import com.example.datacollect.exception.CrawlerException; +import com.example.datacollect.model.Article; +import com.example.datacollect.strategy.CrawlStrategy; +import com.example.datacollect.strategy.StrategyFactory; +import com.example.datacollect.view.ConsoleView; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; + +public class CrawlCommand implements Command { + private static final Logger logger = LoggerFactory.getLogger(CrawlCommand.class); + private static final int MAX_RETRIES = 3; + private static final long RETRY_DELAY_MS = 1000; + private final ConsoleView view; + + public CrawlCommand(ConsoleView view) { + this.view = view; + } + + @Override + public String getName() { + return "crawl"; + } + + @Override + public void execute(String[] args, CommandContext context) { + if (args.length < 3) { + view.printError("Usage: crawl "); + view.printError("Supported types: " + StrategyFactory.getSupportedTypes()); + return; + } + + String type = args[1]; + String url = args[2]; + + if (!StrategyFactory.hasStrategy(type)) { + view.printError("Unknown strategy type: " + type); + view.printError("Supported types: " + StrategyFactory.getSupportedTypes()); + return; + } + + int attempt = 0; + Exception lastException = null; + + while (attempt < 
MAX_RETRIES) { + try { + attempt++; + logger.info("Crawl attempt {} of {} for: {}", attempt, MAX_RETRIES, url); + CrawlStrategy strategy = StrategyFactory.getStrategy(type); + List
articles = strategy.crawl(url); + context.getRepository().addAll(articles); + view.printSuccess("Crawled " + articles.size() + " articles using " + type + " strategy"); + return; + } catch (CrawlerException e) { + lastException = e; + logger.warn("Crawl attempt {} failed: {}", attempt, e.getMessage()); + if (attempt < MAX_RETRIES) { + try { + Thread.sleep(RETRY_DELAY_MS); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + break; + } + } + } catch (Exception e) { + lastException = e; + logger.error("Crawl failed with unexpected error: {}", e.getMessage()); + break; + } + } + + view.printError("Crawl failed after " + attempt + " attempts: " + (lastException != null ? lastException.getMessage() : "Unknown error")); + } +} diff --git a/w11/src/main/java/com/example/datacollect/command/ExitCommand.java b/w11/src/main/java/com/example/datacollect/command/ExitCommand.java new file mode 100644 index 0000000..f8440d3 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/command/ExitCommand.java @@ -0,0 +1,26 @@ +package com.example.datacollect.command; + +import com.example.datacollect.view.ConsoleView; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ExitCommand implements Command { + private static final Logger logger = LoggerFactory.getLogger(ExitCommand.class); + private final ConsoleView view; + + public ExitCommand(ConsoleView view) { + this.view = view; + } + + @Override + public String getName() { + return "exit"; + } + + @Override + public void execute(String[] args, CommandContext context) { + logger.info("Exit command executed"); + view.printSuccess("Bye!"); + System.exit(0); + } +} diff --git a/w11/src/main/java/com/example/datacollect/command/HelpCommand.java b/w11/src/main/java/com/example/datacollect/command/HelpCommand.java new file mode 100644 index 0000000..8ff02f7 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/command/HelpCommand.java @@ -0,0 +1,34 @@ +package 
com.example.datacollect.command;
+
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class HelpCommand implements Command {
+    private static final Logger logger = LoggerFactory.getLogger(HelpCommand.class);
+    private final ConsoleView view;
+
+    public HelpCommand(ConsoleView view) {
+        this.view = view;
+    }
+
+    @Override
+    public String getName() {
+        return "help";
+    }
+
+    @Override
+    public void execute(String[] args, CommandContext context) {
+        logger.debug("Help command executed");
+        view.printInfo("Commands:");
+        view.printInfo(" crawl - Crawl articles");
+        view.printInfo(" Types:");
+        view.printInfo(" blog - 模拟博客爬取(演示用)");
+        view.printInfo(" news - 模拟新闻爬取(演示用)");
+        view.printInfo(" real - 真实网页爬取(从目标网站获取真实内容)");
+        view.printInfo(" list - List all articles");
+        view.printInfo(" history - Show command history");
+        view.printInfo(" help - Show this help");
+        view.printInfo(" exit - Exit program");
+    }
+}
diff --git a/w11/src/main/java/com/example/datacollect/command/HistoryCommand.java b/w11/src/main/java/com/example/datacollect/command/HistoryCommand.java
new file mode 100644
index 0000000..7ee207f
--- /dev/null
+++ b/w11/src/main/java/com/example/datacollect/command/HistoryCommand.java
@@ -0,0 +1,48 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+
+/**
+ * CLI command {@code history}: prints the commands recorded in the {@link CommandContext}.
+ */
+public class HistoryCommand implements Command {
+    private static final Logger logger = LoggerFactory.getLogger(HistoryCommand.class);
+    private final ConsoleView view;
+
+    public HistoryCommand(ConsoleView view) {
+        this.view = view;
+    }
+
+    @Override
+    public String getName() {
+        return "history";
+    }
+
+    /**
+     * Prints every history entry as a 1-based numbered line, or a placeholder message when
+     * the history is empty.
+     *
+     * @param args    tokenized command line; not used by this command
+     * @param context supplies the history via {@link CommandContext#getHistory()}
+     */
+    @Override
+    public void execute(String[] args, CommandContext context) {
+        List<String> history = context.getHistory();
+        logger.debug("History command executed, {} entries", history.size());
+
+        if (history.isEmpty()) {
+            view.printInfo("暂无命令历史");
+            return;
+        }
+
+        view.printInfo("命令历史:");
+        for (int i = 0; i < history.size(); i++) {
+            // Go through the view like every other command instead of writing to System.out directly.
+            view.printInfo((i + 1) + ". " + history.get(i));
+        }
+    }
+}
\ No newline at end of file
diff --git a/w11/src/main/java/com/example/datacollect/command/ListCommand.java b/w11/src/main/java/com/example/datacollect/command/ListCommand.java
new file mode 100644
index 0000000..9c136fa
--- /dev/null
+++ b/w11/src/main/java/com/example/datacollect/command/ListCommand.java
@@ -0,0 +1,25 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class ListCommand implements Command {
+    private static final Logger logger = LoggerFactory.getLogger(ListCommand.class);
+    private final ConsoleView view;
+
+    public ListCommand(ConsoleView view) {
+        this.view = view;
+    }
+
+    @Override
+    public String getName() {
+        return "list";
+    }
+
+    @Override
+    public void execute(String[] args, CommandContext context) {
+        logger.debug("List command executed");
+        view.display(context.getRepository().findAll());
+    }
+}
diff --git a/w11/src/main/java/com/example/datacollect/controller/CrawlerController.java b/w11/src/main/java/com/example/datacollect/controller/CrawlerController.java
new file mode 100644
index 0000000..d6390d9
--- /dev/null
+++ b/w11/src/main/java/com/example/datacollect/controller/CrawlerController.java
@@ -0,0 +1,59 @@
+package com.example.datacollect.controller;
+
+import com.example.datacollect.command.Command;
+import com.example.datacollect.command.CommandContext;
+import com.example.datacollect.command.CrawlCommand;
+import com.example.datacollect.command.ExitCommand;
+import com.example.datacollect.command.HelpCommand;
+import com.example.datacollect.command.HistoryCommand;
+import com.example.datacollect.command.ListCommand;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.view.ConsoleView;
+import 
org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.HashMap; +import java.util.Map; + +public class CrawlerController { + private static final Logger logger = LoggerFactory.getLogger(CrawlerController.class); + private final Map commands = new HashMap<>(); + private final ConsoleView view; + private final CommandContext context; + + public CrawlerController(ConsoleView view, ArticleRepository repository) { + this.view = view; + this.context = new CommandContext(repository); + register(new HelpCommand(view)); + register(new ListCommand(view)); + register(new CrawlCommand(view)); + register(new ExitCommand(view)); + register(new HistoryCommand(view)); + logger.info("CrawlerController initialized with {} commands", commands.size()); + } + + private void register(Command command) { + commands.put(command.getName(), command); + logger.debug("Registered command: {}", command.getName()); + } + + public void handle(String input) { + String text = input == null ? "" : input.trim(); + if (text.isEmpty()) { + return; + } + + context.addToHistory(text); + + String[] args = text.split("\\s+"); + String cmdName = args[0].toLowerCase(); + Command command = commands.get(cmdName); + if (command == null) { + logger.warn("Unknown command received: {}", cmdName); + view.printError("Unknown command: " + cmdName); + return; + } + logger.debug("Executing command: {}", cmdName); + command.execute(args, context); + } +} diff --git a/w11/src/main/java/com/example/datacollect/exception/CrawlerException.java b/w11/src/main/java/com/example/datacollect/exception/CrawlerException.java new file mode 100644 index 0000000..bde38fd --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/exception/CrawlerException.java @@ -0,0 +1,11 @@ +package com.example.datacollect.exception; + +public class CrawlerException extends Exception { + public CrawlerException(String message) { + super(message); + } + + public CrawlerException(String message, Throwable cause) { + super(message, 
cause); + } +} diff --git a/w11/src/main/java/com/example/datacollect/exception/NetworkException.java b/w11/src/main/java/com/example/datacollect/exception/NetworkException.java new file mode 100644 index 0000000..b80f1bb --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/exception/NetworkException.java @@ -0,0 +1,11 @@ +package com.example.datacollect.exception; + +public class NetworkException extends CrawlerException { + public NetworkException(String message) { + super(message); + } + + public NetworkException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/w11/src/main/java/com/example/datacollect/exception/ParseException.java b/w11/src/main/java/com/example/datacollect/exception/ParseException.java new file mode 100644 index 0000000..ef4c5a1 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/exception/ParseException.java @@ -0,0 +1,11 @@ +package com.example.datacollect.exception; + +public class ParseException extends CrawlerException { + public ParseException(String message) { + super(message); + } + + public ParseException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/w11/src/main/java/com/example/datacollect/model/Article.java b/w11/src/main/java/com/example/datacollect/model/Article.java new file mode 100644 index 0000000..f3b0ca8 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/model/Article.java @@ -0,0 +1,75 @@ +package com.example.datacollect.model; + +import java.time.LocalDate; + +public class Article { + private String title; + private String url; + private String content; + private String author; + private LocalDate publishDate; + + public Article(String title, String url, String content) { + this.title = title; + this.url = url; + this.content = content; + } + + public Article(String title, String url, String content, String author, LocalDate publishDate) { + this.title = title; + this.url = url; + this.content = content; + this.author = author; 
+        this.publishDate = publishDate;
+    }
+
+    public String getTitle() {
+        return title;
+    }
+
+    public void setTitle(String title) {
+        this.title = title;
+    }
+
+    public String getUrl() {
+        return url;
+    }
+
+    public void setUrl(String url) {
+        this.url = url;
+    }
+
+    public String getContent() {
+        return content;
+    }
+
+    public void setContent(String content) {
+        this.content = content;
+    }
+
+    public String getAuthor() {
+        return author;
+    }
+
+    public void setAuthor(String author) {
+        this.author = author;
+    }
+
+    public LocalDate getPublishDate() {
+        return publishDate;
+    }
+
+    public void setPublishDate(LocalDate publishDate) {
+        this.publishDate = publishDate;
+    }
+
+    @Override
+    public String toString() {
+        return "Article{" +
+                "title='" + title + '\'' +
+                ", url='" + url + '\'' +
+                ", author='" + author + '\'' +
+                ", publishDate=" + publishDate +
+                '}';
+    }
+}
diff --git a/w11/src/main/java/com/example/datacollect/repository/ArticleRepository.java b/w11/src/main/java/com/example/datacollect/repository/ArticleRepository.java
new file mode 100644
index 0000000..e007d6e
--- /dev/null
+++ b/w11/src/main/java/com/example/datacollect/repository/ArticleRepository.java
@@ -0,0 +1,81 @@
+package com.example.datacollect.repository;
+
+import com.example.datacollect.model.Article;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Optional;
+
+public class ArticleRepository {
+    private static final Logger logger = LoggerFactory.getLogger(ArticleRepository.class);
+    private final List<Article> articles = new ArrayList<>();
+
+    public void add(Article article) {
+        if (article == null) {
+            logger.warn("Attempted to add null article to repository");
+            throw new IllegalArgumentException("Article cannot be null");
+        }
+        if (article.getUrl() == null || article.getUrl().isEmpty()) {
+            logger.warn("Attempted to add article with null or empty URL");
+            throw new IllegalArgumentException("Article URL cannot be null or empty");
+        }
+        if (findByUrl(article.getUrl()).isPresent()) {
+            logger.debug("Article with URL {} already exists, skipping", article.getUrl());
+            return;
+        }
+        articles.add(article);
+        logger.debug("Added article: {} ({})", article.getTitle(), article.getUrl());
+    }
+
+    /**
+     * Adds every valid, not-yet-stored article from the list. Entries rejected by
+     * {@link #add(Article)} (null or missing URL) are logged and skipped.
+     *
+     * @param articleList articles to store; must not be null
+     * @throws IllegalArgumentException if {@code articleList} is null
+     */
+    public void addAll(List<Article> articleList) {
+        if (articleList == null) {
+            logger.warn("Attempted to add null article list to repository");
+            throw new IllegalArgumentException("Article list cannot be null");
+        }
+        // add() returns silently for duplicate URLs, so count what was really stored
+        // via the size delta instead of counting successful calls.
+        int sizeBefore = articles.size();
+        for (Article article : articleList) {
+            try {
+                add(article);
+            } catch (IllegalArgumentException e) {
+                logger.warn("Skipping invalid article: {}", e.getMessage());
+            }
+        }
+        logger.info("Added {} articles to repository (total: {})", articles.size() - sizeBefore, articles.size());
+    }
+
+    public List
findAll() { + return Collections.unmodifiableList(new ArrayList<>(articles)); + } + + public Optional
findByUrl(String url) { + if (url == null || url.isEmpty()) { + logger.warn("findByUrl called with null or empty URL"); + return Optional.empty(); + } + return articles.stream() + .filter(a -> a.getUrl().equals(url)) + .findFirst(); + } + + public int count() { + return articles.size(); + } + + public void clear() { + int size = articles.size(); + articles.clear(); + logger.info("Cleared {} articles from repository", size); + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/strategy/BlogStrategy.java b/w11/src/main/java/com/example/datacollect/strategy/BlogStrategy.java new file mode 100644 index 0000000..585adcc --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/strategy/BlogStrategy.java @@ -0,0 +1,33 @@ +package com.example.datacollect.strategy; + +import com.example.datacollect.exception.CrawlerException; +import com.example.datacollect.exception.ParseException; +import com.example.datacollect.model.Article; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; + +public class BlogStrategy implements CrawlStrategy { + private static final Logger logger = LoggerFactory.getLogger(BlogStrategy.class); + + @Override + public String getType() { + return "blog"; + } + + @Override + public List
crawl(String url) throws CrawlerException { + logger.info("Starting blog crawl for: {}", url); + List
articles = new ArrayList<>(); + articles.add(new Article("Java编程入门教程", "https://www.oracle.com/java/technologies/get-started/", "Oracle官方Java入门教程,涵盖Java基础知识和开发环境配置")); + articles.add(new Article("Java最佳实践指南", "https://www.baeldung.com/java-best-practices", "Baeldung提供的Java编程最佳实践,包括代码规范、性能优化等")); + logger.info("Successfully crawled {} articles from {}", articles.size(), url); + return articles; + } + + @Override + public void parse(String html, String url) throws ParseException { + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java b/w11/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java new file mode 100644 index 0000000..7ab4013 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java @@ -0,0 +1,13 @@ +package com.example.datacollect.strategy; + +import com.example.datacollect.exception.CrawlerException; +import com.example.datacollect.exception.ParseException; +import com.example.datacollect.model.Article; + +import java.util.List; + +public interface CrawlStrategy { + String getType(); + List
crawl(String url) throws CrawlerException; + void parse(String html, String url) throws ParseException; +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/strategy/NewsStrategy.java b/w11/src/main/java/com/example/datacollect/strategy/NewsStrategy.java new file mode 100644 index 0000000..14abe34 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/strategy/NewsStrategy.java @@ -0,0 +1,56 @@ +package com.example.datacollect.strategy; + +import com.example.datacollect.exception.CrawlerException; +import com.example.datacollect.exception.ParseException; +import com.example.datacollect.model.Article; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; + +public class NewsStrategy implements CrawlStrategy { + private static final Logger logger = LoggerFactory.getLogger(NewsStrategy.class); + + @Override + public String getType() { + return "news"; + } + + @Override + public List
crawl(String url) throws CrawlerException { + logger.info("Starting news crawl for: {}", url); + List
articles = new ArrayList<>(); + + String baseUrl = url.replaceAll("/$", ""); + + if (url.contains("toutiao.com")) { + articles.add(new Article("今日头条 - 热点新闻", "https://www.toutiao.com/", "今日头条热点新闻聚合平台")); + articles.add(new Article("今日头条科技", "https://www.toutiao.com/c/user/token/MS4wLjABAAAAlS0q8OYF0X0Kf2dJ7w0wFg/", "科技资讯频道")); + articles.add(new Article("今日头条娱乐", "https://www.toutiao.com/c/user/token/MS4wLjABAAAAj80G9D28h2a8q9F9x9x9x9/", "娱乐新闻频道")); + } else if (url.contains("sina.com") || url.contains("sina.cn")) { + articles.add(new Article("新浪新闻 - 国内新闻", "https://news.sina.com.cn/", "新浪国内新闻频道")); + articles.add(new Article("新浪财经", "https://finance.sina.com.cn/", "财经资讯")); + articles.add(new Article("新浪体育", "https://sports.sina.com.cn/", "体育新闻")); + } else if (url.contains("qq.com")) { + articles.add(new Article("腾讯新闻", "https://news.qq.com/", "腾讯新闻频道")); + articles.add(new Article("腾讯财经", "https://finance.qq.com/", "财经资讯")); + articles.add(new Article("腾讯科技", "https://tech.qq.com/", "科技新闻")); + } else if (url.contains("163.com")) { + articles.add(new Article("网易新闻", "https://news.163.com/", "网易新闻频道")); + articles.add(new Article("网易财经", "https://money.163.com/", "财经资讯")); + articles.add(new Article("网易科技", "https://tech.163.com/", "科技新闻")); + } else { + articles.add(new Article("科技新闻 - TechCrunch", "https://techcrunch.com/", "TechCrunch提供最新的科技新闻")); + articles.add(new Article("国际新闻 - BBC News", "https://www.bbc.com/news", "BBC News全球新闻")); + articles.add(new Article("商业新闻 - Reuters", "https://www.reuters.com/", "路透社商业新闻")); + } + + logger.info("Successfully crawled {} articles from {}", articles.size(), url); + return articles; + } + + @Override + public void parse(String html, String url) throws ParseException { + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/strategy/RealCrawlStrategy.java b/w11/src/main/java/com/example/datacollect/strategy/RealCrawlStrategy.java new file mode 100644 index 0000000..5005b5b --- 
/dev/null +++ b/w11/src/main/java/com/example/datacollect/strategy/RealCrawlStrategy.java @@ -0,0 +1,178 @@ +package com.example.datacollect.strategy; + +import com.example.datacollect.exception.CrawlerException; +import com.example.datacollect.exception.NetworkException; +import com.example.datacollect.exception.ParseException; +import com.example.datacollect.model.Article; +import org.jsoup.nodes.Document; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; +import java.util.HashSet; + +public class RealCrawlStrategy implements CrawlStrategy { + private static final Logger logger = LoggerFactory.getLogger(RealCrawlStrategy.class); + + private static final Set NEWS_KEYWORDS = Set.of("news", "article", "post", "story", "report", "blog"); + + @Override + public String getType() { + return "real"; + } + + @Override + public List
crawl(String url) throws CrawlerException { + List
articles = new ArrayList<>(); + Set visitedUrls = new HashSet<>(); + + try { + logger.info("Starting real crawl for: {}", url); + + Document doc = WebCrawler.fetchDocument(url); + String pageTitle = WebCrawler.extractTitle(doc); + String pageContent = WebCrawler.extractContent(doc); + + logger.debug("Page title extracted: {}", pageTitle); + articles.add(new Article(pageTitle, url, pageContent)); + visitedUrls.add(url); + + List links = WebCrawler.extractLinks(doc, url); + logger.debug("Found {} links on page", links.size()); + + if (links.size() > 0) { + logger.debug("First 5 links:"); + for (int i = 0; i < Math.min(5, links.size()); i++) { + logger.debug(" {}: {} (isArticle: {})", i + 1, links.get(i), isArticleLink(links.get(i))); + } + } + + int count = 0; + for (String link : links) { + if (count >= 5) break; + if (visitedUrls.contains(link)) continue; + if (!isArticleLink(link)) continue; + + try { + Document articleDoc = WebCrawler.fetchDocument(link); + String articleTitle = WebCrawler.extractTitle(articleDoc); + String articleContent = WebCrawler.extractContent(articleDoc); + + articles.add(new Article(articleTitle, link, articleContent)); + visitedUrls.add(link); + count++; + + logger.debug("Crawled article: {} - {}", articleTitle, link); + } catch (NetworkException e) { + logger.warn("Failed to crawl article: {} - {}", link, e.getMessage()); + } + } + + if (articles.size() == 1 && "No Title".equals(pageTitle)) { + logger.warn("Only 1 article found with no title, trying alternative extraction"); + articles = extractArticlesFromPage(doc, url); + } + + logger.info("Successfully crawled {} articles from {}", articles.size(), url); + + } catch (NetworkException e) { + logger.error("Failed to crawl {}: {}", url, e.getMessage()); + throw e; + } + + return articles; + } + + private List
extractArticlesFromPage(Document doc, String baseUrl) { + List
articles = new ArrayList<>(); + + articles.add(new Article(doc.title(), baseUrl, WebCrawler.extractContent(doc))); + + return articles; + } + + private boolean isArticleLink(String url) { + String lowerUrl = url.toLowerCase(); + + // 排除非文章链接 + if (lowerUrl.contains("/login") || + lowerUrl.contains("/register") || + lowerUrl.contains("/logout") || + lowerUrl.contains("/search") || + lowerUrl.contains("/about") || + lowerUrl.contains("/contact") || + lowerUrl.contains("/terms") || + lowerUrl.contains("/privacy") || + lowerUrl.contains("/sitemap") || + lowerUrl.contains("/feed") || + lowerUrl.contains("?") || + lowerUrl.contains(".json") || + lowerUrl.contains(".xml") || + lowerUrl.contains("#") || + lowerUrl.contains("/courses/") || + lowerUrl.endsWith("/") || + lowerUrl.endsWith("/start-here") || + lowerUrl.endsWith("/home") || + lowerUrl.endsWith("/index")) { + return false; + } + + // 包含文章相关关键词的链接 + if (lowerUrl.contains("/article/") || + lowerUrl.contains("/posts/") || + lowerUrl.contains("/blog/") || + lowerUrl.contains("/news/") || + lowerUrl.contains("/story/") || + lowerUrl.contains("/post/") || + lowerUrl.contains("/tutorial/") || + lowerUrl.contains("/guide/") || + lowerUrl.contains("/learn/") || + lowerUrl.contains("/articles/") || + lowerUrl.contains("/java-") || + lowerUrl.contains("/spring-") || + lowerUrl.contains("/kotlin-") || + lowerUrl.contains("/maven-") || + lowerUrl.contains("/gradle-") || + lowerUrl.contains("/junit-") || + lowerUrl.contains("/hibernate-") || + lowerUrl.contains("/jdbc-") || + lowerUrl.contains("/concurrent-") || + lowerUrl.contains("/stream-") || + lowerUrl.contains("/regex-") || + lowerUrl.contains("/json-") || + lowerUrl.contains("/xml-") || + lowerUrl.contains("/security-") || + lowerUrl.contains("/test-")) { + return true; + } + + // 带数字ID或日期格式的链接 + if (lowerUrl.matches(".*/\\d+\\.html?$") || + lowerUrl.matches(".*/[\\w-]+\\.html?$") || + lowerUrl.matches(".*/\\d+/\\d+/\\d+/.*") || + lowerUrl.matches(".*/\\d{4}/\\d{2}/.*") 
|| + lowerUrl.matches(".*/\\d{4}/\\d{2}/\\d{2}/.*") || + lowerUrl.matches(".*/\\d{2}-\\d{2}-.*")) { + return true; + } + + // 对于 baeldung 这类技术博客,识别 /java-something 形式的链接 + if (lowerUrl.matches(".*/java-[\\w-]+$") || + lowerUrl.matches(".*/spring-[\\w-]+$") || + lowerUrl.matches(".*/kotlin-[\\w-]+$")) { + return true; + } + + return false; + } + + @Override + public void parse(String html, String url) throws ParseException { + if (html == null || html.isEmpty()) { + throw new ParseException("HTML content is null or empty for URL: " + url); + } + logger.debug("Parsing HTML content for URL: {}", url); + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/strategy/StrategyFactory.java b/w11/src/main/java/com/example/datacollect/strategy/StrategyFactory.java new file mode 100644 index 0000000..5390ffa --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/strategy/StrategyFactory.java @@ -0,0 +1,46 @@ +package com.example.datacollect.strategy; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.HashMap; +import java.util.Map; +import java.util.ServiceLoader; + +public class StrategyFactory { + private static final Logger logger = LoggerFactory.getLogger(StrategyFactory.class); + private static final Map strategies = new HashMap<>(); + + static { + loadStrategies(); + } + + private static synchronized void loadStrategies() { + if (!strategies.isEmpty()) { + return; + } + ServiceLoader loader = ServiceLoader.load(CrawlStrategy.class); + for (CrawlStrategy strategy : loader) { + strategies.put(strategy.getType().toLowerCase(), strategy); + logger.debug("Loaded strategy: {}", strategy.getType()); + } + logger.info("Loaded {} strategies", strategies.size()); + } + + public static CrawlStrategy getStrategy(String type) { + CrawlStrategy strategy = strategies.get(type.toLowerCase()); + if (strategy == null) { + logger.error("Unknown strategy type requested: {}", type); + throw new 
IllegalArgumentException("Unknown strategy type: " + type);
+        }
+        return strategy;
+    }
+
+    public static boolean hasStrategy(String type) {
+        return strategies.containsKey(type.toLowerCase());
+    }
+
+    public static String getSupportedTypes() {
+        return String.join(", ", strategies.keySet());
+    }
+}
\ No newline at end of file
diff --git a/w11/src/main/java/com/example/datacollect/strategy/WebCrawler.java b/w11/src/main/java/com/example/datacollect/strategy/WebCrawler.java
new file mode 100644
index 0000000..468b292
--- /dev/null
+++ b/w11/src/main/java/com/example/datacollect/strategy/WebCrawler.java
@@ -0,0 +1,265 @@
+package com.example.datacollect.strategy;
+
+import com.example.datacollect.exception.NetworkException;
+import okhttp3.OkHttpClient;
+import okhttp3.Request;
+import okhttp3.Response;
+import okhttp3.ResponseBody;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+public class WebCrawler {
+    private static final Logger logger = LoggerFactory.getLogger(WebCrawler.class);
+
+    private static final OkHttpClient client = new OkHttpClient.Builder()
+            .connectTimeout(java.time.Duration.ofSeconds(30))
+            .readTimeout(java.time.Duration.ofSeconds(30))
+            .followRedirects(true)
+            .followSslRedirects(true)
+            .build();
+
+    private static final Pattern URL_PATTERN = Pattern.compile("^https?://");
+
+    private static final Set<String> VALID_EXTENSIONS = Set.of(
+            ".html", ".htm", ".php", ".asp", ".aspx", ".jsp", ""
+    );
+
+    private static final int MAX_RETRIES = 3;
+    private static final long RETRY_DELAY_MS = 1000;
+
+    public static Document fetchDocument(String url) throws NetworkException {
+        int attempt = 0;
+        IOException lastException = null;
+
+        while (attempt < MAX_RETRIES) {
+            attempt++;
+            try {
+                logger.debug("Fetching document attempt {} of {} for URL: {}", attempt, MAX_RETRIES, url);
+                return doFetchDocument(url);
+            } catch (IOException e) {
+                lastException = e;
+                logger.warn("Fetch attempt {} failed for URL {}: {}", attempt, url, e.getMessage());
+                if (attempt < MAX_RETRIES) {
+                    try {
+                        Thread.sleep(RETRY_DELAY_MS);
+                    } catch (InterruptedException ie) {
+                        Thread.currentThread().interrupt();
+                        throw new NetworkException("Fetching interrupted for URL: " + url, ie);
+                    }
+                }
+            }
+        }
+
+        throw new NetworkException("Failed to fetch document after " + MAX_RETRIES + " attempts for URL: " + url, lastException);
+    }
+
+    /**
+     * Performs a single HTTP GET and parses the response body as HTML.
+     *
+     * <p>The body is decoded with the charset declared in the {@code Content-Type} header when one
+     * is present; otherwise jsoup detects it from a BOM or {@code <meta>} tag and falls back to
+     * UTF-8, so pages served in other encodings (for example GBK) are no longer garbled.
+     *
+     * @param url absolute URL to fetch
+     * @return the parsed document, with {@code url} as its base URI
+     * @throws IOException on transport errors, non-2xx responses or a missing body; the caller
+     *     retries these
+     * @throws NetworkException if the response declares a non-HTML content type; a retry cannot
+     *     change that, so it bypasses the caller's retry loop
+     */
+    private static Document doFetchDocument(String url) throws IOException, NetworkException {
+        Request request = new Request.Builder()
+                .url(url)
+                .header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
+                .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
+                .header("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
+                .header("Connection", "keep-alive")
+                .get()
+                .build();
+
+        try (Response response = client.newCall(request).execute()) {
+            if (!response.isSuccessful()) {
+                throw new IOException("HTTP request failed with code: " + response.code());
+            }
+
+            String contentType = response.header("Content-Type");
+            logger.debug("Content-Type: {}", contentType);
+
+            if (contentType != null && !contentType.contains("text/html")) {
+                throw new NetworkException("Not an HTML document: " + contentType);
+            }
+
+            ResponseBody body = response.body();
+            if (body == null) {
+                throw new IOException("Response body is null");
+            }
+
+            // Read the declared charset before consuming the body.
+            okhttp3.MediaType mediaType = body.contentType();
+            java.nio.charset.Charset declaredCharset = mediaType != null ? mediaType.charset() : null;
+
+            byte[] bytes = body.bytes();
+            logger.debug("Fetched HTML content length: {} bytes", bytes.length);
+
+            if (bytes.length < 100) {
+                logger.warn("HTML content is very short");
+            }
+
+            // A null charset name tells jsoup to sniff the encoding from the document itself.
+            Document doc = Jsoup.parse(
+                    new java.io.ByteArrayInputStream(bytes),
+                    declaredCharset != null ? declaredCharset.name() : null,
+                    url);
+            logger.debug("Parsed document title: '{}'", doc.title());
+            logger.debug("Number of elements in body: {}", doc.body() != null ? doc.body().getAllElements().size() : 0);
+
+            return doc;
+        }
+    }
+
+    public static List<String> extractLinks(Document doc, String baseUrl) {
+        List<String> links = new ArrayList<>();
+        Set<String> seen = new HashSet<>();
+
+        Elements anchorTags = doc.select("a");
+        logger.debug("Found {} anchor tags", anchorTags.size());
+
+        for (Element anchor : anchorTags) {
+            String href = anchor.attr("abs:href");
+
+            if (href == null || href.isEmpty()) {
+                href = anchor.attr("href");
+                if (href != null && !href.isEmpty() && !href.startsWith("http")) {
+                    href = resolveUrl(baseUrl, href);
+                }
+            }
+
+            if (href != null && isValidUrl(href) && !seen.contains(href)) {
+                seen.add(href);
+                links.add(href);
+                logger.trace("Added link: {}", href);
+            }
+        }
+
+        logger.debug("Extracted {} valid links from page", links.size());
+        return links;
+    }
+
+    private static String resolveUrl(String baseUrl, String relativeUrl) {
+        try {
+            java.net.URL base = new java.net.URL(baseUrl);
+            return new java.net.URL(base, relativeUrl).toString();
+        } catch (java.net.MalformedURLException e) {
+            logger.warn("Failed to resolve URL: {} + {}", baseUrl, relativeUrl);
+            return relativeUrl;
+        }
+    }
+
+    private static boolean isValidUrl(String url) {
+        if (url == null || url.isEmpty()) {
+            return false;
+        }
+
+        if (!URL_PATTERN.matcher(url).find()) {
+            return false;
+        }
+
+        if (url.contains("#")) {
+            url = url.substring(0, url.indexOf("#"));
+        }
+
+        String lowerUrl = url.toLowerCase();
+
+        // Exclude common non-HTML file types
+        if (lowerUrl.endsWith(".pdf") || lowerUrl.endsWith(".doc") ||
+                lowerUrl.endsWith(".docx") || lowerUrl.endsWith(".xls") ||
+                lowerUrl.endsWith(".xlsx") || lowerUrl.endsWith(".zip") ||
+                lowerUrl.endsWith(".rar") || lowerUrl.endsWith(".exe") ||
+                lowerUrl.endsWith(".jpg") || lowerUrl.endsWith(".jpeg") ||
+ 
lowerUrl.endsWith(".png") || lowerUrl.endsWith(".gif") || + lowerUrl.endsWith(".svg") || lowerUrl.endsWith(".css") || + lowerUrl.endsWith(".js") || lowerUrl.endsWith(".json") || + lowerUrl.endsWith(".xml") || lowerUrl.endsWith(".txt")) { + return false; + } + + // 允许所有其他类型的URL(包括无扩展名的URL) + return true; + } + + private static String getExtension(String url) { + int lastDot = url.lastIndexOf('.'); + int lastSlash = url.lastIndexOf('/'); + if (lastDot > lastSlash) { + int queryIndex = url.indexOf('?'); + if (queryIndex > lastDot) { + return url.substring(lastDot, queryIndex); + } + return url.substring(lastDot); + } + return ""; + } + + public static String extractTitle(Document doc) { + // 尝试获取title标签 + Element titleElement = doc.selectFirst("title"); + if (titleElement != null && !titleElement.text().trim().isEmpty()) { + return titleElement.text().trim(); + } + + // 尝试获取h1标签 + Element h1Element = doc.selectFirst("h1"); + if (h1Element != null && !h1Element.text().trim().isEmpty()) { + return h1Element.text().trim(); + } + + // 尝试获取文章标题类 + Elements titleClasses = doc.select(".title, .post-title, .article-title, .entry-title, [class*=title]"); + if (!titleClasses.isEmpty() && !titleClasses.first().text().trim().isEmpty()) { + return titleClasses.first().text().trim(); + } + + // 尝试获取meta title + Element metaTitle = doc.selectFirst("meta[property=og:title], meta[name=title]"); + if (metaTitle != null && !metaTitle.attr("content").trim().isEmpty()) { + return metaTitle.attr("content").trim(); + } + + return "No Title"; + } + + public static String extractMetaDescription(Document doc) { + Element metaDesc = doc.selectFirst("meta[name=description]"); + if (metaDesc != null) { + return metaDesc.attr("content").trim(); + } + return ""; + } + + public static String extractContent(Document doc) { + Elements contentSelectors = doc.select("article, .article, .content, .post-content, .entry-content, main"); + + if (!contentSelectors.isEmpty()) { + return 
contentSelectors.first().text().trim(); + } + + Element body = doc.body(); + if (body != null) { + return body.text().trim().substring(0, Math.min(body.text().length(), 500)) + "..."; + } + + return ""; + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/test/CrawlerDemo.java b/w11/src/main/java/com/example/datacollect/test/CrawlerDemo.java new file mode 100644 index 0000000..e62f00f --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/test/CrawlerDemo.java @@ -0,0 +1,118 @@ +package com.example.datacollect.test; + +import com.example.datacollect.command.CommandContext; +import com.example.datacollect.command.CrawlCommand; +import com.example.datacollect.command.ListCommand; +import com.example.datacollect.command.HelpCommand; +import com.example.datacollect.command.HistoryCommand; +import com.example.datacollect.model.Article; +import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.strategy.CrawlStrategy; +import com.example.datacollect.strategy.StrategyFactory; +import com.example.datacollect.view.ConsoleView; + +import java.util.List; + +public class CrawlerDemo { + + public static void main(String[] args) { + ConsoleView view = new ConsoleView(); + ArticleRepository repository = new ArticleRepository(); + CommandContext context = new CommandContext(repository); + + System.out.println("========================================"); + System.out.println(" 爬虫功能演示"); + System.out.println("========================================\n"); + + demo1_StrategyFactory(view); + demo2_BlogCrawl(view, repository, context); + demo3_NewsCrawl(view, repository, context); + demo4_ListArticles(view, repository, context); + demo5_CommandHistory(view, context); + demo6_RepositoryFeatures(repository); + + System.out.println("\n========================================"); + System.out.println(" 演示完成!"); + System.out.println("========================================"); + } + + private static void 
demo1_StrategyFactory(ConsoleView view) { + System.out.println("【演示1】策略工厂功能"); + System.out.println("-".repeat(40)); + + System.out.println("支持的策略类型: " + StrategyFactory.getSupportedTypes()); + + CrawlStrategy blogStrategy = StrategyFactory.getStrategy("blog"); + System.out.println("Blog策略类型: " + blogStrategy.getType()); + + CrawlStrategy newsStrategy = StrategyFactory.getStrategy("news"); + System.out.println("News策略类型: " + newsStrategy.getType()); + + try { + StrategyFactory.getStrategy("unknown"); + } catch (IllegalArgumentException e) { + System.out.println("未知策略测试: " + e.getMessage()); + } + System.out.println(); + } + + private static void demo2_BlogCrawl(ConsoleView view, ArticleRepository repository, CommandContext context) { + System.out.println("【演示2】博客爬取"); + System.out.println("-".repeat(40)); + + CrawlCommand crawlCommand = new CrawlCommand(view); + crawlCommand.execute(new String[]{"crawl", "blog", "http://example.com"}, context); + + System.out.println("仓库中文章数量: " + repository.count()); + System.out.println(); + } + + private static void demo3_NewsCrawl(ConsoleView view, ArticleRepository repository, CommandContext context) { + System.out.println("【演示3】新闻爬取"); + System.out.println("-".repeat(40)); + + CrawlCommand crawlCommand = new CrawlCommand(view); + crawlCommand.execute(new String[]{"crawl", "news", "http://news.com"}, context); + + System.out.println("仓库中文章总数: " + repository.count()); + System.out.println(); + } + + private static void demo4_ListArticles(ConsoleView view, ArticleRepository repository, CommandContext context) { + System.out.println("【演示4】列出所有文章"); + System.out.println("-".repeat(40)); + + ListCommand listCommand = new ListCommand(view); + listCommand.execute(new String[]{"list"}, context); + System.out.println(); + } + + private static void demo5_CommandHistory(ConsoleView view, CommandContext context) { + System.out.println("【演示5】命令历史"); + System.out.println("-".repeat(40)); + + HistoryCommand historyCommand = new 
HistoryCommand(view); + historyCommand.execute(new String[]{"history"}, context); + System.out.println(); + } + + private static void demo6_RepositoryFeatures(ArticleRepository repository) { + System.out.println("【演示6】仓库功能"); + System.out.println("-".repeat(40)); + + System.out.println("文章总数: " + repository.count()); + + Article firstArticle = repository.findAll().get(0); + System.out.println("第一篇文章标题: " + firstArticle.getTitle()); + + String url = firstArticle.getUrl(); + repository.findByUrl(url).ifPresent(article -> + System.out.println("按URL查找成功: " + article.getTitle()) + ); + + System.out.println("清空前文章数: " + repository.count()); + repository.clear(); + System.out.println("清空后文章数: " + repository.count()); + System.out.println(); + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/test/CrawlerTest.java b/w11/src/main/java/com/example/datacollect/test/CrawlerTest.java new file mode 100644 index 0000000..aa0ab2f --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/test/CrawlerTest.java @@ -0,0 +1,83 @@ +package com.example.datacollect.test; + +import com.example.datacollect.command.CommandContext; +import com.example.datacollect.command.CrawlCommand; +import com.example.datacollect.command.ListCommand; +import com.example.datacollect.model.Article; +import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.strategy.CrawlStrategy; +import com.example.datacollect.strategy.StrategyFactory; +import com.example.datacollect.view.ConsoleView; + +import java.util.List; + +public class CrawlerTest { + + public static void main(String[] args) { + System.out.println("=== Crawler Test Suite ==="); + + testStrategyFactory(); + testArticleRepositoryImmutable(); + testCrawlToListFlow(); + + System.out.println("\n=== All tests passed! ==="); + } + + private static void testStrategyFactory() { + System.out.println("\n1. 
Testing StrategyFactory SPI loading..."); + + assert StrategyFactory.hasStrategy("blog") : "blog strategy should be registered"; + assert StrategyFactory.hasStrategy("news") : "news strategy should be registered"; + assert !StrategyFactory.hasStrategy("unknown") : "unknown strategy should not be registered"; + + CrawlStrategy blogStrategy = StrategyFactory.getStrategy("blog"); + assert "blog".equals(blogStrategy.getType()) : "blog strategy type mismatch"; + + CrawlStrategy newsStrategy = StrategyFactory.getStrategy("news"); + assert "news".equals(newsStrategy.getType()) : "news strategy type mismatch"; + + System.out.println(" ✓ StrategyFactory loads strategies via SPI"); + System.out.println(" ✓ Supported types: " + StrategyFactory.getSupportedTypes()); + } + + private static void testArticleRepositoryImmutable() { + System.out.println("\n2. Testing ArticleRepository immutability..."); + + ArticleRepository repository = new ArticleRepository(); + repository.add(new Article("Test", "http://test.com", "content")); + + List
articles = repository.findAll(); + + try { + articles.add(new Article("Should Not Add", "http://fail.com", "fail")); + assert false : "Should have thrown UnsupportedOperationException"; + } catch (UnsupportedOperationException e) { + System.out.println(" ✓ getAll() returns immutable view"); + } + + assert repository.count() == 1 : "Repository should have 1 article"; + System.out.println(" ✓ Repository count is correct"); + } + + private static void testCrawlToListFlow() { + System.out.println("\n3. Testing crawl → list flow..."); + + ConsoleView view = new ConsoleView(); + ArticleRepository repository = new ArticleRepository(); + CommandContext context = new CommandContext(repository); + + CrawlCommand crawlCommand = new CrawlCommand(view); + ListCommand listCommand = new ListCommand(view); + + crawlCommand.execute(new String[]{"crawl", "blog", "http://example.com"}, context); + assert repository.count() == 2 : "Should have 2 blog articles"; + System.out.println(" ✓ Crawl blog strategy: " + repository.count() + " articles"); + + crawlCommand.execute(new String[]{"crawl", "news", "http://news.com"}, context); + assert repository.count() == 5 : "Should have 5 articles total (2 blog + 3 news)"; + System.out.println(" ✓ Crawl news strategy: " + repository.count() + " articles total"); + + listCommand.execute(new String[]{"list"}, context); + System.out.println(" ✓ List command executed successfully"); + } +} diff --git a/w11/src/main/java/com/example/datacollect/test/RealCrawlTest.java b/w11/src/main/java/com/example/datacollect/test/RealCrawlTest.java new file mode 100644 index 0000000..ec06d88 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/test/RealCrawlTest.java @@ -0,0 +1,53 @@ +package com.example.datacollect.test; + +import com.example.datacollect.command.CommandContext; +import com.example.datacollect.command.CrawlCommand; +import com.example.datacollect.command.ListCommand; +import com.example.datacollect.repository.ArticleRepository; +import 
com.example.datacollect.view.ConsoleView; + +public class RealCrawlTest { + + public static void main(String[] args) { + ConsoleView view = new ConsoleView(); + ArticleRepository repository = new ArticleRepository(); + CommandContext context = new CommandContext(repository); + + System.out.println("========================================"); + System.out.println(" 测试真实网页爬取功能"); + System.out.println("========================================"); + System.out.println(); + + CrawlCommand crawlCommand = new CrawlCommand(view); + + // 测试真实爬取(使用真实博客网站) + System.out.println("【测试1】爬取 Baeldung 博客"); + System.out.println("-".repeat(40)); + try { + crawlCommand.execute(new String[]{"crawl", "real", "https://www.baeldung.com/"}, context); + } catch (Exception e) { + System.out.println("爬取失败: " + e.getMessage()); + } + + System.out.println(); + System.out.println("【测试2】爬取今日头条"); + System.out.println("-".repeat(40)); + try { + crawlCommand.execute(new String[]{"crawl", "real", "https://www.toutiao.com/"}, context); + } catch (Exception e) { + System.out.println("爬取失败: " + e.getMessage()); + } + + System.out.println(); + System.out.println("【爬取结果】"); + System.out.println("-".repeat(40)); + + ListCommand listCommand = new ListCommand(view); + listCommand.execute(new String[]{"list"}, context); + + System.out.println(); + System.out.println("========================================"); + System.out.println(" 测试完成!"); + System.out.println("========================================"); + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/test/ToutiaoTest.java b/w11/src/main/java/com/example/datacollect/test/ToutiaoTest.java new file mode 100644 index 0000000..1ff02d5 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/test/ToutiaoTest.java @@ -0,0 +1,37 @@ +package com.example.datacollect.test; + +import com.example.datacollect.command.CommandContext; +import com.example.datacollect.command.CrawlCommand; +import 
com.example.datacollect.command.ListCommand; +import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.view.ConsoleView; + +public class ToutiaoTest { + + public static void main(String[] args) { + ConsoleView view = new ConsoleView(); + ArticleRepository repository = new ArticleRepository(); + CommandContext context = new CommandContext(repository); + + System.out.println("========================================"); + System.out.println(" 测试今日头条爬取"); + System.out.println("========================================"); + System.out.println(); + + // 测试爬取今日头条 + CrawlCommand crawlCommand = new CrawlCommand(view); + crawlCommand.execute(new String[]{"crawl", "news", "https://www.toutiao.com/"}, context); + + System.out.println(); + System.out.println("【爬取结果】"); + System.out.println("-".repeat(40)); + + ListCommand listCommand = new ListCommand(view); + listCommand.execute(new String[]{"list"}, context); + + System.out.println(); + System.out.println("========================================"); + System.out.println(" 测试完成!"); + System.out.println("========================================"); + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/view/ConsoleView.java b/w11/src/main/java/com/example/datacollect/view/ConsoleView.java new file mode 100644 index 0000000..3c1d47a --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/view/ConsoleView.java @@ -0,0 +1,42 @@ +package com.example.datacollect.view; + +import com.example.datacollect.model.Article; +import java.util.List; +import java.util.Scanner; + +public class ConsoleView { + private static final String ANSI_RESET = "\u001B[0m"; + private static final String ANSI_GREEN = "\u001B[32m"; + private static final String ANSI_RED = "\u001B[31m"; + private static final String ANSI_BLUE = "\u001B[34m"; + + private final Scanner scanner = new Scanner(System.in); + + public String readLine() { + System.out.print("> "); + return scanner.nextLine(); 
+ } + + public void printSuccess(String msg) { + System.out.println(ANSI_GREEN + msg + ANSI_RESET); + } + + public void printError(String msg) { + System.out.println(ANSI_RED + msg + ANSI_RESET); + } + + public void printInfo(String msg) { + System.out.println(ANSI_BLUE + msg + ANSI_RESET); + } + + public void display(List
articles) { + if (articles.isEmpty()) { + printInfo("暂无文章,请先执行 crawl。"); + return; + } + for (int i = 0; i < articles.size(); i++) { + Article a = articles.get(i); + System.out.println((i + 1) + ". " + a.getTitle() + " | " + a.getUrl()); + } + } +} diff --git a/w11/src/main/resources/META-INF/services/com.example.datacollect.strategy.CrawlStrategy b/w11/src/main/resources/META-INF/services/com.example.datacollect.strategy.CrawlStrategy new file mode 100644 index 0000000..d3d7287 --- /dev/null +++ b/w11/src/main/resources/META-INF/services/com.example.datacollect.strategy.CrawlStrategy @@ -0,0 +1,3 @@ +com.example.datacollect.strategy.BlogStrategy +com.example.datacollect.strategy.NewsStrategy +com.example.datacollect.strategy.RealCrawlStrategy diff --git a/w11/src/main/resources/logback.xml b/w11/src/main/resources/logback.xml new file mode 100644 index 0000000..716d4dc --- /dev/null +++ b/w11/src/main/resources/logback.xml @@ -0,0 +1,45 @@ + + + + + %d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg%n + + + + + + logs/application.log + + %d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg%n + + + + + + logs/application.log + + logs/application.%d{yyyy-MM-dd}.%i.log + 30 + + 10MB + + + + %d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/w11/target/classes/META-INF/services/com.example.datacollect.strategy.CrawlStrategy b/w11/target/classes/META-INF/services/com.example.datacollect.strategy.CrawlStrategy new file mode 100644 index 0000000..d3d7287 --- /dev/null +++ b/w11/target/classes/META-INF/services/com.example.datacollect.strategy.CrawlStrategy @@ -0,0 +1,3 @@ +com.example.datacollect.strategy.BlogStrategy +com.example.datacollect.strategy.NewsStrategy +com.example.datacollect.strategy.RealCrawlStrategy diff --git a/w11/target/classes/logback.xml b/w11/target/classes/logback.xml new file mode 100644 index 0000000..716d4dc --- /dev/null +++ 
b/w11/target/classes/logback.xml @@ -0,0 +1,45 @@ + + + + + %d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg%n + + + + + + logs/application.log + + %d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg%n + + + + + + logs/application.log + + logs/application.%d{yyyy-MM-dd}.%i.log + 30 + + 10MB + + + + %d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/w11/target/maven-archiver/pom.properties b/w11/target/maven-archiver/pom.properties new file mode 100644 index 0000000..5c1de34 --- /dev/null +++ b/w11/target/maven-archiver/pom.properties @@ -0,0 +1,3 @@ +artifactId=datacollect-cli +groupId=com.example +version=0.1.0 diff --git a/w11/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst b/w11/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst new file mode 100644 index 0000000..a7e982e --- /dev/null +++ b/w11/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst @@ -0,0 +1,20 @@ +com\example\datacollect\command\ListCommand.class +com\example\datacollect\command\CrawlCommand.class +com\example\datacollect\view\ConsoleView.class +com\example\datacollect\test\ToutiaoTest.class +com\example\datacollect\strategy\NewsStrategy.class +com\example\datacollect\command\CommandContext.class +com\example\datacollect\command\Command.class +com\example\datacollect\test\CrawlerTest.class +com\example\datacollect\test\CrawlerDemo.class +com\example\datacollect\strategy\CrawlStrategy.class +com\example\datacollect\model\Article.class +com\example\datacollect\strategy\WebCrawler.class +com\example\datacollect\strategy\BlogStrategy.class +com\example\datacollect\repository\ArticleRepository.class +com\example\datacollect\Main.class +com\example\datacollect\command\ExitCommand.class +com\example\datacollect\command\HelpCommand.class +com\example\datacollect\command\HistoryCommand.class 
+com\example\datacollect\controller\CrawlerController.class +com\example\datacollect\strategy\StrategyFactory.class diff --git a/w11/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst b/w11/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst new file mode 100644 index 0000000..77c6c35 --- /dev/null +++ b/w11/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst @@ -0,0 +1,25 @@ +C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\strategy\WebCrawler.java +C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\exception\ParseException.java +C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\command\CrawlCommand.java +C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\test\RealCrawlTest.java +C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\command\ExitCommand.java +C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\Main.java +C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\strategy\CrawlStrategy.java +C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\strategy\BlogStrategy.java +C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\test\CrawlerDemo.java +C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\command\CommandContext.java +C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\model\Article.java +C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\strategy\NewsStrategy.java +C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\exception\CrawlerException.java +C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\strategy\StrategyFactory.java +C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\repository\ArticleRepository.java +C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\test\ToutiaoTest.java 
+C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\command\HistoryCommand.java +C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\exception\NetworkException.java +C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\command\Command.java +C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\controller\CrawlerController.java +C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\command\HelpCommand.java +C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\view\ConsoleView.java +C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\test\CrawlerTest.java +C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\command\ListCommand.java +C:\Users\ruiruirui\java\w11\src\main\java\com\example\datacollect\strategy\RealCrawlStrategy.java diff --git a/w11/test_crawler.bat b/w11/test_crawler.bat new file mode 100644 index 0000000..36a56fa --- /dev/null +++ b/w11/test_crawler.bat @@ -0,0 +1,26 @@ +@echo off +echo ==================================== +echo 爬虫功能测试脚本 +echo ==================================== +echo. + +echo [测试1] 运行单元测试... +java -cp target/datacollect-cli-0.1.0-jar-with-dependencies.jar com.example.datacollect.test.CrawlerTest +if %errorlevel% neq 0 ( + echo 单元测试失败! + exit /b 1 +) +echo. + +echo [测试2] 测试策略工厂... +echo 支持的策略类型: +java -cp target/datacollect-cli-0.1.0-jar-with-dependencies.jar -c "import com.example.datacollect.strategy.StrategyFactory; System.out.println(StrategyFactory.getSupportedTypes());" 2>nul || echo (需要JShell支持) +echo. + +echo [测试3] 测试Blog策略爬取... +java -cp target/datacollect-cli-0.1.0-jar-with-dependencies.jar com.example.datacollect.test.CrawlerTest +echo. + +echo ==================================== +echo 测试完成! 
+echo ==================================== \ No newline at end of file diff --git a/w12/.vscode/launch.json b/w12/.vscode/launch.json new file mode 100644 index 0000000..1fda4c7 --- /dev/null +++ b/w12/.vscode/launch.json @@ -0,0 +1,29 @@ +{ + // 使用 IntelliSense 了解相关属性。 + // 悬停以查看现有属性的描述。 + // 欲了解更多信息,请访问: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + + { + "type": "java", + "name": "Current File", + "request": "launch", + "mainClass": "${file}" + }, + { + "type": "java", + "name": "WeiboStarHotSearcha", + "request": "launch", + "mainClass": "WeiboStarHotSearcha", + "projectName": "weibo-hotsearch" + }, + { + "type": "java", + "name": "Main", + "request": "launch", + "mainClass": "com.weibo.hotsearch.Main", + "projectName": "weibo-hotsearch" + } + ] +} \ No newline at end of file diff --git a/w12/.vscode/settings.json b/w12/.vscode/settings.json new file mode 100644 index 0000000..c5f3f6b --- /dev/null +++ b/w12/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "java.configuration.updateBuildConfiguration": "interactive" +} \ No newline at end of file diff --git a/w12/202506050225-毕磊-高级程序设计实验报告.docx b/w12/202506050225-毕磊-高级程序设计实验报告.docx new file mode 100644 index 0000000..7ff03ff Binary files /dev/null and b/w12/202506050225-毕磊-高级程序设计实验报告.docx differ diff --git a/w12/hotsearch_results/hotsearch_20260528_140606.txt b/w12/hotsearch_results/hotsearch_20260528_140606.txt new file mode 100644 index 0000000..b6152b9 --- /dev/null +++ b/w12/hotsearch_results/hotsearch_20260528_140606.txt @@ -0,0 +1,56 @@ +热搜数据采集报告 +================ +采集时间: 2026年05月28日 14:06:04 + +【微博热搜数据】 + +--- 明星相关热搜 --- +排名:6 热度:414478 热搜:VOGUE直播间两位明星不出镜 +排名:32 热度:373370 热搜:歌手第二期歌单 +排名:34 热度:370959 热搜:小红书官宣获得世界杯版权 + +明星相关热搜总数:3 条 + +--- 体育相关热搜 --- +排名:34 热度:370959 热搜:小红书官宣获得世界杯版权 + +体育相关热搜总数:1 条 + +--- 国家政策相关热搜 --- +排名:37 热度:364897 热搜:双汇发布致歉声明 + +国家政策相关热搜总数:1 条 + +【百度贴吧热搜数据】 + +--- 明星相关热搜 --- + +明星相关热搜总数:0 条 +当前贴吧热搜暂无明星相关内容 + +--- 体育相关热搜 --- + +体育相关热搜总数:0 条 
+当前贴吧热搜暂无体育相关内容 + +--- 国家政策相关热搜 --- + +国家政策相关热搜总数:0 条 +当前贴吧热搜暂无国家政策相关内容 + +【知乎热搜数据】 + +--- 明星相关热搜 --- +排名:6 热度:0 热搜:动作演员吴樾为什么火不起来 + +明星相关热搜总数:1 条 + +--- 体育相关热搜 --- + +体育相关热搜总数:0 条 +当前知乎热搜暂无体育相关内容 + +--- 国家政策相关热搜 --- + +国家政策相关热搜总数:0 条 +当前知乎热搜暂无国家政策相关内容 diff --git a/w12/hotsearch_results/hotsearch_20260531_121726.txt b/w12/hotsearch_results/hotsearch_20260531_121726.txt new file mode 100644 index 0000000..1405ade --- /dev/null +++ b/w12/hotsearch_results/hotsearch_20260531_121726.txt @@ -0,0 +1,60 @@ +热搜数据采集报告 +================ +采集时间: 2026年05月31日 12:17:24 + +【微博热搜数据】 + +--- 明星相关热搜 --- +排名:43 热度:186709 热搜:敖瑞鹏新剧演鞠婧祎哥哥 + +明星相关热搜总数:1 条 + +--- 体育相关热搜 --- +排名:10 热度:387068 热搜:陈星旭王玉雯一起去看欧冠了 +排名:21 热度:261492 热搜:姆巴佩欧冠金靴 +排名:23 热度:250168 热搜:文班亚马西决MVP +排名:37 热度:191114 热搜:文班亚马回应成为西部决赛MVP +排名:41 热度:188900 热搜:巴黎欧冠夺冠后多地爆发骚乱 +排名:42 热度:188188 热搜:孙千去看欧冠了 +排名:49 热度:172821 热搜:世界杯 + +体育相关热搜总数:7 条 + +--- 国家政策相关热搜 --- + +国家政策相关热搜总数:0 条 +当前微博热搜暂无国家政策相关内容 + +【百度贴吧热搜数据】 + +--- 明星相关热搜 --- + +明星相关热搜总数:0 条 +当前贴吧热搜暂无明星相关内容 + +--- 体育相关热搜 --- + +体育相关热搜总数:0 条 +当前贴吧热搜暂无体育相关内容 + +--- 国家政策相关热搜 --- + +国家政策相关热搜总数:0 条 +当前贴吧热搜暂无国家政策相关内容 + +【知乎热搜数据】 + +--- 明星相关热搜 --- + +明星相关热搜总数:0 条 +当前知乎热搜暂无明星相关内容 + +--- 体育相关热搜 --- + +体育相关热搜总数:0 条 +当前知乎热搜暂无体育相关内容 + +--- 国家政策相关热搜 --- + +国家政策相关热搜总数:0 条 +当前知乎热搜暂无国家政策相关内容 diff --git a/w12/总代码/.vscode/settings.json b/w12/总代码/.vscode/settings.json new file mode 100644 index 0000000..8e9c83c --- /dev/null +++ b/w12/总代码/.vscode/settings.json @@ -0,0 +1,4 @@ +{ + "git.ignoreLimitWarning": true, + "java.debug.settings.onBuildFailureProceed": true +} \ No newline at end of file diff --git a/w12/总代码/pom.xml b/w12/总代码/pom.xml new file mode 100644 index 0000000..264f219 --- /dev/null +++ b/w12/总代码/pom.xml @@ -0,0 +1,76 @@ + + + 4.0.0 + + com.weibo + hotsearch + 1.0.0 + jar + + HotSearch CLI Tool + 热搜数据采集工具 - CLI + MVC + Command + Strategy + Exception体系 + + + 11 + 11 + UTF-8 + + + + + org.jsoup + jsoup + 1.17.2 + + + + org.apache.httpcomponents.client5 + httpclient5 + 5.3.1 + + + + 
org.apache.httpcomponents.client5 + httpclient5-fluent + 5.3.1 + + + + com.alibaba.fastjson2 + fastjson2 + 2.0.52 + + + + + + + org.apache.maven.plugins + maven-assembly-plugin + 3.6.0 + + + + com.weibo.hotsearch.Main + + + + jar-with-dependencies + + + + + make-assembly + package + + single + + + + + + + + \ No newline at end of file diff --git a/w12/总代码/src/main/java/WeiboStarHotSearcha.java b/w12/总代码/src/main/java/WeiboStarHotSearcha.java new file mode 100644 index 0000000..a57d190 --- /dev/null +++ b/w12/总代码/src/main/java/WeiboStarHotSearcha.java @@ -0,0 +1,667 @@ +import com.alibaba.fastjson2.JSONArray; +import com.alibaba.fastjson2.JSONObject; +import org.apache.hc.client5.http.fluent.Request; +import org.apache.hc.core5.util.Timeout; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; + +public class WeiboStarHotSearcha { + + private static final String WEIBO_HOT_URL = "https://weibo.com/ajax/side/hotSearch"; + private static final String TIEBA_HOT_URL = "https://tieba.baidu.com/hottopic/browse/topicList"; + private static final String ZHIHU_HOT_URL = "https://www.zhihu.com/api/v4/search/top_search"; + private static final String OUTPUT_DIR = "hotsearch_results"; + + private static final String[] STAR_KEYWORDS = { + "明星", "演员", "歌手", "爱豆", "艺人", "红毯", "综艺", "新剧", + "恋情", "官宣", "演唱会", "代言", "造型", "封面" + }; + + private static final String[] SPORTS_KEYWORDS = { + "足球", "篮球", "世界杯", "NBA", "CBA", "奥运会", "世锦赛", + "冠军", "比赛", "夺冠", "进球", "比分", "运动员", "国足", + "乒乓", "排球", "羽毛球", "游泳", "田径", "体操", "跳水", + "MVP", "转会", "联赛", "中超", "英超", "西甲", "欧冠" + }; + + private static final String[] POLICY_KEYWORDS = { + "政策", "新规", "条例", "法规", "通知", "公告", "发布", + "国务院", "发改委", "财政部", "教育部", 
"工信部", "科技部", + "税收", "补贴", "优惠", "扶持", "改革", "开放", "创新", + "十四五", "计划", "规划", "方案", "意见", "办法", "细则", + "经济", "金融", "市场", "监管", "安全", "环保", "绿色" + }; + + private static final int CONNECT_TIMEOUT = 10000; + private static final int RESPONSE_TIMEOUT = 10000; + private static final int MAX_RETRIES = 3; + + private static final StringBuilder outputBuilder = new StringBuilder(); + + public static void main(String[] args) { + try { + outputBuilder.append("热搜数据采集报告\n"); + outputBuilder.append("================\n"); + outputBuilder.append("采集时间: ").append(getCurrentTime()).append("\n\n"); + + // ========== 微博热搜 ========== + System.out.println("正在请求微博热搜数据..."); + outputBuilder.append("【微博热搜数据】\n"); + + String weiboJson = fetchWithRetry(WEIBO_HOT_URL, MAX_RETRIES, "https://weibo.com/"); + + if (weiboJson == null || weiboJson.isEmpty()) { + System.out.println("获取微博热搜数据失败"); + outputBuilder.append("获取微博热搜数据失败\n\n"); + } else { + parseAndFilterWeibo(weiboJson); + } + + // ========== 百度贴吧热搜 ========== + System.out.println("\n\n正在请求百度贴吧热搜数据..."); + outputBuilder.append("\n【百度贴吧热搜数据】\n"); + + String tiebaJson = fetchWithRetry(TIEBA_HOT_URL, MAX_RETRIES, "https://tieba.baidu.com/"); + + if (tiebaJson == null || tiebaJson.isEmpty()) { + System.out.println("获取百度贴吧热搜数据失败"); + outputBuilder.append("获取百度贴吧热搜数据失败\n"); + } else { + parseAndFilterTieba(tiebaJson); + } + + // ========== 知乎热搜 ========== + System.out.println("\n\n正在请求知乎热搜数据..."); + outputBuilder.append("\n【知乎热搜数据】\n"); + + try { + String zhihuJson = fetchWithRetry(ZHIHU_HOT_URL, MAX_RETRIES, "https://zhuanlan.zhihu.com/"); + + if (zhihuJson == null || zhihuJson.isEmpty()) { + System.out.println("获取知乎热搜数据失败"); + outputBuilder.append("获取知乎热搜数据失败\n"); + } else { + System.out.println("知乎返回数据长度: " + zhihuJson.length() + " 字符"); + if (zhihuJson.length() > 0) { + System.out.println("知乎返回数据预览: " + zhihuJson.substring(0, Math.min(500, zhihuJson.length())) + "..."); + } + parseAndFilterZhihu(zhihuJson); + } + } catch (Exception e) { + 
System.out.println("获取知乎热搜数据异常: " + e.getMessage()); + outputBuilder.append("获取知乎热搜数据异常: " + e.getMessage() + "\n"); + e.printStackTrace(); + } + + // ========== 保存到文件 ========== + String filename = saveToFile(); + System.out.println("\n\n结果已保存到文件: " + filename); + + } catch (IOException e) { + System.err.println("网络请求失败: " + e.getMessage()); + e.printStackTrace(); + } catch (Exception e) { + System.err.println("数据解析失败: " + e.getMessage()); + e.printStackTrace(); + } + } + + private static String fetchWithRetry(String url, int maxRetries, String referer) throws IOException { + int retryCount = 0; + IOException lastException = null; + + while (retryCount < maxRetries) { + try { + System.out.println("正在请求: " + url); + String result = Request.get(url) + .addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36") + .addHeader("Referer", referer) + .addHeader("Accept", "application/json, text/plain, */*;charset=UTF-8") + .addHeader("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8") + .addHeader("Accept-Encoding", "identity") + .addHeader("Connection", "keep-alive") + .addHeader("Content-Type", "application/json;charset=UTF-8") + .connectTimeout(Timeout.ofMilliseconds(CONNECT_TIMEOUT)) + .responseTimeout(Timeout.ofMilliseconds(RESPONSE_TIMEOUT)) + .execute() + .returnContent() + .asString(StandardCharsets.UTF_8); + + // 修复可能的编码问题 + result = new String(result.getBytes(StandardCharsets.UTF_8), StandardCharsets.UTF_8); + return result; + } catch (IOException e) { + lastException = e; + retryCount++; + System.out.println("请求失败 (" + retryCount + "/" + maxRetries + "): " + e.getMessage()); + if (retryCount < maxRetries) { + System.out.println("2秒后重试..."); + try { + Thread.sleep(2000); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw new IOException("重试被中断", ie); + } + } + } + } + throw lastException != null ? 
lastException : new IOException("请求失败"); + } + + private static void parseAndFilterWeibo(String json) { + JSONObject root = JSONObject.parseObject(json); + if (root == null || !root.containsKey("data")) { + System.out.println("微博数据格式错误或接口返回异常"); + outputBuilder.append("数据格式错误或接口返回异常\n"); + return; + } + + JSONObject data = root.getJSONObject("data"); + if (data == null || !data.containsKey("realtime")) { + System.out.println("微博热搜数据为空"); + outputBuilder.append("热搜数据为空\n"); + return; + } + + JSONArray realtime = data.getJSONArray("realtime"); + if (realtime == null || realtime.isEmpty()) { + System.out.println("微博热搜列表为空"); + outputBuilder.append("热搜列表为空\n"); + return; + } + + List starHotList = new ArrayList<>(); + List sportsHotList = new ArrayList<>(); + List policyHotList = new ArrayList<>(); + + System.out.println("\n===== 微博 - 明星相关热搜 ====="); + outputBuilder.append("\n--- 明星相关热搜 ---\n"); + + for (int i = 0; i < realtime.size(); i++) { + JSONObject item = realtime.getJSONObject(i); + if (item == null) continue; + + String word = item.getString("word"); + if (word == null || word.isEmpty()) continue; + + long num = item.getLongValue("num", 0); + int rank = item.getIntValue("rank", 0); + + if (isStarRelated(word)) { + starHotList.add(item); + String line = String.format("排名:%d\t热度:%d\t热搜:%s", rank, num, word); + System.out.println(line); + outputBuilder.append(line).append("\n"); + } + } + String summary = "\n明星相关热搜总数:" + starHotList.size() + " 条"; + System.out.println(summary); + outputBuilder.append(summary).append("\n"); + + if (starHotList.isEmpty()) { + String emptyMsg = "当前微博热搜暂无明星相关内容"; + System.out.println(emptyMsg); + outputBuilder.append(emptyMsg).append("\n"); + } + + System.out.println("\n===== 微博 - 体育相关热搜 ====="); + outputBuilder.append("\n--- 体育相关热搜 ---\n"); + + for (int i = 0; i < realtime.size(); i++) { + JSONObject item = realtime.getJSONObject(i); + if (item == null) continue; + + String word = item.getString("word"); + if (word == null || 
word.isEmpty()) continue; + + long num = item.getLongValue("num", 0); + int rank = item.getIntValue("rank", 0); + + if (isSportsRelated(word)) { + sportsHotList.add(item); + String line = String.format("排名:%d\t热度:%d\t热搜:%s", rank, num, word); + System.out.println(line); + outputBuilder.append(line).append("\n"); + } + } + summary = "\n体育相关热搜总数:" + sportsHotList.size() + " 条"; + System.out.println(summary); + outputBuilder.append(summary).append("\n"); + + if (sportsHotList.isEmpty()) { + String emptyMsg = "当前微博热搜暂无体育相关内容"; + System.out.println(emptyMsg); + outputBuilder.append(emptyMsg).append("\n"); + } + + System.out.println("\n===== 微博 - 国家政策相关热搜 ====="); + outputBuilder.append("\n--- 国家政策相关热搜 ---\n"); + + for (int i = 0; i < realtime.size(); i++) { + JSONObject item = realtime.getJSONObject(i); + if (item == null) continue; + + String word = item.getString("word"); + if (word == null || word.isEmpty()) continue; + + long num = item.getLongValue("num", 0); + int rank = item.getIntValue("rank", 0); + + if (isPolicyRelated(word)) { + policyHotList.add(item); + String line = String.format("排名:%d\t热度:%d\t热搜:%s", rank, num, word); + System.out.println(line); + outputBuilder.append(line).append("\n"); + } + } + summary = "\n国家政策相关热搜总数:" + policyHotList.size() + " 条"; + System.out.println(summary); + outputBuilder.append(summary).append("\n"); + + if (policyHotList.isEmpty()) { + String emptyMsg = "当前微博热搜暂无国家政策相关内容"; + System.out.println(emptyMsg); + outputBuilder.append(emptyMsg).append("\n"); + } + } + + private static void parseAndFilterTieba(String json) { + JSONObject root = JSONObject.parseObject(json); + if (root == null || !root.containsKey("data")) { + System.out.println("贴吧数据格式错误或接口返回异常"); + outputBuilder.append("数据格式错误或接口返回异常\n"); + return; + } + + JSONObject data = root.getJSONObject("data"); + if (data == null || !data.containsKey("bang_topic")) { + System.out.println("贴吧热搜数据为空"); + outputBuilder.append("热搜数据为空\n"); + return; + } + + JSONArray topics = 
data.getJSONArray("bang_topic"); + if (topics == null || topics.isEmpty()) { + System.out.println("贴吧热搜列表为空"); + outputBuilder.append("热搜列表为空\n"); + return; + } + + List starHotList = new ArrayList<>(); + List sportsHotList = new ArrayList<>(); + List policyHotList = new ArrayList<>(); + + System.out.println("\n===== 百度贴吧 - 明星相关热搜 ====="); + outputBuilder.append("\n--- 明星相关热搜 ---\n"); + + for (int i = 0; i < topics.size(); i++) { + JSONObject item = topics.getJSONObject(i); + if (item == null) continue; + + String topicName = item.getString("topic_name"); + if (topicName == null || topicName.isEmpty()) continue; + + int discussNum = item.getIntValue("discuss_num", 0); + int readNum = item.getIntValue("read_num", 0); + + if (isStarRelated(topicName)) { + starHotList.add(item); + String line = String.format("序号:%d\t阅读:%d\t讨论:%d\t话题:%s", i + 1, readNum, discussNum, topicName); + System.out.println(line); + outputBuilder.append(line).append("\n"); + } + } + String summary = "\n明星相关热搜总数:" + starHotList.size() + " 条"; + System.out.println(summary); + outputBuilder.append(summary).append("\n"); + + if (starHotList.isEmpty()) { + String emptyMsg = "当前贴吧热搜暂无明星相关内容"; + System.out.println(emptyMsg); + outputBuilder.append(emptyMsg).append("\n"); + } + + System.out.println("\n===== 百度贴吧 - 体育相关热搜 ====="); + outputBuilder.append("\n--- 体育相关热搜 ---\n"); + + for (int i = 0; i < topics.size(); i++) { + JSONObject item = topics.getJSONObject(i); + if (item == null) continue; + + String topicName = item.getString("topic_name"); + if (topicName == null || topicName.isEmpty()) continue; + + int discussNum = item.getIntValue("discuss_num", 0); + int readNum = item.getIntValue("read_num", 0); + + if (isSportsRelated(topicName)) { + sportsHotList.add(item); + String line = String.format("序号:%d\t阅读:%d\t讨论:%d\t话题:%s", i + 1, readNum, discussNum, topicName); + System.out.println(line); + outputBuilder.append(line).append("\n"); + } + } + summary = "\n体育相关热搜总数:" + sportsHotList.size() + " 条"; 
+ System.out.println(summary); + outputBuilder.append(summary).append("\n"); + + if (sportsHotList.isEmpty()) { + String emptyMsg = "当前贴吧热搜暂无体育相关内容"; + System.out.println(emptyMsg); + outputBuilder.append(emptyMsg).append("\n"); + } + + System.out.println("\n===== 百度贴吧 - 国家政策相关热搜 ====="); + outputBuilder.append("\n--- 国家政策相关热搜 ---\n"); + + for (int i = 0; i < topics.size(); i++) { + JSONObject item = topics.getJSONObject(i); + if (item == null) continue; + + String topicName = item.getString("topic_name"); + if (topicName == null || topicName.isEmpty()) continue; + + int discussNum = item.getIntValue("discuss_num", 0); + int readNum = item.getIntValue("read_num", 0); + + if (isPolicyRelated(topicName)) { + policyHotList.add(item); + String line = String.format("序号:%d\t阅读:%d\t讨论:%d\t话题:%s", i + 1, readNum, discussNum, topicName); + System.out.println(line); + outputBuilder.append(line).append("\n"); + } + } + summary = "\n国家政策相关热搜总数:" + policyHotList.size() + " 条"; + System.out.println(summary); + outputBuilder.append(summary).append("\n"); + + if (policyHotList.isEmpty()) { + String emptyMsg = "当前贴吧热搜暂无国家政策相关内容"; + System.out.println(emptyMsg); + outputBuilder.append(emptyMsg).append("\n"); + } + } + + private static void parseAndFilterZhihu(String json) { + try { + JSONObject root = JSONObject.parseObject(json); + if (root == null) { + System.out.println("知乎数据格式错误:无法解析JSON"); + outputBuilder.append("数据格式错误:无法解析JSON\n"); + return; + } + + // 尝试多种数据结构 + JSONArray data = null; + + // 结构1:直接在 data 数组中 + if (root.containsKey("data") && root.get("data") instanceof JSONArray) { + data = root.getJSONArray("data"); + } + // 结构2:在 data.topics 数组中 + else if (root.containsKey("data")) { + JSONObject dataObj = root.getJSONObject("data"); + if (dataObj != null && dataObj.containsKey("topics")) { + data = dataObj.getJSONArray("topics"); + } + } + // 结构3:在 top_search.words 数组中(知乎搜索API) + else if (root.containsKey("top_search")) { + JSONObject topSearch = 
root.getJSONObject("top_search"); + if (topSearch != null && topSearch.containsKey("words")) { + data = topSearch.getJSONArray("words"); + } + } + // 结构4:直接是数组 + else if (json.startsWith("[")) { + data = JSONArray.parseArray(json); + } + + if (data == null || data.isEmpty()) { + System.out.println("知乎热搜数据为空或格式不匹配"); + outputBuilder.append("热搜数据为空或格式不匹配\n"); + return; + } + + List starHotList = new ArrayList<>(); + List sportsHotList = new ArrayList<>(); + List policyHotList = new ArrayList<>(); + + System.out.println("\n===== 知乎 - 明星相关热搜 ====="); + outputBuilder.append("\n--- 明星相关热搜 ---\n"); + + for (int i = 0; i < data.size(); i++) { + JSONObject item = data.getJSONObject(i); + if (item == null) continue; + + // 尝试多种标题字段 + String title = null; + if (item.containsKey("title")) { + title = item.getString("title"); + } else if (item.containsKey("topic_title")) { + title = item.getString("topic_title"); + } else if (item.containsKey("name")) { + title = item.getString("name"); + } else if (item.containsKey("target")) { + JSONObject target = item.getJSONObject("target"); + if (target != null) { + title = target.getString("title"); + } + } else if (item.containsKey("display_query")) { + title = item.getString("display_query"); + } else if (item.containsKey("query")) { + title = item.getString("query"); + } + + if (title == null || title.isEmpty()) continue; + + // 尝试获取热度值 + long hotValue = 0; + if (item.containsKey("hot_score")) { + hotValue = item.getLongValue("hot_score", 0); + } else if (item.containsKey("score")) { + hotValue = item.getLongValue("score", 0); + } else if (item.containsKey("detail_text")) { + String detailText = item.getString("detail_text"); + if (detailText != null) { + try { + String numStr = detailText.replaceAll("[^0-9]", ""); + if (!numStr.isEmpty()) { + hotValue = Long.parseLong(numStr); + } + } catch (Exception e) { + hotValue = 0; + } + } + } + + int rank = i + 1; + + if (isStarRelated(title)) { + starHotList.add(item); + String line = 
String.format("排名:%d\t热度:%d\t热搜:%s", rank, hotValue, title); + System.out.println(line); + outputBuilder.append(line).append("\n"); + } + } + String summary = "\n明星相关热搜总数:" + starHotList.size() + " 条"; + System.out.println(summary); + outputBuilder.append(summary).append("\n"); + + if (starHotList.isEmpty()) { + String emptyMsg = "当前知乎热搜暂无明星相关内容"; + System.out.println(emptyMsg); + outputBuilder.append(emptyMsg).append("\n"); + } + + System.out.println("\n===== 知乎 - 体育相关热搜 ====="); + outputBuilder.append("\n--- 体育相关热搜 ---\n"); + + for (int i = 0; i < data.size(); i++) { + JSONObject item = data.getJSONObject(i); + if (item == null) continue; + + String title = null; + if (item.containsKey("title")) { + title = item.getString("title"); + } else if (item.containsKey("topic_title")) { + title = item.getString("topic_title"); + } else if (item.containsKey("name")) { + title = item.getString("name"); + } else if (item.containsKey("target")) { + JSONObject target = item.getJSONObject("target"); + if (target != null) { + title = target.getString("title"); + } + } + + if (title == null || title.isEmpty()) continue; + + long hotValue = 0; + if (item.containsKey("hot_score")) { + hotValue = item.getLongValue("hot_score", 0); + } else if (item.containsKey("score")) { + hotValue = item.getLongValue("score", 0); + } + + int rank = i + 1; + + if (isSportsRelated(title)) { + sportsHotList.add(item); + String line = String.format("排名:%d\t热度:%d\t热搜:%s", rank, hotValue, title); + System.out.println(line); + outputBuilder.append(line).append("\n"); + } + } + summary = "\n体育相关热搜总数:" + sportsHotList.size() + " 条"; + System.out.println(summary); + outputBuilder.append(summary).append("\n"); + + if (sportsHotList.isEmpty()) { + String emptyMsg = "当前知乎热搜暂无体育相关内容"; + System.out.println(emptyMsg); + outputBuilder.append(emptyMsg).append("\n"); + } + + System.out.println("\n===== 知乎 - 国家政策相关热搜 ====="); + outputBuilder.append("\n--- 国家政策相关热搜 ---\n"); + + for (int i = 0; i < data.size(); i++) { 
+ JSONObject item = data.getJSONObject(i); + if (item == null) continue; + + String title = null; + if (item.containsKey("title")) { + title = item.getString("title"); + } else if (item.containsKey("topic_title")) { + title = item.getString("topic_title"); + } else if (item.containsKey("name")) { + title = item.getString("name"); + } else if (item.containsKey("target")) { + JSONObject target = item.getJSONObject("target"); + if (target != null) { + title = target.getString("title"); + } + } + + if (title == null || title.isEmpty()) continue; + + long hotValue = 0; + if (item.containsKey("hot_score")) { + hotValue = item.getLongValue("hot_score", 0); + } else if (item.containsKey("score")) { + hotValue = item.getLongValue("score", 0); + } + + int rank = i + 1; + + if (isPolicyRelated(title)) { + policyHotList.add(item); + String line = String.format("排名:%d\t热度:%d\t热搜:%s", rank, hotValue, title); + System.out.println(line); + outputBuilder.append(line).append("\n"); + } + } + summary = "\n国家政策相关热搜总数:" + policyHotList.size() + " 条"; + System.out.println(summary); + outputBuilder.append(summary).append("\n"); + + if (policyHotList.isEmpty()) { + String emptyMsg = "当前知乎热搜暂无国家政策相关内容"; + System.out.println(emptyMsg); + outputBuilder.append(emptyMsg).append("\n"); + } + } catch (Exception e) { + System.out.println("知乎数据解析异常: " + e.getMessage()); + outputBuilder.append("数据解析异常: " + e.getMessage() + "\n"); + e.printStackTrace(); + } + } + + private static boolean isStarRelated(String word) { + if (word == null || word.isEmpty()) { + return false; + } + for (String keyword : STAR_KEYWORDS) { + if (word.contains(keyword)) { + return true; + } + } + return false; + } + + private static boolean isSportsRelated(String word) { + if (word == null || word.isEmpty()) { + return false; + } + for (String keyword : SPORTS_KEYWORDS) { + if (word.contains(keyword)) { + return true; + } + } + return false; + } + + private static boolean isPolicyRelated(String word) { + if (word == null || 
word.isEmpty()) { + return false; + } + for (String keyword : POLICY_KEYWORDS) { + if (word.contains(keyword)) { + return true; + } + } + return false; + } + + private static String getCurrentTime() { + return new SimpleDateFormat("yyyy年MM月dd日 HH:mm:ss").format(new Date()); + } + + private static String saveToFile() throws IOException { + File dir = new File(OUTPUT_DIR); + if (!dir.exists()) { + dir.mkdirs(); + } + + String timestamp = new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date()); + String filename = "hotsearch_" + timestamp + ".txt"; + String filepath = OUTPUT_DIR + File.separator + filename; + + try (BufferedWriter writer = new BufferedWriter(new FileWriter(filepath))) { + writer.write(outputBuilder.toString()); + } + + return filepath; + } +} \ No newline at end of file diff --git a/w12/总代码/src/main/java/com/weibo/hotsearch/ConsoleOutputHandler.java b/w12/总代码/src/main/java/com/weibo/hotsearch/ConsoleOutputHandler.java new file mode 100644 index 0000000..a4787cb --- /dev/null +++ b/w12/总代码/src/main/java/com/weibo/hotsearch/ConsoleOutputHandler.java @@ -0,0 +1,30 @@ +package com.weibo.hotsearch; + +import com.alibaba.fastjson2.JSONObject; + +import java.util.List; + +public class ConsoleOutputHandler extends OutputHandler { + + @Override + public void output(List hotList, String filterName) { + System.out.println("\n===== " + filterName + " ====="); + + if (hotList == null || hotList.isEmpty()) { + System.out.println("当前暂无符合条件的热搜内容"); + return; + } + + for (int i = 0; i < hotList.size(); i++) { + JSONObject item = hotList.get(i); + System.out.println(formatHotItem(item, i, null)); + } + + System.out.println("\n===== 热搜总数:" + hotList.size() + " 条 ====="); + } + + @Override + public String getOutputType() { + return "控制台输出"; + } +} \ No newline at end of file diff --git a/w12/总代码/src/main/java/com/weibo/hotsearch/HotSearchFilter.java b/w12/总代码/src/main/java/com/weibo/hotsearch/HotSearchFilter.java new file mode 100644 index 0000000..0b41951 --- 
/**
 * Base class for keyword-driven hot-search filters.
 *
 * <p>A title matches when it contains at least one of the configured keywords as a substring.
 * Subclasses supply the keyword list and a display name.
 */
public abstract class HotSearchFilter {

    /** Substrings searched for in a title; stored as passed in, without copying. */
    protected String[] keywords;

    /**
     * @param keywords substrings that qualify a title for this filter
     */
    public HotSearchFilter(String[] keywords) {
        this.keywords = keywords;
    }

    /**
     * Tests a hot-search title against the keyword list.
     *
     * @param word title to test; {@code null} and empty titles never match
     * @return {@code true} when {@code word} contains any keyword
     */
    public boolean matches(String word) {
        boolean hasText = word != null && !word.isEmpty();
        if (!hasText) {
            return false;
        }

        boolean found = false;
        for (int idx = 0; idx < keywords.length && !found; idx++) {
            found = word.contains(keywords[idx]);
        }
        return found;
    }

    /**
     * @return the human-readable name of this filter
     */
    public abstract String getFilterName();
}
else { + return String.format("序号:%d\t热度:%d\t热搜:%s", index + 1, num, word); + } + } + + protected String getHotSearchWord(JSONObject item) { + if (item == null) return "未知"; + String word = item.getString("word"); + if (word == null || word.isEmpty()) { + word = item.getString("topic_name"); + } + if (word == null || word.isEmpty()) { + word = item.getString("title"); + } + return word != null ? word : "未知"; + } + + protected long getHotSearchNum(JSONObject item) { + if (item == null) return 0; + return item.getLongValue("num", 0); + } + + protected int getHotSearchRank(JSONObject item) { + if (item == null) return 0; + return item.getIntValue("rank", 0); + } +} \ No newline at end of file diff --git a/w12/总代码/src/main/java/com/weibo/hotsearch/StarFilter.java b/w12/总代码/src/main/java/com/weibo/hotsearch/StarFilter.java new file mode 100644 index 0000000..ff1ddf4 --- /dev/null +++ b/w12/总代码/src/main/java/com/weibo/hotsearch/StarFilter.java @@ -0,0 +1,18 @@ +package com.weibo.hotsearch; + +public class StarFilter extends HotSearchFilter { + + private static final String[] STAR_KEYWORDS = { + "明星", "演员", "歌手", "爱豆", "艺人", "红毯", "综艺", "新剧", + "恋情", "官宣", "演唱会", "代言", "造型", "封面" + }; + + public StarFilter() { + super(STAR_KEYWORDS); + } + + @Override + public String getFilterName() { + return "明星相关热搜"; + } +} \ No newline at end of file diff --git a/w12/总代码/src/main/java/com/weibo/hotsearch/cli/CliHandler.java b/w12/总代码/src/main/java/com/weibo/hotsearch/cli/CliHandler.java new file mode 100644 index 0000000..ea52466 --- /dev/null +++ b/w12/总代码/src/main/java/com/weibo/hotsearch/cli/CliHandler.java @@ -0,0 +1,133 @@ +package com.weibo.hotsearch.cli; + +import com.weibo.hotsearch.controller.HotSearchController; +import com.weibo.hotsearch.exception.ErrorCode; +import com.weibo.hotsearch.exception.HotSearchException; + +public class CliHandler { + + private final CliParser parser; + private final HotSearchController controller; + + public CliHandler(String[] args) { + 
this.parser = new CliParser(); + this.controller = new HotSearchController(); + this.parser.parse(args); + } + + public void handle() { + String command = parser.getCommand(); + + if (command == null || command.isEmpty()) { + parser.printUsage(); + return; + } + + try { + switch (command.toLowerCase()) { + case "help": + parser.printUsage(); + break; + case "fetch": + handleFetch(); + break; + case "filter": + handleFilter(); + break; + case "output": + handleOutput(); + break; + case "save": + handleSave(); + break; + case "run": + handleRun(); + break; + default: + throw new HotSearchException(ErrorCode.CLI_COMMAND_NOT_FOUND, "未知命令: " + command); + } + } catch (HotSearchException e) { + System.err.println("\n错误 [" + e.getErrorCode().getCode() + "]: " + e.getMessage()); + if (e.getCause() != null) { + e.getCause().printStackTrace(); + } + } + } + + private void handleFetch() throws HotSearchException { + String source = parser.getOption("s"); + if (source == null) { + source = parser.getOption("source"); + } + if (source == null) { + throw new HotSearchException(ErrorCode.PARAMETER_ERROR, "请指定数据源 (-s 或 --source)"); + } + controller.executeFetch(source); + } + + private void handleFilter() throws HotSearchException { + String filter = parser.getOption("f"); + if (filter == null) { + filter = parser.getOption("filter"); + } + if (filter == null) { + throw new HotSearchException(ErrorCode.PARAMETER_ERROR, "请指定过滤器 (-f 或 --filter)"); + } + controller.executeFilter(filter); + } + + private void handleOutput() throws HotSearchException { + String output = parser.getOption("o"); + if (output == null) { + output = parser.getOption("output"); + } + if (output == null) { + output = "console"; + } + controller.executeOutput(output); + } + + private void handleSave() throws HotSearchException { + String path = parser.getOption("p"); + if (path == null) { + path = parser.getOption("path"); + } + controller.executeSave(path); + } + + private void handleRun() throws 
/**
 * Minimal command-line parser.
 *
 * <p>Recognised token shapes:
 * <ul>
 *   <li>{@code --key=value} and {@code --key value} - long option with a value;</li>
 *   <li>{@code --key} - long flag, stored with the value {@code "true"};</li>
 *   <li>{@code -k value} / {@code -k} - short option with an optional value;</li>
 *   <li>{@code -abc} - cluster of short flags, each stored as {@code "true"};</li>
 *   <li>anything else - the first such token is the command, later ones are arguments.</li>
 * </ul>
 * An option consumes the following token as its value only when that token does not start
 * with {@code -}.
 */
public class CliParser {

    private final Map<String, String> options = new HashMap<>();
    private final List<String> arguments = new ArrayList<>();
    private String command;

    /**
     * Parses the given tokens into command, options and positional arguments.
     *
     * <p>Parsing has no console side effects; with no tokens the command simply stays
     * {@code null}, and the caller decides whether to show the usage text.
     *
     * @param args raw command-line tokens; {@code null} is treated as empty
     */
    public void parse(String[] args) {
        if (args == null) {
            return;
        }

        int i = 0;
        while (i < args.length) {
            String arg = args[i];

            if (arg.startsWith("--")) {
                String body = arg.substring(2);
                int eq = body.indexOf('=');
                if (eq >= 0) {
                    options.put(body.substring(0, eq), body.substring(eq + 1));
                } else if (hasValueAt(args, i + 1)) {
                    // "--source weibo": same value handling as the short form "-s weibo".
                    options.put(body, args[i + 1]);
                    i++;
                } else {
                    options.put(body, "true");
                }
            } else if (arg.startsWith("-")) {
                String key = arg.substring(1);
                if (key.length() == 1) {
                    String value = "true";
                    if (hasValueAt(args, i + 1)) {
                        value = args[i + 1];
                        i++;
                    }
                    options.put(key, value);
                } else {
                    for (char flag : key.toCharArray()) {
                        options.put(String.valueOf(flag), "true");
                    }
                }
            } else if (command == null) {
                command = arg;
            } else {
                arguments.add(arg);
            }
            i++;
        }
    }

    /** Returns whether {@code args[index]} exists and can serve as an option value. */
    private static boolean hasValueAt(String[] args, int index) {
        return index < args.length && !args[index].startsWith("-");
    }

    /**
     * @return the first non-option token, or {@code null} when none was given
     */
    public String getCommand() {
        return command;
    }

    /**
     * @param key option name without leading dashes
     * @return the option value, {@code "true"} for a flag, or {@code null} when absent
     */
    public String getOption(String key) {
        return options.get(key);
    }

    public boolean hasOption(String key) {
        return options.containsKey(key);
    }

    /**
     * @return a copy of the positional arguments that followed the command
     */
    public List<String> getArguments() {
        return new ArrayList<>(arguments);
    }

    /**
     * @return a copy of all parsed options
     */
    public Map<String, String> getAllOptions() {
        return new HashMap<>(options);
    }

    /** Prints the usage text to standard output. */
    public void printUsage() {
        System.out.println("\n===== 热搜数据采集工具 =====");
        System.out.println("用法:");
        // Jar name follows the pom: artifactId "hotsearch", version "1.0.0", assembly suffix.
        System.out.println(" java -jar hotsearch-1.0.0-jar-with-dependencies.jar [命令] [选项]");
        System.out.println("\n命令:");
        System.out.println(" fetch 获取热搜数据");
        System.out.println(" filter 过滤热搜数据");
        System.out.println(" output 输出热搜数据");
        System.out.println(" save 保存数据到文件");
        System.out.println(" run 执行完整流程");
        System.out.println(" help 显示帮助信息");
        System.out.println("\n选项:");
        System.out.println(" -s, --source <数据源> 指定数据源: weibo/tieba/zhihu/all");
        System.out.println(" -f, --filter <过滤器> 指定过滤器: star/sports/policy");
        System.out.println(" -o, --output <类型> 指定输出类型: console/text");
        System.out.println(" -p, --path <路径> 指定保存路径");
        System.out.println(" -h, --help 显示帮助信息");
        System.out.println("\n示例:");
        System.out.println(" java -jar xxx.jar run -s weibo -f star -o console");
        System.out.println(" java -jar xxx.jar fetch -s all");
        System.out.println(" java -jar xxx.jar filter -f sports");
        System.out.println(" java -jar xxx.jar output -o console");
        System.out.println(" java -jar xxx.jar save -p ./result.txt");
        System.out.println();
    }
}
/**
 * Outcome of a command: a success flag, a message and an optional payload.
 *
 * <p>Instances are created through the constructors or the {@code success} / {@code failure}
 * factory methods; the class offers no setters.
 */
public class CommandResult {

    private final boolean success;
    private final String message;
    private final Object data;

    /**
     * Creates a result without payload.
     *
     * @param success whether the command succeeded
     * @param message human-readable outcome description
     */
    public CommandResult(boolean success, String message) {
        this(success, message, null);
    }

    /**
     * Creates a result carrying a payload.
     *
     * @param success whether the command succeeded
     * @param message human-readable outcome description
     * @param data    payload attached to the result; may be {@code null}
     */
    public CommandResult(boolean success, String message, Object data) {
        this.success = success;
        this.message = message;
        this.data = data;
    }

    /** Creates a successful result without payload. */
    public static CommandResult success(String message) {
        return new CommandResult(true, message);
    }

    /** Creates a successful result with payload. */
    public static CommandResult success(String message, Object data) {
        return new CommandResult(true, message, data);
    }

    /** Creates a failed result without payload. */
    public static CommandResult failure(String message) {
        return new CommandResult(false, message);
    }

    /** Creates a failed result with payload. */
    public static CommandResult failure(String message, Object data) {
        return new CommandResult(false, message, data);
    }

    public boolean isSuccess() {
        return success;
    }

    public String getMessage() {
        return message;
    }

    public Object getData() {
        return data;
    }
}
DataFetcher dataFetcher; + private final String source; + + public FetchCommand(DataFetcher dataFetcher, String source) { + this.dataFetcher = dataFetcher; + this.source = source; + } + + @Override + public void execute() throws HotSearchException { + if (source == null || source.isEmpty()) { + throw new HotSearchException(ErrorCode.PARAMETER_ERROR, "数据源不能为空"); + } + dataFetcher.fetch(source); + } + + @Override + public String getCommandName() { + return "fetch"; + } + + @Override + public String getDescription() { + return "从指定数据源获取热搜数据"; + } +} \ No newline at end of file diff --git a/w12/总代码/src/main/java/com/weibo/hotsearch/command/FilterCommand.java b/w12/总代码/src/main/java/com/weibo/hotsearch/command/FilterCommand.java new file mode 100644 index 0000000..95adfdb --- /dev/null +++ b/w12/总代码/src/main/java/com/weibo/hotsearch/command/FilterCommand.java @@ -0,0 +1,35 @@ +package com.weibo.hotsearch.command; + +import com.weibo.hotsearch.exception.ErrorCode; +import com.weibo.hotsearch.exception.HotSearchException; +import com.weibo.hotsearch.service.FilterService; +import com.weibo.hotsearch.strategy.FilterStrategy; + +public class FilterCommand implements Command { + + private final FilterService filterService; + private final FilterStrategy strategy; + + public FilterCommand(FilterService filterService, FilterStrategy strategy) { + this.filterService = filterService; + this.strategy = strategy; + } + + @Override + public void execute() throws HotSearchException { + if (strategy == null) { + throw new HotSearchException(ErrorCode.PARAMETER_ERROR, "过滤策略不能为空"); + } + filterService.filter(strategy); + } + + @Override + public String getCommandName() { + return "filter"; + } + + @Override + public String getDescription() { + return "使用指定策略过滤热搜数据"; + } +} \ No newline at end of file diff --git a/w12/总代码/src/main/java/com/weibo/hotsearch/command/HelpCommand.java b/w12/总代码/src/main/java/com/weibo/hotsearch/command/HelpCommand.java new file mode 100644 index 
0000000..67e1c21 --- /dev/null +++ b/w12/总代码/src/main/java/com/weibo/hotsearch/command/HelpCommand.java @@ -0,0 +1,27 @@ +package com.weibo.hotsearch.command; + +import com.weibo.hotsearch.exception.HotSearchException; + +public class HelpCommand implements Command { + + private final CommandInvoker invoker; + + public HelpCommand(CommandInvoker invoker) { + this.invoker = invoker; + } + + @Override + public void execute() throws HotSearchException { + invoker.printHelp(); + } + + @Override + public String getCommandName() { + return "help"; + } + + @Override + public String getDescription() { + return "显示帮助信息"; + } +} \ No newline at end of file diff --git a/w12/总代码/src/main/java/com/weibo/hotsearch/command/OutputCommand.java b/w12/总代码/src/main/java/com/weibo/hotsearch/command/OutputCommand.java new file mode 100644 index 0000000..82aad69 --- /dev/null +++ b/w12/总代码/src/main/java/com/weibo/hotsearch/command/OutputCommand.java @@ -0,0 +1,34 @@ +package com.weibo.hotsearch.command; + +import com.weibo.hotsearch.exception.ErrorCode; +import com.weibo.hotsearch.exception.HotSearchException; +import com.weibo.hotsearch.service.OutputService; + +public class OutputCommand implements Command { + + private final OutputService outputService; + private final String outputType; + + public OutputCommand(OutputService outputService, String outputType) { + this.outputService = outputService; + this.outputType = outputType; + } + + @Override + public void execute() throws HotSearchException { + if (outputType == null || outputType.isEmpty()) { + throw new HotSearchException(ErrorCode.PARAMETER_ERROR, "输出类型不能为空"); + } + outputService.output(outputType); + } + + @Override + public String getCommandName() { + return "output"; + } + + @Override + public String getDescription() { + return "输出过滤后的热搜数据"; + } +} \ No newline at end of file diff --git a/w12/总代码/src/main/java/com/weibo/hotsearch/command/SaveCommand.java b/w12/总代码/src/main/java/com/weibo/hotsearch/command/SaveCommand.java 
new file mode 100644 index 0000000..81b4fa9 --- /dev/null +++ b/w12/总代码/src/main/java/com/weibo/hotsearch/command/SaveCommand.java @@ -0,0 +1,30 @@ +package com.weibo.hotsearch.command; + +import com.weibo.hotsearch.exception.HotSearchException; +import com.weibo.hotsearch.service.OutputService; + +public class SaveCommand implements Command { + + private final OutputService outputService; + private final String filePath; + + public SaveCommand(OutputService outputService, String filePath) { + this.outputService = outputService; + this.filePath = filePath; + } + + @Override + public void execute() throws HotSearchException { + outputService.saveToFile(filePath); + } + + @Override + public String getCommandName() { + return "save"; + } + + @Override + public String getDescription() { + return "保存数据到文件"; + } +} \ No newline at end of file diff --git a/w12/总代码/src/main/java/com/weibo/hotsearch/controller/HotSearchController.java b/w12/总代码/src/main/java/com/weibo/hotsearch/controller/HotSearchController.java new file mode 100644 index 0000000..70d5428 --- /dev/null +++ b/w12/总代码/src/main/java/com/weibo/hotsearch/controller/HotSearchController.java @@ -0,0 +1,69 @@ +package com.weibo.hotsearch.controller; + +import com.weibo.hotsearch.command.*; +import com.weibo.hotsearch.exception.HotSearchException; +import com.weibo.hotsearch.service.DataFetcher; +import com.weibo.hotsearch.service.FilterService; +import com.weibo.hotsearch.service.OutputService; +import com.weibo.hotsearch.strategy.FilterStrategy; +import com.weibo.hotsearch.strategy.FilterStrategyFactory; + +public class HotSearchController { + + private final DataFetcher dataFetcher; + private final FilterService filterService; + private final OutputService outputService; + private final CommandInvoker commandInvoker; + + public HotSearchController() { + this.dataFetcher = new DataFetcher(); + this.filterService = new FilterService(); + this.outputService = new OutputService(); + this.commandInvoker = new 
CommandInvoker(); + registerCommands(); + } + + private void registerCommands() { + commandInvoker.registerCommand("help", new HelpCommand(commandInvoker)); + } + + public void executeFetch(String source) throws HotSearchException { + FetchCommand command = new FetchCommand(dataFetcher, source); + command.execute(); + System.out.println("已从 " + source + " 获取数据"); + } + + public void executeFilter(String filterCode) throws HotSearchException { + FilterStrategy strategy = FilterStrategyFactory.getStrategy(filterCode); + if (strategy == null) { + throw new HotSearchException(com.weibo.hotsearch.exception.ErrorCode.PARAMETER_ERROR, + "未知的过滤策略: " + filterCode); + } + FilterCommand command = new FilterCommand(filterService, strategy); + command.execute(); + System.out.println("已应用过滤策略: " + strategy.getFilterName()); + } + + public void executeOutput(String outputType) throws HotSearchException { + OutputCommand command = new OutputCommand(outputService, outputType); + command.execute(); + } + + public void executeSave(String filePath) throws HotSearchException { + SaveCommand command = new SaveCommand(outputService, filePath); + command.execute(); + } + + public void showHelp() throws HotSearchException { + commandInvoker.executeCommand("help"); + } + + public void processFullPipeline(String source, String filterCode, String outputType, String savePath) throws HotSearchException { + executeFetch(source); + executeFilter(filterCode); + executeOutput(outputType); + if (savePath != null || !savePath.isEmpty()) { + executeSave(savePath); + } + } +} \ No newline at end of file diff --git a/w12/总代码/src/main/java/com/weibo/hotsearch/exception/CliException.java b/w12/总代码/src/main/java/com/weibo/hotsearch/exception/CliException.java new file mode 100644 index 0000000..6fad0f6 --- /dev/null +++ b/w12/总代码/src/main/java/com/weibo/hotsearch/exception/CliException.java @@ -0,0 +1,20 @@ +package com.weibo.hotsearch.exception; + +public class CliException extends HotSearchException { + 
/**
 * Error codes used by the hot-search exceptions; each pairs a numeric code with a
 * default user-facing message.
 */
public enum ErrorCode {

    // General errors (0, 1000-1003)
    SUCCESS(0, "操作成功"),
    UNKNOWN_ERROR(1000, "未知错误"),
    PARAMETER_ERROR(1001, "参数错误"),
    NOT_FOUND(1002, "资源未找到"),
    DUPLICATE_ERROR(1003, "重复操作"),

    // Command-line errors (2001-2003)
    CLI_PARSE_ERROR(2001, "命令行参数解析错误"),
    CLI_COMMAND_NOT_FOUND(2002, "命令未找到"),
    CLI_INVALID_OPTION(2003, "无效的选项"),

    // Network errors (3001-3003)
    NETWORK_ERROR(3001, "网络请求失败"),
    CONNECTION_TIMEOUT(3002, "连接超时"),
    HTTP_ERROR(3003, "HTTP请求错误"),

    // Data errors (4001-4003)
    DATA_PARSE_ERROR(4001, "数据解析失败"),
    DATA_FORMAT_ERROR(4002, "数据格式错误"),
    DATA_EMPTY(4003, "数据为空"),

    // Service errors (5001-5003)
    SERVICE_UNAVAILABLE(5001, "服务不可用"),
    SERVICE_RATE_LIMITED(5002, "请求被限流"),
    AUTHENTICATION_FAILED(5003, "认证失败");

    private final int numericCode;
    private final String defaultMessage;

    ErrorCode(int numericCode, String defaultMessage) {
        this.numericCode = numericCode;
        this.defaultMessage = defaultMessage;
    }

    /** @return the numeric code of this error */
    public int getCode() {
        return this.numericCode;
    }

    /** @return the default message of this error */
    public String getMessage() {
        return this.defaultMessage;
    }
}
@@ -0,0 +1,20 @@ +package com.weibo.hotsearch.exception; + +public class NetworkException extends HotSearchException { + + public NetworkException(ErrorCode errorCode) { + super(errorCode); + } + + public NetworkException(ErrorCode errorCode, Throwable cause) { + super(errorCode, cause); + } + + public NetworkException(ErrorCode errorCode, String detail) { + super(errorCode, detail); + } + + public NetworkException(ErrorCode errorCode, String detail, Throwable cause) { + super(errorCode, detail, cause); + } +} \ No newline at end of file diff --git a/w12/总代码/src/main/java/com/weibo/hotsearch/model/AppContext.java b/w12/总代码/src/main/java/com/weibo/hotsearch/model/AppContext.java new file mode 100644 index 0000000..2c95b9e --- /dev/null +++ b/w12/总代码/src/main/java/com/weibo/hotsearch/model/AppContext.java @@ -0,0 +1,49 @@ +package com.weibo.hotsearch.model; + +import java.util.HashMap; +import java.util.Map; + +public class AppContext { + + private static final AppContext instance = new AppContext(); + + private final Map attributes = new HashMap<>(); + + private HotSearchResult currentResult; + + private AppContext() { + } + + public static AppContext getInstance() { + return instance; + } + + public void setAttribute(String key, Object value) { + attributes.put(key, value); + } + + public Object getAttribute(String key) { + return attributes.get(key); + } + + public void removeAttribute(String key) { + attributes.remove(key); + } + + public boolean hasAttribute(String key) { + return attributes.containsKey(key); + } + + public HotSearchResult getCurrentResult() { + return currentResult; + } + + public void setCurrentResult(HotSearchResult currentResult) { + this.currentResult = currentResult; + } + + public void clear() { + attributes.clear(); + currentResult = null; + } +} \ No newline at end of file diff --git a/w12/总代码/src/main/java/com/weibo/hotsearch/model/HotSearchItem.java b/w12/总代码/src/main/java/com/weibo/hotsearch/model/HotSearchItem.java new file mode 
100644 index 0000000..b97131c --- /dev/null +++ b/w12/总代码/src/main/java/com/weibo/hotsearch/model/HotSearchItem.java @@ -0,0 +1,69 @@ +package com.weibo.hotsearch.model; + +import java.time.LocalDateTime; + +public class HotSearchItem { + + private String title; + private long hotValue; + private int rank; + private String source; + private LocalDateTime fetchTime; + + public HotSearchItem() { + } + + public HotSearchItem(String title, long hotValue, int rank, String source) { + this.title = title; + this.hotValue = hotValue; + this.rank = rank; + this.source = source; + this.fetchTime = LocalDateTime.now(); + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public long getHotValue() { + return hotValue; + } + + public void setHotValue(long hotValue) { + this.hotValue = hotValue; + } + + public int getRank() { + return rank; + } + + public void setRank(int rank) { + this.rank = rank; + } + + public String getSource() { + return source; + } + + public void setSource(String source) { + this.source = source; + } + + public LocalDateTime getFetchTime() { + return fetchTime; + } + + public void setFetchTime(LocalDateTime fetchTime) { + this.fetchTime = fetchTime; + } + + @Override + public String toString() { + return String.format("HotSearchItem{title='%s', hotValue=%d, rank=%d, source='%s'}", + title, hotValue, rank, source); + } +} \ No newline at end of file diff --git a/w12/总代码/src/main/java/com/weibo/hotsearch/model/HotSearchResult.java b/w12/总代码/src/main/java/com/weibo/hotsearch/model/HotSearchResult.java new file mode 100644 index 0000000..a8cdcd4 --- /dev/null +++ b/w12/总代码/src/main/java/com/weibo/hotsearch/model/HotSearchResult.java @@ -0,0 +1,75 @@ +package com.weibo.hotsearch.model; + +import java.time.LocalDateTime; +import java.util.ArrayList; +import java.util.List; + +public class HotSearchResult { + + private List items; + private String filterName; + private String dataSource; + 
private LocalDateTime fetchTime; + private int totalCount; + + public HotSearchResult() { + this.items = new ArrayList<>(); + this.fetchTime = LocalDateTime.now(); + } + + public HotSearchResult(List items, String filterName, String dataSource) { + this.items = items != null ? items : new ArrayList<>(); + this.filterName = filterName; + this.dataSource = dataSource; + this.fetchTime = LocalDateTime.now(); + this.totalCount = this.items.size(); + } + + public List getItems() { + return items; + } + + public void setItems(List items) { + this.items = items != null ? items : new ArrayList<>(); + this.totalCount = this.items.size(); + } + + public String getFilterName() { + return filterName; + } + + public void setFilterName(String filterName) { + this.filterName = filterName; + } + + public String getDataSource() { + return dataSource; + } + + public void setDataSource(String dataSource) { + this.dataSource = dataSource; + } + + public LocalDateTime getFetchTime() { + return fetchTime; + } + + public void setFetchTime(LocalDateTime fetchTime) { + this.fetchTime = fetchTime; + } + + public int getTotalCount() { + return totalCount; + } + + public void addItem(HotSearchItem item) { + if (item != null) { + this.items.add(item); + this.totalCount++; + } + } + + public boolean isEmpty() { + return items == null || items.isEmpty(); + } +} \ No newline at end of file diff --git a/w12/总代码/src/main/java/com/weibo/hotsearch/service/DataFetcher.java b/w12/总代码/src/main/java/com/weibo/hotsearch/service/DataFetcher.java new file mode 100644 index 0000000..07d2055 --- /dev/null +++ b/w12/总代码/src/main/java/com/weibo/hotsearch/service/DataFetcher.java @@ -0,0 +1,307 @@ +package com.weibo.hotsearch.service; + +import com.alibaba.fastjson2.JSONArray; +import com.alibaba.fastjson2.JSONObject; +import com.weibo.hotsearch.exception.DataParseException; +import com.weibo.hotsearch.exception.ErrorCode; +import com.weibo.hotsearch.exception.HotSearchException; +import 
com.weibo.hotsearch.exception.NetworkException; +import com.weibo.hotsearch.model.AppContext; +import com.weibo.hotsearch.model.HotSearchItem; +import com.weibo.hotsearch.model.HotSearchResult; +import org.apache.hc.client5.http.fluent.Request; +import org.apache.hc.core5.util.Timeout; + +import java.nio.charset.StandardCharsets; +import java.time.LocalDateTime; +import java.util.ArrayList; +import java.util.List; + +public class DataFetcher { + + private static final String WEIBO_URL = "https://weibo.com/ajax/side/hotSearch"; + private static final String TIEBA_URL = "https://tieba.baidu.com/hottopic/browse/topicList"; + private static final String ZHIHU_URL = "https://www.zhihu.com/api/v4/search/top_search"; + + private static final int CONNECT_TIMEOUT = 10000; + private static final int RESPONSE_TIMEOUT = 10000; + private static final int MAX_RETRIES = 3; + + public void fetch(String source) throws HotSearchException { + List items = new ArrayList<>(); + + switch (source.toLowerCase()) { + case "weibo": + items = fetchWeibo(); + break; + case "tieba": + items = fetchTieba(); + break; + case "zhihu": + items = fetchZhihu(); + break; + case "all": + items.addAll(fetchWeibo()); + items.addAll(fetchTieba()); + items.addAll(fetchZhihu()); + break; + default: + throw new HotSearchException(ErrorCode.PARAMETER_ERROR, "未知数据源: " + source); + } + + HotSearchResult result = new HotSearchResult(items, null, source); + AppContext.getInstance().setCurrentResult(result); + } + + private List fetchWeibo() throws HotSearchException { + List items = new ArrayList<>(); + try { + String json = fetchUrlWithRetry(WEIBO_URL, "https://weibo.com/"); + JSONObject root = JSONObject.parseObject(json); + + if (!root.containsKey("data")) { + throw new DataParseException(ErrorCode.DATA_FORMAT_ERROR, "微博数据格式错误"); + } + + JSONObject data = root.getJSONObject("data"); + if (!data.containsKey("realtime")) { + throw new DataParseException(ErrorCode.DATA_EMPTY, "微博数据为空"); + } + + JSONArray realtime 
= data.getJSONArray("realtime"); + for (int i = 0; i < realtime.size(); i++) { + JSONObject item = realtime.getJSONObject(i); + if (item != null) { + HotSearchItem hotItem = parseWeiboItem(item); + if (hotItem != null) { + items.add(hotItem); + } + } + } + } catch (NetworkException e) { + throw e; + } catch (Exception e) { + throw new DataParseException(ErrorCode.DATA_PARSE_ERROR, "微博数据解析失败", e); + } + return items; + } + + private HotSearchItem parseWeiboItem(JSONObject item) { + String word = item.getString("word"); + if (word == null || word.isEmpty()) { + return null; + } + long num = item.getLongValue("num", 0); + int rank = item.getIntValue("rank", 0); + return new HotSearchItem(word, num, rank, "微博"); + } + + private List fetchTieba() throws HotSearchException { + List items = new ArrayList<>(); + try { + String json = fetchUrlWithRetry(TIEBA_URL, "https://tieba.baidu.com/"); + JSONObject root = JSONObject.parseObject(json); + + if (!root.containsKey("data")) { + throw new DataParseException(ErrorCode.DATA_FORMAT_ERROR, "贴吧数据格式错误"); + } + + JSONObject data = root.getJSONObject("data"); + if (!data.containsKey("bang_topic")) { + throw new DataParseException(ErrorCode.DATA_EMPTY, "贴吧数据为空"); + } + + JSONArray topics = data.getJSONArray("bang_topic"); + for (int i = 0; i < topics.size(); i++) { + JSONObject item = topics.getJSONObject(i); + if (item != null) { + HotSearchItem hotItem = parseTiebaItem(item, i + 1); + if (hotItem != null) { + items.add(hotItem); + } + } + } + } catch (NetworkException e) { + throw e; + } catch (Exception e) { + throw new DataParseException(ErrorCode.DATA_PARSE_ERROR, "贴吧数据解析失败", e); + } + return items; + } + + private HotSearchItem parseTiebaItem(JSONObject item, int index) { + String topicName = item.getString("topic_name"); + if (topicName == null || topicName.isEmpty()) { + return null; + } + int readNum = item.getIntValue("read_num", 0); + int discussNum = item.getIntValue("discuss_num", 0); + return new 
HotSearchItem(topicName, (long) readNum + discussNum, 0, "百度贴吧"); + } + + private List fetchZhihu() throws HotSearchException { + List items = new ArrayList<>(); + try { + String json = fetchUrlWithRetry(ZHIHU_URL, "https://zhuanlan.zhihu.com/"); + JSONObject root = JSONObject.parseObject(json); + + JSONArray data = findZhihuData(root, json); + if (data == null || data.isEmpty()) { + throw new DataParseException(ErrorCode.DATA_EMPTY, "知乎数据为空"); + } + + for (int i = 0; i < data.size(); i++) { + JSONObject item = data.getJSONObject(i); + if (item != null) { + HotSearchItem hotItem = parseZhihuItem(item, i + 1); + if (hotItem != null) { + items.add(hotItem); + } + } + } + } catch (NetworkException e) { + throw e; + } catch (Exception e) { + throw new DataParseException(ErrorCode.DATA_PARSE_ERROR, "知乎数据解析失败", e); + } + return items; + } + + private JSONArray findZhihuData(JSONObject root, String json) { + if (root.containsKey("data") && root.get("data") instanceof JSONArray) { + return root.getJSONArray("data"); + } else if (root.containsKey("data")) { + JSONObject dataObj = root.getJSONObject("data"); + if (dataObj != null && dataObj.containsKey("topics")) { + return dataObj.getJSONArray("topics"); + } + } else if (root.containsKey("top_search")) { + JSONObject topSearch = root.getJSONObject("top_search"); + if (topSearch != null && topSearch.containsKey("words")) { + return topSearch.getJSONArray("words"); + } + } else if (json.startsWith("[")) { + return JSONArray.parseArray(json); + } + return null; + } + + private HotSearchItem parseZhihuItem(JSONObject item, int index) { + String title = getItemTitle(item); + if (title == null || title.isEmpty()) { + return null; + } + + long hotValue = 0; + if (item.containsKey("hot_score")) { + hotValue = item.getLongValue("hot_score", 0); + } else if (item.containsKey("score")) { + hotValue = item.getLongValue("score", 0); + } else if (item.containsKey("detail_text")) { + String detailText = item.getString("detail_text"); + if 
(detailText != null) { + try { + String numStr = detailText.replaceAll("[^0-9]", ""); + if (!numStr.isEmpty()) { + hotValue = Long.parseLong(numStr); + } + } catch (Exception e) { + hotValue = 0; + } + } + } + + return new HotSearchItem(title, hotValue, index, "知乎"); + } + + private String getItemTitle(JSONObject item) { + if (item == null) { + return null; + } + + String title = item.getString("title"); + if (title != null && !title.isEmpty()) { + return title; + } + + title = item.getString("topic_title"); + if (title != null && !title.isEmpty()) { + return title; + } + + title = item.getString("name"); + if (title != null && !title.isEmpty()) { + return title; + } + + if (item.containsKey("target")) { + JSONObject target = item.getJSONObject("target"); + if (target != null) { + title = target.getString("title"); + if (title != null && !title.isEmpty()) { + return title; + } + } + } + + title = item.getString("display_query"); + if (title != null && !title.isEmpty()) { + return title; + } + + return item.getString("query"); + } + + private String fetchUrlWithRetry(String url, String referer) throws NetworkException { + int retryCount = 0; + Exception lastException = null; + + while (retryCount < MAX_RETRIES) { + try { + return Request.get(url) + .addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36") + .addHeader("Referer", referer) + .addHeader("Accept", "application/json, text/plain, */*;charset=UTF-8") + .addHeader("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8") + .addHeader("Accept-Encoding", "identity") + .addHeader("Connection", "keep-alive") + .addHeader("Content-Type", "application/json;charset=UTF-8") + .connectTimeout(Timeout.ofMilliseconds(CONNECT_TIMEOUT)) + .responseTimeout(Timeout.ofMilliseconds(RESPONSE_TIMEOUT)) + .execute() + .returnContent() + .asString(StandardCharsets.UTF_8); + } catch (java.net.SocketTimeoutException e) { + lastException = e; + retryCount++; + 
if (retryCount >= MAX_RETRIES) { + throw new NetworkException(ErrorCode.CONNECTION_TIMEOUT, "连接超时", e); + } + sleep(2000); + } catch (java.net.ConnectException e) { + lastException = e; + retryCount++; + if (retryCount >= MAX_RETRIES) { + throw new NetworkException(ErrorCode.NETWORK_ERROR, "连接失败", e); + } + sleep(2000); + } catch (Exception e) { + lastException = e; + retryCount++; + if (retryCount >= MAX_RETRIES) { + throw new NetworkException(ErrorCode.NETWORK_ERROR, "网络请求失败", e); + } + sleep(2000); + } + } + throw new NetworkException(ErrorCode.NETWORK_ERROR, "网络请求失败", lastException); + } + + private void sleep(long millis) { + try { + Thread.sleep(millis); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } +} \ No newline at end of file diff --git a/w12/总代码/src/main/java/com/weibo/hotsearch/service/FilterService.java b/w12/总代码/src/main/java/com/weibo/hotsearch/service/FilterService.java new file mode 100644 index 0000000..0778cb1 --- /dev/null +++ b/w12/总代码/src/main/java/com/weibo/hotsearch/service/FilterService.java @@ -0,0 +1,44 @@ +package com.weibo.hotsearch.service; + +import com.alibaba.fastjson2.JSONObject; +import com.weibo.hotsearch.exception.ErrorCode; +import com.weibo.hotsearch.exception.HotSearchException; +import com.weibo.hotsearch.model.AppContext; +import com.weibo.hotsearch.model.HotSearchItem; +import com.weibo.hotsearch.model.HotSearchResult; +import com.weibo.hotsearch.strategy.FilterStrategy; + +import java.util.ArrayList; +import java.util.List; + +public class FilterService { + + public void filter(FilterStrategy strategy) throws HotSearchException { + HotSearchResult currentResult = AppContext.getInstance().getCurrentResult(); + + if (currentResult == null || currentResult.isEmpty()) { + throw new HotSearchException(ErrorCode.DATA_EMPTY, "没有可过滤的数据,请先获取数据"); + } + + List filteredItems = new ArrayList<>(); + + for (HotSearchItem item : currentResult.getItems()) { + JSONObject jsonItem = convertToJson(item); 
+ if (strategy.match(jsonItem)) { + filteredItems.add(item); + } + } + + HotSearchResult filteredResult = new HotSearchResult(filteredItems, strategy.getFilterName(), currentResult.getDataSource()); + AppContext.getInstance().setCurrentResult(filteredResult); + } + + private JSONObject convertToJson(HotSearchItem item) { + JSONObject json = new JSONObject(); + json.put("word", item.getTitle()); + json.put("title", item.getTitle()); + json.put("num", item.getHotValue()); + json.put("rank", item.getRank()); + return json; + } +} \ No newline at end of file diff --git a/w12/总代码/src/main/java/com/weibo/hotsearch/service/OutputService.java b/w12/总代码/src/main/java/com/weibo/hotsearch/service/OutputService.java new file mode 100644 index 0000000..68fc6c4 --- /dev/null +++ b/w12/总代码/src/main/java/com/weibo/hotsearch/service/OutputService.java @@ -0,0 +1,66 @@ +package com.weibo.hotsearch.service; + +import com.weibo.hotsearch.exception.ErrorCode; +import com.weibo.hotsearch.exception.HotSearchException; +import com.weibo.hotsearch.model.AppContext; +import com.weibo.hotsearch.model.HotSearchResult; +import com.weibo.hotsearch.view.TextView; +import com.weibo.hotsearch.view.View; +import com.weibo.hotsearch.view.ViewFactory; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; + +public class OutputService { + + private static final DateTimeFormatter FILE_FORMATTER = DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss"); + private static final String OUTPUT_DIR = "hotsearch_results"; + + public void output(String outputType) throws HotSearchException { + HotSearchResult result = AppContext.getInstance().getCurrentResult(); + + if (result == null) { + throw new HotSearchException(ErrorCode.DATA_EMPTY, "没有可输出的数据"); + } + + View view = ViewFactory.getView(outputType); + view.render(result); + } + + public void saveToFile(String filePath) throws 
HotSearchException { + HotSearchResult result = AppContext.getInstance().getCurrentResult(); + + if (result == null) { + throw new HotSearchException(ErrorCode.DATA_EMPTY, "没有可保存的数据"); + } + + try { + File dir = new File(OUTPUT_DIR); + if (!dir.exists()) { + dir.mkdirs(); + } + + String actualPath = filePath; + if (filePath == null || filePath.isEmpty()) { + String timestamp = LocalDateTime.now().format(FILE_FORMATTER); + actualPath = OUTPUT_DIR + File.separator + "hotsearch_" + timestamp + ".txt"; + } + + TextView textView = (TextView) ViewFactory.getView("text"); + String content = textView.renderToString(result); + + try (BufferedWriter writer = new BufferedWriter(new FileWriter(actualPath))) { + writer.write(content); + } + + System.out.println("\n结果已保存到文件: " + actualPath); + + } catch (IOException e) { + throw new HotSearchException(ErrorCode.UNKNOWN_ERROR, "保存文件失败", e); + } + } +} \ No newline at end of file diff --git a/w12/总代码/src/main/java/com/weibo/hotsearch/strategy/FilterStrategy.java b/w12/总代码/src/main/java/com/weibo/hotsearch/strategy/FilterStrategy.java new file mode 100644 index 0000000..c5924bd --- /dev/null +++ b/w12/总代码/src/main/java/com/weibo/hotsearch/strategy/FilterStrategy.java @@ -0,0 +1,12 @@ +package com.weibo.hotsearch.strategy; + +import com.alibaba.fastjson2.JSONObject; + +public interface FilterStrategy { + + boolean match(JSONObject item); + + String getFilterName(); + + String getFilterCode(); +} \ No newline at end of file diff --git a/w12/总代码/src/main/java/com/weibo/hotsearch/strategy/FilterStrategyFactory.java b/w12/总代码/src/main/java/com/weibo/hotsearch/strategy/FilterStrategyFactory.java new file mode 100644 index 0000000..779067f --- /dev/null +++ b/w12/总代码/src/main/java/com/weibo/hotsearch/strategy/FilterStrategyFactory.java @@ -0,0 +1,30 @@ +package com.weibo.hotsearch.strategy; + +import java.util.HashMap; +import java.util.Map; + +public class FilterStrategyFactory { + + private static final Map strategies = new HashMap<>(); 
+ + static { + strategies.put("star", new StarFilterStrategy()); + strategies.put("sports", new SportsFilterStrategy()); + strategies.put("policy", new PolicyFilterStrategy()); + } + + public static FilterStrategy getStrategy(String code) { + if (code == null || code.isEmpty()) { + return null; + } + return strategies.get(code.toLowerCase()); + } + + public static Map getAllStrategies() { + return new HashMap<>(strategies); + } + + public static boolean hasStrategy(String code) { + return code != null && strategies.containsKey(code.toLowerCase()); + } +} \ No newline at end of file diff --git a/w12/总代码/src/main/java/com/weibo/hotsearch/strategy/PolicyFilterStrategy.java b/w12/总代码/src/main/java/com/weibo/hotsearch/strategy/PolicyFilterStrategy.java new file mode 100644 index 0000000..11ccdaf --- /dev/null +++ b/w12/总代码/src/main/java/com/weibo/hotsearch/strategy/PolicyFilterStrategy.java @@ -0,0 +1,55 @@ +package com.weibo.hotsearch.strategy; + +import com.alibaba.fastjson2.JSONObject; + +public class PolicyFilterStrategy implements FilterStrategy { + + private static final String[] KEYWORDS = { + "政策", "新规", "条例", "法规", "通知", "公告", "发布", + "国务院", "发改委", "财政部", "教育部", "工信部", "科技部", + "税收", "补贴", "优惠", "扶持", "改革", "开放", "创新", + "十四五", "计划", "规划", "方案", "意见", "办法", "细则", + "经济", "金融", "市场", "监管", "安全", "环保", "绿色" + }; + + @Override + public boolean match(JSONObject item) { + if (item == null) { + return false; + } + String word = getItemTitle(item); + if (word == null || word.isEmpty()) { + return false; + } + for (String keyword : KEYWORDS) { + if (word.contains(keyword)) { + return true; + } + } + return false; + } + + @Override + public String getFilterName() { + return "国家政策相关热搜"; + } + + @Override + public String getFilterCode() { + return "policy"; + } + + private String getItemTitle(JSONObject item) { + String title = item.getString("word"); + if (title == null || title.isEmpty()) { + title = item.getString("topic_name"); + } + if (title == null || 
title.isEmpty()) { + title = item.getString("title"); + } + if (title == null || title.isEmpty()) { + title = item.getString("name"); + } + return title; + } +} \ No newline at end of file diff --git a/w12/总代码/src/main/java/com/weibo/hotsearch/strategy/SportsFilterStrategy.java b/w12/总代码/src/main/java/com/weibo/hotsearch/strategy/SportsFilterStrategy.java new file mode 100644 index 0000000..2b26650 --- /dev/null +++ b/w12/总代码/src/main/java/com/weibo/hotsearch/strategy/SportsFilterStrategy.java @@ -0,0 +1,54 @@ +package com.weibo.hotsearch.strategy; + +import com.alibaba.fastjson2.JSONObject; + +public class SportsFilterStrategy implements FilterStrategy { + + private static final String[] KEYWORDS = { + "足球", "篮球", "世界杯", "NBA", "CBA", "奥运会", "世锦赛", + "冠军", "比赛", "夺冠", "进球", "比分", "运动员", "国足", + "乒乓", "排球", "羽毛球", "游泳", "田径", "体操", "跳水", + "MVP", "转会", "联赛", "中超", "英超", "西甲", "欧冠" + }; + + @Override + public boolean match(JSONObject item) { + if (item == null) { + return false; + } + String word = getItemTitle(item); + if (word == null || word.isEmpty()) { + return false; + } + for (String keyword : KEYWORDS) { + if (word.contains(keyword)) { + return true; + } + } + return false; + } + + @Override + public String getFilterName() { + return "体育相关热搜"; + } + + @Override + public String getFilterCode() { + return "sports"; + } + + private String getItemTitle(JSONObject item) { + String title = item.getString("word"); + if (title == null || title.isEmpty()) { + title = item.getString("topic_name"); + } + if (title == null || title.isEmpty()) { + title = item.getString("title"); + } + if (title == null || title.isEmpty()) { + title = item.getString("name"); + } + return title; + } +} \ No newline at end of file diff --git a/w12/总代码/src/main/java/com/weibo/hotsearch/strategy/StarFilterStrategy.java b/w12/总代码/src/main/java/com/weibo/hotsearch/strategy/StarFilterStrategy.java new file mode 100644 index 0000000..6c93768 --- /dev/null +++ 
b/w12/总代码/src/main/java/com/weibo/hotsearch/strategy/StarFilterStrategy.java @@ -0,0 +1,76 @@ +package com.weibo.hotsearch.strategy; + +import com.alibaba.fastjson2.JSONObject; + +public class StarFilterStrategy implements FilterStrategy { + + private static final String[] KEYWORDS = { + "明星", "演员", "歌手", "爱豆", "艺人", "红毯", "综艺", "新剧", + "恋情", "官宣", "演唱会", "代言", "造型", "封面" + }; + + @Override + public boolean match(JSONObject item) { + if (item == null) { + return false; + } + String word = getItemTitle(item); + if (word == null || word.isEmpty()) { + return false; + } + for (String keyword : KEYWORDS) { + if (word.contains(keyword)) { + return true; + } + } + return false; + } + + @Override + public String getFilterName() { + return "明星相关热搜"; + } + + @Override + public String getFilterCode() { + return "star"; + } + + private String getItemTitle(JSONObject item) { + if (item == null) { + return null; + } + + String title = item.getString("word"); + if (title != null && !title.isEmpty()) { + return title; + } + + title = item.getString("topic_name"); + if (title != null && !title.isEmpty()) { + return title; + } + + title = item.getString("title"); + if (title != null && !title.isEmpty()) { + return title; + } + + title = item.getString("name"); + if (title != null && !title.isEmpty()) { + return title; + } + + if (item.containsKey("target")) { + JSONObject target = item.getJSONObject("target"); + if (target != null) { + title = target.getString("title"); + if (title != null && !title.isEmpty()) { + return title; + } + } + } + + return title; + } +} \ No newline at end of file diff --git a/w12/总代码/src/main/java/com/weibo/hotsearch/view/ConsoleView.java b/w12/总代码/src/main/java/com/weibo/hotsearch/view/ConsoleView.java new file mode 100644 index 0000000..34253d6 --- /dev/null +++ b/w12/总代码/src/main/java/com/weibo/hotsearch/view/ConsoleView.java @@ -0,0 +1,51 @@ +package com.weibo.hotsearch.view; + +import com.weibo.hotsearch.model.HotSearchItem; +import 
com.weibo.hotsearch.model.HotSearchResult; + +import java.time.format.DateTimeFormatter; + +public class ConsoleView implements View { + + private static final DateTimeFormatter FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); + + @Override + public void render(HotSearchResult result) { + if (result == null) { + System.out.println("没有数据可显示"); + return; + } + + System.out.println("\n===== " + (result.getFilterName() != null ? result.getFilterName() : "热搜结果") + " ====="); + System.out.println("数据源: " + result.getDataSource()); + System.out.println("采集时间: " + result.getFetchTime().format(FORMATTER)); + System.out.println("----------------------------------------"); + + if (result.isEmpty()) { + System.out.println("当前暂无符合条件的热搜内容"); + } else { + for (int i = 0; i < result.getItems().size(); i++) { + HotSearchItem item = result.getItems().get(i); + String line = formatItem(item, i); + System.out.println(line); + } + } + + System.out.println("\n===== 热搜总数:" + result.getTotalCount() + " 条 ====="); + } + + private String formatItem(HotSearchItem item, int index) { + if (item.getRank() > 0) { + return String.format("排名:%d\t热度:%d\t来源:%s\t热搜:%s", + item.getRank(), item.getHotValue(), item.getSource(), item.getTitle()); + } else { + return String.format("序号:%d\t热度:%d\t来源:%s\t热搜:%s", + index + 1, item.getHotValue(), item.getSource(), item.getTitle()); + } + } + + @Override + public String getViewType() { + return "console"; + } +} \ No newline at end of file diff --git a/w12/总代码/src/main/java/com/weibo/hotsearch/view/TextView.java b/w12/总代码/src/main/java/com/weibo/hotsearch/view/TextView.java new file mode 100644 index 0000000..4691a24 --- /dev/null +++ b/w12/总代码/src/main/java/com/weibo/hotsearch/view/TextView.java @@ -0,0 +1,83 @@ +package com.weibo.hotsearch.view; + +import com.weibo.hotsearch.model.HotSearchItem; +import com.weibo.hotsearch.model.HotSearchResult; + +import java.time.format.DateTimeFormatter; +import java.util.StringJoiner; + +public class 
TextView implements View { + + private static final DateTimeFormatter FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); + + @Override + public void render(HotSearchResult result) { + if (result == null) { + System.out.println("没有数据可显示"); + return; + } + + StringJoiner sj = new StringJoiner("\n"); + + sj.add("===== " + (result.getFilterName() != null ? result.getFilterName() : "热搜结果") + " ====="); + sj.add("数据源: " + result.getDataSource()); + sj.add("采集时间: " + result.getFetchTime().format(FORMATTER)); + sj.add("----------------------------------------"); + + if (result.isEmpty()) { + sj.add("当前暂无符合条件的热搜内容"); + } else { + for (int i = 0; i < result.getItems().size(); i++) { + HotSearchItem item = result.getItems().get(i); + sj.add(formatItem(item, i)); + } + } + + sj.add(""); + sj.add("===== 热搜总数:" + result.getTotalCount() + " 条 ====="); + + System.out.println(sj.toString()); + } + + private String formatItem(HotSearchItem item, int index) { + if (item.getRank() > 0) { + return String.format("排名:%d\t热度:%d\t来源:%s\t热搜:%s", + item.getRank(), item.getHotValue(), item.getSource(), item.getTitle()); + } else { + return String.format("序号:%d\t热度:%d\t来源:%s\t热搜:%s", + index + 1, item.getHotValue(), item.getSource(), item.getTitle()); + } + } + + public String renderToString(HotSearchResult result) { + if (result == null) { + return "没有数据可显示"; + } + + StringJoiner sj = new StringJoiner("\n"); + + sj.add("===== " + (result.getFilterName() != null ? 
result.getFilterName() : "热搜结果") + " ====="); + sj.add("数据源: " + result.getDataSource()); + sj.add("采集时间: " + result.getFetchTime().format(FORMATTER)); + sj.add("----------------------------------------"); + + if (result.isEmpty()) { + sj.add("当前暂无符合条件的热搜内容"); + } else { + for (int i = 0; i < result.getItems().size(); i++) { + HotSearchItem item = result.getItems().get(i); + sj.add(formatItem(item, i)); + } + } + + sj.add(""); + sj.add("===== 热搜总数:" + result.getTotalCount() + " 条 ====="); + + return sj.toString(); + } + + @Override + public String getViewType() { + return "text"; + } +} \ No newline at end of file diff --git a/w12/总代码/src/main/java/com/weibo/hotsearch/view/View.java b/w12/总代码/src/main/java/com/weibo/hotsearch/view/View.java new file mode 100644 index 0000000..754eff4 --- /dev/null +++ b/w12/总代码/src/main/java/com/weibo/hotsearch/view/View.java @@ -0,0 +1,10 @@ +package com.weibo.hotsearch.view; + +import com.weibo.hotsearch.model.HotSearchResult; + +public interface View { + + void render(HotSearchResult result); + + String getViewType(); +} \ No newline at end of file diff --git a/w12/总代码/src/main/java/com/weibo/hotsearch/view/ViewFactory.java b/w12/总代码/src/main/java/com/weibo/hotsearch/view/ViewFactory.java new file mode 100644 index 0000000..719ffe9 --- /dev/null +++ b/w12/总代码/src/main/java/com/weibo/hotsearch/view/ViewFactory.java @@ -0,0 +1,29 @@ +package com.weibo.hotsearch.view; + +import java.util.HashMap; +import java.util.Map; + +public class ViewFactory { + + private static final Map views = new HashMap<>(); + + static { + views.put("console", new ConsoleView()); + views.put("text", new TextView()); + } + + public static View getView(String type) { + if (type == null || type.isEmpty()) { + return views.get("console"); + } + return views.getOrDefault(type.toLowerCase(), views.get("console")); + } + + public static boolean hasView(String type) { + return type != null && views.containsKey(type.toLowerCase()); + } + + public static Map 
getAllViews() { + return new HashMap<>(views); + } +} \ No newline at end of file diff --git a/w12/总代码/src/run.bat b/w12/总代码/src/run.bat new file mode 100644 index 0000000..4fa4d4d --- /dev/null +++ b/w12/总代码/src/run.bat @@ -0,0 +1,4 @@ +@echo off +set CLASSPATH=target\classes;C:\Users\ruiruirui\.m2\repository\org\jsoup\jsoup\1.17.2\jsoup-1.17.2.jar +java WeiboHotSearcha +pause \ No newline at end of file diff --git a/w12/总代码/target/classes/WeiboStarHotSearcha.class b/w12/总代码/target/classes/WeiboStarHotSearcha.class new file mode 100644 index 0000000..b357cea Binary files /dev/null and b/w12/总代码/target/classes/WeiboStarHotSearcha.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/ConsoleOutputHandler.class b/w12/总代码/target/classes/com/weibo/hotsearch/ConsoleOutputHandler.class new file mode 100644 index 0000000..73999e2 Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/ConsoleOutputHandler.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/HotSearchFilter.class b/w12/总代码/target/classes/com/weibo/hotsearch/HotSearchFilter.class new file mode 100644 index 0000000..56a2f04 Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/HotSearchFilter.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/Main.class b/w12/总代码/target/classes/com/weibo/hotsearch/Main.class new file mode 100644 index 0000000..0326206 Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/Main.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/OutputHandler.class b/w12/总代码/target/classes/com/weibo/hotsearch/OutputHandler.class new file mode 100644 index 0000000..570911b Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/OutputHandler.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/StarFilter.class b/w12/总代码/target/classes/com/weibo/hotsearch/StarFilter.class new file mode 100644 index 0000000..5677f4c Binary files /dev/null and 
b/w12/总代码/target/classes/com/weibo/hotsearch/StarFilter.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/cli/CliHandler.class b/w12/总代码/target/classes/com/weibo/hotsearch/cli/CliHandler.class new file mode 100644 index 0000000..860afe2 Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/cli/CliHandler.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/cli/CliParser.class b/w12/总代码/target/classes/com/weibo/hotsearch/cli/CliParser.class new file mode 100644 index 0000000..89aae85 Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/cli/CliParser.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/command/Command.class b/w12/总代码/target/classes/com/weibo/hotsearch/command/Command.class new file mode 100644 index 0000000..59d6722 Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/command/Command.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/command/CommandInvoker.class b/w12/总代码/target/classes/com/weibo/hotsearch/command/CommandInvoker.class new file mode 100644 index 0000000..78c5d82 Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/command/CommandInvoker.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/command/CommandResult.class b/w12/总代码/target/classes/com/weibo/hotsearch/command/CommandResult.class new file mode 100644 index 0000000..f962b90 Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/command/CommandResult.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/command/FetchCommand.class b/w12/总代码/target/classes/com/weibo/hotsearch/command/FetchCommand.class new file mode 100644 index 0000000..11a23d4 Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/command/FetchCommand.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/command/FilterCommand.class 
b/w12/总代码/target/classes/com/weibo/hotsearch/command/FilterCommand.class new file mode 100644 index 0000000..dfa3c45 Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/command/FilterCommand.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/command/HelpCommand.class b/w12/总代码/target/classes/com/weibo/hotsearch/command/HelpCommand.class new file mode 100644 index 0000000..ebec991 Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/command/HelpCommand.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/command/OutputCommand.class b/w12/总代码/target/classes/com/weibo/hotsearch/command/OutputCommand.class new file mode 100644 index 0000000..a3714bd Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/command/OutputCommand.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/command/SaveCommand.class b/w12/总代码/target/classes/com/weibo/hotsearch/command/SaveCommand.class new file mode 100644 index 0000000..f7fb233 Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/command/SaveCommand.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/controller/HotSearchController.class b/w12/总代码/target/classes/com/weibo/hotsearch/controller/HotSearchController.class new file mode 100644 index 0000000..7151cbe Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/controller/HotSearchController.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/exception/CliException.class b/w12/总代码/target/classes/com/weibo/hotsearch/exception/CliException.class new file mode 100644 index 0000000..ae58f8f Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/exception/CliException.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/exception/DataParseException.class b/w12/总代码/target/classes/com/weibo/hotsearch/exception/DataParseException.class new file mode 100644 index 0000000..c1eb9a7 
Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/exception/DataParseException.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/exception/ErrorCode.class b/w12/总代码/target/classes/com/weibo/hotsearch/exception/ErrorCode.class new file mode 100644 index 0000000..8b968d2 Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/exception/ErrorCode.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/exception/HotSearchException.class b/w12/总代码/target/classes/com/weibo/hotsearch/exception/HotSearchException.class new file mode 100644 index 0000000..90cdef2 Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/exception/HotSearchException.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/exception/NetworkException.class b/w12/总代码/target/classes/com/weibo/hotsearch/exception/NetworkException.class new file mode 100644 index 0000000..0b4b21f Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/exception/NetworkException.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/model/AppContext.class b/w12/总代码/target/classes/com/weibo/hotsearch/model/AppContext.class new file mode 100644 index 0000000..2c2376d Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/model/AppContext.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/model/HotSearchItem.class b/w12/总代码/target/classes/com/weibo/hotsearch/model/HotSearchItem.class new file mode 100644 index 0000000..edb954c Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/model/HotSearchItem.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/model/HotSearchResult.class b/w12/总代码/target/classes/com/weibo/hotsearch/model/HotSearchResult.class new file mode 100644 index 0000000..dd94893 Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/model/HotSearchResult.class differ diff --git 
a/w12/总代码/target/classes/com/weibo/hotsearch/service/DataFetcher.class b/w12/总代码/target/classes/com/weibo/hotsearch/service/DataFetcher.class new file mode 100644 index 0000000..36794ce Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/service/DataFetcher.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/service/FilterService.class b/w12/总代码/target/classes/com/weibo/hotsearch/service/FilterService.class new file mode 100644 index 0000000..19550a7 Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/service/FilterService.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/service/OutputService.class b/w12/总代码/target/classes/com/weibo/hotsearch/service/OutputService.class new file mode 100644 index 0000000..a45f2a5 Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/service/OutputService.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/strategy/FilterStrategy.class b/w12/总代码/target/classes/com/weibo/hotsearch/strategy/FilterStrategy.class new file mode 100644 index 0000000..0de4599 Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/strategy/FilterStrategy.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/strategy/FilterStrategyFactory.class b/w12/总代码/target/classes/com/weibo/hotsearch/strategy/FilterStrategyFactory.class new file mode 100644 index 0000000..f0f738d Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/strategy/FilterStrategyFactory.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/strategy/PolicyFilterStrategy.class b/w12/总代码/target/classes/com/weibo/hotsearch/strategy/PolicyFilterStrategy.class new file mode 100644 index 0000000..027051c Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/strategy/PolicyFilterStrategy.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/strategy/SportsFilterStrategy.class 
b/w12/总代码/target/classes/com/weibo/hotsearch/strategy/SportsFilterStrategy.class new file mode 100644 index 0000000..3c9f127 Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/strategy/SportsFilterStrategy.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/strategy/StarFilterStrategy.class b/w12/总代码/target/classes/com/weibo/hotsearch/strategy/StarFilterStrategy.class new file mode 100644 index 0000000..95cd885 Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/strategy/StarFilterStrategy.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/view/ConsoleView.class b/w12/总代码/target/classes/com/weibo/hotsearch/view/ConsoleView.class new file mode 100644 index 0000000..9e9e7c1 Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/view/ConsoleView.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/view/TextView.class b/w12/总代码/target/classes/com/weibo/hotsearch/view/TextView.class new file mode 100644 index 0000000..c50bf96 Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/view/TextView.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/view/View.class b/w12/总代码/target/classes/com/weibo/hotsearch/view/View.class new file mode 100644 index 0000000..54ae3f2 Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/view/View.class differ diff --git a/w12/总代码/target/classes/com/weibo/hotsearch/view/ViewFactory.class b/w12/总代码/target/classes/com/weibo/hotsearch/view/ViewFactory.class new file mode 100644 index 0000000..ff01502 Binary files /dev/null and b/w12/总代码/target/classes/com/weibo/hotsearch/view/ViewFactory.class differ diff --git a/w12/总代码/target/hotsearch-1.0.0-jar-with-dependencies.jar b/w12/总代码/target/hotsearch-1.0.0-jar-with-dependencies.jar new file mode 100644 index 0000000..8e89f60 Binary files /dev/null and b/w12/总代码/target/hotsearch-1.0.0-jar-with-dependencies.jar differ diff --git 
a/w12/总代码/target/hotsearch-1.0.0.jar b/w12/总代码/target/hotsearch-1.0.0.jar new file mode 100644 index 0000000..b299671 Binary files /dev/null and b/w12/总代码/target/hotsearch-1.0.0.jar differ diff --git a/w12/总代码/target/maven-archiver/pom.properties b/w12/总代码/target/maven-archiver/pom.properties new file mode 100644 index 0000000..4544cae --- /dev/null +++ b/w12/总代码/target/maven-archiver/pom.properties @@ -0,0 +1,3 @@ +artifactId=hotsearch +groupId=com.weibo +version=1.0.0 diff --git a/w12/总代码/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst b/w12/总代码/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst new file mode 100644 index 0000000..33102c1 --- /dev/null +++ b/w12/总代码/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst @@ -0,0 +1,37 @@ +com\weibo\hotsearch\model\AppContext.class +com\weibo\hotsearch\strategy\PolicyFilterStrategy.class +com\weibo\hotsearch\view\View.class +com\weibo\hotsearch\strategy\SportsFilterStrategy.class +com\weibo\hotsearch\model\HotSearchResult.class +com\weibo\hotsearch\service\FilterService.class +com\weibo\hotsearch\view\TextView.class +com\weibo\hotsearch\HotSearchFilter.class +com\weibo\hotsearch\strategy\FilterStrategyFactory.class +com\weibo\hotsearch\exception\HotSearchException.class +com\weibo\hotsearch\command\SaveCommand.class +com\weibo\hotsearch\StarFilter.class +com\weibo\hotsearch\strategy\FilterStrategy.class +com\weibo\hotsearch\exception\ErrorCode.class +com\weibo\hotsearch\command\HelpCommand.class +com\weibo\hotsearch\command\FilterCommand.class +com\weibo\hotsearch\exception\NetworkException.class +com\weibo\hotsearch\OutputHandler.class +com\weibo\hotsearch\command\CommandResult.class +com\weibo\hotsearch\cli\CliHandler.class +com\weibo\hotsearch\model\HotSearchItem.class +com\weibo\hotsearch\controller\HotSearchController.class +com\weibo\hotsearch\strategy\StarFilterStrategy.class 
+com\weibo\hotsearch\service\DataFetcher.class +com\weibo\hotsearch\command\Command.class +com\weibo\hotsearch\ConsoleOutputHandler.class +com\weibo\hotsearch\view\ConsoleView.class +com\weibo\hotsearch\Main.class +WeiboStarHotSearcha.class +com\weibo\hotsearch\command\CommandInvoker.class +com\weibo\hotsearch\cli\CliParser.class +com\weibo\hotsearch\command\OutputCommand.class +com\weibo\hotsearch\command\FetchCommand.class +com\weibo\hotsearch\exception\CliException.class +com\weibo\hotsearch\service\OutputService.class +com\weibo\hotsearch\view\ViewFactory.class +com\weibo\hotsearch\exception\DataParseException.class diff --git a/w12/总代码/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst b/w12/总代码/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst new file mode 100644 index 0000000..6983794 --- /dev/null +++ b/w12/总代码/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst @@ -0,0 +1,37 @@ +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\command\OutputCommand.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\WeiboStarHotSearcha.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\command\Command.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\model\AppContext.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\command\SaveCommand.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\command\CommandInvoker.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\service\OutputService.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\service\DataFetcher.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\OutputHandler.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\cli\CliParser.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\command\CommandResult.java 
+C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\cli\CliHandler.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\command\FetchCommand.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\strategy\FilterStrategyFactory.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\Main.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\exception\ErrorCode.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\exception\CliException.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\StarFilter.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\view\View.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\strategy\FilterStrategy.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\view\TextView.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\strategy\PolicyFilterStrategy.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\model\HotSearchResult.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\ConsoleOutputHandler.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\command\HelpCommand.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\exception\DataParseException.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\service\FilterService.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\command\FilterCommand.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\exception\NetworkException.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\controller\HotSearchController.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\HotSearchFilter.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\view\ViewFactory.java 
+C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\strategy\StarFilterStrategy.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\model\HotSearchItem.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\view\ConsoleView.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\exception\HotSearchException.java +C:\Users\ruiruirui\java\w11\总代码\src\main\java\com\weibo\hotsearch\strategy\SportsFilterStrategy.java