Browse Source

宋瑞-202506050301

main
Songrui 3 weeks ago
parent
commit
c542efb776
  1. BIN
      w10/AI协同升级.png
  2. 4
      w10/java-cli/.gitignore
  3. 52
      w10/java-cli/pom.xml
  4. 21
      w10/java-cli/src/main/java/com/example/datacollect/Main.java
  5. 64
      w10/java-cli/src/main/java/com/example/datacollect/command/AnalyzeCommand.java
  6. 8
      w10/java-cli/src/main/java/com/example/datacollect/command/Command.java
  7. 44
      w10/java-cli/src/main/java/com/example/datacollect/command/CrawlCommand.java
  8. 23
      w10/java-cli/src/main/java/com/example/datacollect/command/ExitCommand.java
  9. 22
      w10/java-cli/src/main/java/com/example/datacollect/command/HelpCommand.java
  10. 22
      w10/java-cli/src/main/java/com/example/datacollect/command/ListCommand.java
  11. 49
      w10/java-cli/src/main/java/com/example/datacollect/controller/CrawlerController.java
  12. 45
      w10/java-cli/src/main/java/com/example/datacollect/model/Article.java
  13. 41
      w10/java-cli/src/main/java/com/example/datacollect/repository/ArticleRepository.java
  14. 27
      w10/java-cli/src/main/java/com/example/datacollect/strategy/BlogStrategy.java
  15. 10
      w10/java-cli/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java
  16. 37
      w10/java-cli/src/main/java/com/example/datacollect/strategy/DefaultStrategy.java
  17. 51
      w10/java-cli/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java
  18. 27
      w10/java-cli/src/main/java/com/example/datacollect/strategy/NewsStrategy.java
  19. 27
      w10/java-cli/src/main/java/com/example/datacollect/strategy/PriorityStrategy.java
  20. 42
      w10/java-cli/src/main/java/com/example/datacollect/view/ConsoleView.java
  21. BIN
      w10/思考题.png
  22. BIN
      w10/进阶探究.png

BIN
w10/AI协同升级.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 718 KiB

4
w10/java-cli/.gitignore

@ -0,0 +1,4 @@
# Packaged build artifacts
*.jar
# Compiled bytecode
*.class
# Runtime log output
*.log

52
w10/java-cli/pom.xml

@ -0,0 +1,52 @@
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.example</groupId>
  <artifactId>datacollect-cli</artifactId>
  <version>0.1.0</version>

  <properties>
    <!-- Sources contain non-ASCII string literals; pin the encoding so the
         build does not depend on the platform default charset. -->
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>11</maven.compiler.source>
    <maven.compiler.target>11</maven.compiler.target>
  </properties>

  <dependencies>
    <!-- HTML fetching and parsing -->
    <dependency>
      <groupId>org.jsoup</groupId>
      <artifactId>jsoup</artifactId>
      <version>1.17.2</version>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.8.1</version>
      </plugin>
      <!-- Builds a runnable jar-with-dependencies during the package phase -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-assembly-plugin</artifactId>
        <version>3.3.0</version>
        <configuration>
          <archive>
            <manifest>
              <mainClass>com.example.datacollect.Main</mainClass>
            </manifest>
          </archive>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
        </configuration>
        <executions>
          <execution>
            <id>make-assembly</id>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>

21
w10/java-cli/src/main/java/com/example/datacollect/Main.java

@ -0,0 +1,21 @@
package com.example.datacollect;
import com.example.datacollect.controller.CrawlerController;
import com.example.datacollect.repository.ArticleRepository;
import com.example.datacollect.strategy.StrategyFactory;
import com.example.datacollect.view.ConsoleView;
/**
 * Entry point of the CLI crawler: wires the view, repository, strategy factory
 * and controller together, then runs the read-dispatch loop.
 */
public class Main {
    /**
     * Starts the interactive loop. The loop ends when a command terminates the
     * JVM or when standard input is exhausted.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        ConsoleView view = new ConsoleView();
        ArticleRepository repository = new ArticleRepository();
        StrategyFactory strategyFactory = new StrategyFactory();
        CrawlerController controller = new CrawlerController(view, repository, strategyFactory);
        view.printSuccess("Welcome to CLI Crawler (w10_3)! Type help for commands.");
        while (true) {
            String line;
            try {
                line = view.readLine();
            } catch (java.util.NoSuchElementException endOfInput) {
                // The view reads with Scanner.nextLine(), which throws once stdin
                // is closed (Ctrl-D, or piped input running out). Treat that as a
                // normal end of session instead of crashing with a stack trace.
                break;
            }
            controller.handle(line);
        }
    }
}

64
w10/java-cli/src/main/java/com/example/datacollect/command/AnalyzeCommand.java

@ -0,0 +1,64 @@
package com.example.datacollect.command;
import com.example.datacollect.repository.ArticleRepository;
import com.example.datacollect.strategy.CrawlStrategy;
import com.example.datacollect.strategy.StrategyFactory;
import com.example.datacollect.view.ConsoleView;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
/**
 * The {@code analyze <url>} command: fetches a page, parses it with the strategy
 * selected for the URL and prints summary statistics. Nothing is stored in the
 * repository.
 */
public class AnalyzeCommand implements Command {
    private static final String NAME = "analyze";

    private final ConsoleView view;
    private final StrategyFactory strategyFactory;

    /**
     * @param view            console used for all output
     * @param strategyFactory source of the parsing strategy for a given URL
     */
    public AnalyzeCommand(ConsoleView view, StrategyFactory strategyFactory) {
        this.view = view;
        this.strategyFactory = strategyFactory;
    }

    @Override
    public String getName() {
        return NAME;
    }

    /**
     * Fetches {@code args[1]}, parses it and reports the article count, the
     * average title and content lengths, and the strategy class used.
     *
     * @param args       tokenised input; {@code args[1]} must be the URL
     * @param repository not used by this command
     */
    @Override
    public void execute(String[] args, ArticleRepository repository) {
        if (args.length < 2) {
            view.printError("Usage: analyze <url>");
            return;
        }

        final String target = args[1];
        final CrawlStrategy chosen = strategyFactory.getStrategy(target);
        try {
            view.printInfo("Analyzing: " + target);
            Document page = Jsoup.connect(target).get();
            var parsed = chosen.parse(target, page);

            int count = parsed.size();
            int titleChars = 0;
            int contentChars = 0;
            for (var item : parsed) {
                // Null titles/contents contribute nothing to the totals.
                String itemTitle = item.getTitle();
                String itemContent = item.getContent();
                titleChars += (itemTitle == null) ? 0 : itemTitle.length();
                contentChars += (itemContent == null) ? 0 : itemContent.length();
            }

            view.printSuccess("Analysis Results:");
            view.printInfo(" Total Articles: " + count);
            view.printInfo(" Average Title Length: " + String.format("%.2f", average(titleChars, count)));
            view.printInfo(" Average Content Length: " + String.format("%.2f", average(contentChars, count)));
            view.printInfo(" Strategy Used: " + chosen.getClass().getSimpleName());
        } catch (Exception e) {
            view.printError("Failed to analyze: " + e.getMessage());
        }
    }

    /** Returns {@code total / count} as a double, or 0 when there is nothing to average. */
    private static double average(int total, int count) {
        if (count > 0) {
            return (double) total / count;
        }
        return 0;
    }
}

8
w10/java-cli/src/main/java/com/example/datacollect/command/Command.java

@ -0,0 +1,8 @@
package com.example.datacollect.command;
import com.example.datacollect.repository.ArticleRepository;
/**
 * A single CLI command that can be looked up by name and executed.
 */
public interface Command {
/**
 * @return the name under which this command is identified
 */
String getName();
/**
 * Runs the command.
 *
 * @param args       the whitespace-split input tokens, including the command word itself
 * @param repository the article store the command may read from or write to
 */
void execute(String[] args, ArticleRepository repository);
}

44
w10/java-cli/src/main/java/com/example/datacollect/command/CrawlCommand.java

@ -0,0 +1,44 @@
package com.example.datacollect.command;
import com.example.datacollect.repository.ArticleRepository;
import com.example.datacollect.strategy.CrawlStrategy;
import com.example.datacollect.strategy.StrategyFactory;
import com.example.datacollect.view.ConsoleView;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
/**
 * The {@code crawl <url>} command: fetches a page, parses it with the strategy
 * selected for the URL and stores the resulting articles in the repository.
 */
public class CrawlCommand implements Command {
    private static final String NAME = "crawl";

    private final ConsoleView view;
    private final StrategyFactory strategyFactory;

    /**
     * @param view            console used for all output
     * @param strategyFactory source of the parsing strategy for a given URL
     */
    public CrawlCommand(ConsoleView view, StrategyFactory strategyFactory) {
        this.view = view;
        this.strategyFactory = strategyFactory;
    }

    @Override
    public String getName() {
        return NAME;
    }

    /**
     * Fetches {@code args[1]}, parses it and appends every parsed article to
     * {@code repository}. Any failure is reported on the console rather than
     * propagated.
     *
     * @param args       tokenised input; {@code args[1]} must be the URL
     * @param repository destination for the crawled articles
     */
    @Override
    public void execute(String[] args, ArticleRepository repository) {
        boolean urlMissing = args.length < 2;
        if (urlMissing) {
            view.printError("Usage: crawl <url>");
            return;
        }

        final String target = args[1];
        final CrawlStrategy chosen = strategyFactory.getStrategy(target);
        try {
            view.printInfo("Crawling: " + target);
            Document page = Jsoup.connect(target).get();
            var found = chosen.parse(target, page);
            repository.addAll(found);
            view.printSuccess("Crawled " + found.size() + " articles.");
        } catch (Exception e) {
            view.printError("Failed to crawl: " + e.getMessage());
        }
    }
}

23
w10/java-cli/src/main/java/com/example/datacollect/command/ExitCommand.java

@ -0,0 +1,23 @@
package com.example.datacollect.command;
import com.example.datacollect.repository.ArticleRepository;
import com.example.datacollect.view.ConsoleView;
/**
 * The {@code exit} command: prints a farewell and terminates the JVM.
 */
public class ExitCommand implements Command {
    private static final String NAME = "exit";
    private static final int EXIT_OK = 0;

    private final ConsoleView view;

    /**
     * @param view console used to print the farewell message
     */
    public ExitCommand(ConsoleView view) {
        this.view = view;
    }

    @Override
    public String getName() {
        return NAME;
    }

    /**
     * Prints "Bye!" and exits the process with status 0; this method does not return.
     *
     * @param args       ignored
     * @param repository ignored
     */
    @Override
    public void execute(String[] args, ArticleRepository repository) {
        view.printSuccess("Bye!");
        System.exit(EXIT_OK);
    }
}

22
w10/java-cli/src/main/java/com/example/datacollect/command/HelpCommand.java

@ -0,0 +1,22 @@
package com.example.datacollect.command;
import com.example.datacollect.repository.ArticleRepository;
import com.example.datacollect.view.ConsoleView;
/**
 * The {@code help} command: prints the list of available commands.
 */
public class HelpCommand implements Command {
    private static final String NAME = "help";
    private static final String USAGE = "Commands: crawl <url>, analyze <url>, list, help, exit";

    private final ConsoleView view;

    /**
     * @param view console used to print the help text
     */
    public HelpCommand(ConsoleView view) {
        this.view = view;
    }

    @Override
    public String getName() {
        return NAME;
    }

    /**
     * Prints the one-line command summary.
     *
     * @param args       ignored
     * @param repository ignored
     */
    @Override
    public void execute(String[] args, ArticleRepository repository) {
        view.printInfo(USAGE);
    }
}

22
w10/java-cli/src/main/java/com/example/datacollect/command/ListCommand.java

@ -0,0 +1,22 @@
package com.example.datacollect.command;
import com.example.datacollect.repository.ArticleRepository;
import com.example.datacollect.view.ConsoleView;
/**
 * The {@code list} command: shows every article currently held in the repository.
 */
public class ListCommand implements Command {
    private static final String NAME = "list";

    private final ConsoleView view;

    /**
     * @param view console used to render the article list
     */
    public ListCommand(ConsoleView view) {
        this.view = view;
    }

    @Override
    public String getName() {
        return NAME;
    }

    /**
     * Hands all stored articles to the view for display.
     *
     * @param args       ignored
     * @param repository source of the articles to display
     */
    @Override
    public void execute(String[] args, ArticleRepository repository) {
        var stored = repository.getAll();
        view.display(stored);
    }
}

49
w10/java-cli/src/main/java/com/example/datacollect/controller/CrawlerController.java

@ -0,0 +1,49 @@
package com.example.datacollect.controller;
import com.example.datacollect.command.AnalyzeCommand;
import com.example.datacollect.command.Command;
import com.example.datacollect.command.CrawlCommand;
import com.example.datacollect.command.ExitCommand;
import com.example.datacollect.command.HelpCommand;
import com.example.datacollect.command.ListCommand;
import com.example.datacollect.repository.ArticleRepository;
import com.example.datacollect.strategy.StrategyFactory;
import com.example.datacollect.view.ConsoleView;
import java.util.HashMap;
import java.util.Map;
/**
 * Dispatches raw console input to the matching {@link Command}.
 *
 * <p>Commands are registered by name at construction time; lookup is
 * case-insensitive on the first whitespace-separated token of the input.
 */
public class CrawlerController {
    private final Map<String, Command> commands = new HashMap<>();
    private final ConsoleView view;
    private final ArticleRepository repository;

    /**
     * Creates the controller and registers the built-in commands
     * (help, list, crawl, analyze, exit).
     *
     * @param view            console used for error output and passed to the commands
     * @param repository      article store handed to every executed command
     * @param strategyFactory strategy source passed to the crawl and analyze commands
     */
    public CrawlerController(ConsoleView view, ArticleRepository repository, StrategyFactory strategyFactory) {
        this.view = view;
        this.repository = repository;
        register(new HelpCommand(view));
        register(new ListCommand(view));
        register(new CrawlCommand(view, strategyFactory));
        register(new AnalyzeCommand(view, strategyFactory));
        register(new ExitCommand(view));
    }

    /** Stores {@code command} under its own name, replacing any previous entry. */
    private void register(Command command) {
        commands.put(command.getName(), command);
    }

    /**
     * Parses one line of input and executes the named command.
     *
     * <p>{@code null} and blank input are ignored. An unrecognised command name
     * is reported through the view.
     *
     * @param input the raw line typed by the user; may be {@code null}
     */
    public void handle(String input) {
        String text = input == null ? "" : input.trim();
        if (text.isEmpty()) {
            return;
        }
        String[] args = text.split("\\s+");
        // Lower-case with a fixed locale: the default-locale overload maps
        // "I" to a dotless "ı" under e.g. a Turkish locale, so "LIST" would
        // never match the registered "list".
        String cmdName = args[0].toLowerCase(java.util.Locale.ROOT);
        Command command = commands.get(cmdName);
        if (command == null) {
            view.printError("Unknown command: " + cmdName);
            return;
        }
        command.execute(args, repository);
    }
}

45
w10/java-cli/src/main/java/com/example/datacollect/model/Article.java

@ -0,0 +1,45 @@
package com.example.datacollect.model;
public class Article {
private String title;
private String url;
private String content;
public Article(String title, String url, String content) {
this.title = title;
this.url = url;
this.content = content;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getUrl() {
return url;
}
public void setUrl(String url) {
this.url = url;
}
public String getContent() {
return content;
}
public void setContent(String content) {
this.content = content;
}
@Override
public String toString() {
return "Article{"
+ "title='" + title + '\''
+ ", url='" + url + '\''
+ '}';
}
}

41
w10/java-cli/src/main/java/com/example/datacollect/repository/ArticleRepository.java

@ -0,0 +1,41 @@
package com.example.datacollect.repository;
import com.example.datacollect.model.Article;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
 * In-memory, insertion-ordered store of {@link Article} objects. Null articles
 * are rejected on the way in.
 */
public class ArticleRepository {
    private final List<Article> articles = new ArrayList<>();

    /**
     * Appends one article.
     *
     * @param article the article to store
     * @throws IllegalArgumentException if {@code article} is {@code null}
     */
    public void add(Article article) {
        rejectNull(article, "Article cannot be null");
        articles.add(article);
    }

    /**
     * Appends every article in the given list. The whole list is validated
     * first, so nothing is stored if any element is {@code null}.
     *
     * @param articleList the articles to store
     * @throws IllegalArgumentException if the list or any of its elements is {@code null}
     */
    public void addAll(List<Article> articleList) {
        rejectNull(articleList, "Article list cannot be null");
        articleList.forEach(candidate -> rejectNull(candidate, "Article in list cannot be null"));
        articles.addAll(articleList);
    }

    /**
     * @return a read-only view of the stored articles
     */
    public List<Article> getAll() {
        return Collections.unmodifiableList(articles);
    }

    /**
     * @return the number of stored articles
     */
    public int size() {
        return articles.size();
    }

    /** Removes every stored article. */
    public void clear() {
        articles.clear();
    }

    /** Throws {@link IllegalArgumentException} with {@code message} when {@code value} is null. */
    private static void rejectNull(Object value, String message) {
        if (value == null) {
            throw new IllegalArgumentException(message);
        }
    }
}

27
w10/java-cli/src/main/java/com/example/datacollect/strategy/BlogStrategy.java

@ -0,0 +1,27 @@
package com.example.datacollect.strategy;
import com.example.datacollect.model.Article;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.ArrayList;
import java.util.List;
/**
 * Strategy for URLs containing {@code blog.example.com}: every element matching
 * {@code .post-title} becomes an article titled with the element's text.
 */
public class BlogStrategy extends PriorityStrategy {
    private static final int PRIORITY = 100;
    private static final String URL_PATTERN = ".*blog\\.example\\.com.*";

    public BlogStrategy() {
        super(PRIORITY, URL_PATTERN);
    }

    /**
     * @param url the page URL; used as the URL of every produced article
     * @param doc the parsed page
     * @return one article per {@code .post-title} element, with empty content
     */
    @Override
    public List<Article> parse(String url, Document doc) {
        Elements postTitles = doc.select(".post-title");
        List<Article> result = new ArrayList<>();
        postTitles.forEach(heading -> result.add(new Article(heading.text(), url, "")));
        return result;
    }
}

10
w10/java-cli/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java

@ -0,0 +1,10 @@
package com.example.datacollect.strategy;
import com.example.datacollect.model.Article;
import org.jsoup.nodes.Document;
import java.util.List;
/**
 * A site-specific way of turning a fetched page into {@link Article} objects.
 */
public interface CrawlStrategy {
/**
 * Extracts articles from a parsed page.
 *
 * @param url the address the page was requested from
 * @param doc the parsed page
 * @return the articles found on the page
 */
List<Article> parse(String url, Document doc);
/**
 * @param url the address about to be crawled
 * @return {@code true} if this strategy is applicable to {@code url}
 */
boolean supports(String url);
}

37
w10/java-cli/src/main/java/com/example/datacollect/strategy/DefaultStrategy.java

@ -0,0 +1,37 @@
package com.example.datacollect.strategy;
import com.example.datacollect.model.Article;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.ArrayList;
import java.util.List;
/**
 * Catch-all strategy that accepts every URL and treats anchor links with
 * sufficiently long text as articles.
 */
public class DefaultStrategy implements CrawlStrategy {
    /** Link texts of this length or shorter are skipped. */
    private static final int SHORT_TITLE_LIMIT = 5;
    /** Upper bound on the number of articles returned from one page. */
    private static final int MAX_ARTICLES = 20;

    /** Always applicable. */
    @Override
    public boolean supports(String url) {
        return true;
    }

    /**
     * Collects up to {@value #MAX_ARTICLES} links whose trimmed text is longer
     * than {@value #SHORT_TITLE_LIMIT} characters.
     *
     * @param url the page URL; used as the article URL when a link has no absolute target
     * @param doc the parsed page
     * @return the collected articles, each with empty content
     */
    @Override
    public List<Article> parse(String url, Document doc) {
        List<Article> collected = new ArrayList<>();
        Elements anchors = doc.select("a[href]");
        for (Element anchor : anchors) {
            String caption = anchor.text().trim();
            String target = anchor.attr("abs:href");
            if (caption.length() <= SHORT_TITLE_LIMIT) {
                // Also covers the empty-text case.
                continue;
            }
            String articleUrl = target.isEmpty() ? url : target;
            collected.add(new Article(caption, articleUrl, ""));
            if (collected.size() >= MAX_ARTICLES) {
                break;
            }
        }
        return collected;
    }
}

51
w10/java-cli/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java

@ -0,0 +1,51 @@
package com.example.datacollect.strategy;
import com.example.datacollect.model.Article;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.ArrayList;
import java.util.List;
/**
 * Strategy for URLs containing {@code news.hnu.edu.cn}. Each {@code ul.list11 li}
 * item that has a link and a non-empty {@code h4.l2.h4s2} title becomes an
 * article; the {@code p.l3.ps3} text, when present, becomes its content.
 */
public class HnuNewsStrategy extends PriorityStrategy {
    private static final int PRIORITY = 200;
    private static final String URL_PATTERN = ".*news\\.hnu\\.edu\\.cn.*";
    /** Used to build article URLs only when the document carries no base URI. */
    private static final String SITE_ROOT = "https://news.hnu.edu.cn";

    public HnuNewsStrategy() {
        super(PRIORITY, URL_PATTERN);
    }

    /**
     * @param url the list page URL (not used; article URLs come from each item's link)
     * @param doc the parsed list page
     * @return one article per list item that has a link and a non-empty title
     */
    @Override
    public List<Article> parse(String url, Document doc) {
        List<Article> articles = new ArrayList<>();
        Elements listItems = doc.select("ul.list11 li");
        for (Element li : listItems) {
            Element link = li.selectFirst("a");
            if (link == null) {
                continue;
            }
            String title = textOf(link, "h4.l2.h4s2");
            if (title.isEmpty()) {
                // Items without a headline are not articles.
                continue;
            }
            articles.add(new Article(title, resolveUrl(link), textOf(link, "p.l3.ps3")));
        }
        return articles;
    }

    /**
     * Returns the absolute target of {@code link}.
     *
     * <p>Resolution is delegated to jsoup so that relative forms such as
     * {@code ../info/1.htm} and {@code info/1.htm} are resolved against the
     * page's base URI. If jsoup cannot produce an absolute URL (it returns an
     * empty string, e.g. when the document has no base URI), the href is
     * attached to the site root instead.
     */
    private static String resolveUrl(Element link) {
        String absolute = link.absUrl("href");
        if (!absolute.isEmpty()) {
            return absolute;
        }
        String raw = link.attr("href");
        if (raw.startsWith("http")) {
            return raw;
        }
        String path = raw.replace("..", "");
        if (!path.isEmpty() && !path.startsWith("/")) {
            // Without the separator the path would be glued onto the host name.
            path = "/" + path;
        }
        return SITE_ROOT + path;
    }

    /** Returns the trimmed text of the first descendant matching {@code css}, or "" if none. */
    private static String textOf(Element parent, String css) {
        Element match = parent.selectFirst(css);
        return match == null ? "" : match.text().trim();
    }
}

27
w10/java-cli/src/main/java/com/example/datacollect/strategy/NewsStrategy.java

@ -0,0 +1,27 @@
package com.example.datacollect.strategy;
import com.example.datacollect.model.Article;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.ArrayList;
import java.util.List;
/**
 * Strategy for URLs containing {@code news.example.com}: every element matching
 * {@code .article-headline} becomes an article titled with the element's text.
 */
public class NewsStrategy extends PriorityStrategy {
    private static final int PRIORITY = 100;
    private static final String URL_PATTERN = ".*news\\.example\\.com.*";

    public NewsStrategy() {
        super(PRIORITY, URL_PATTERN);
    }

    /**
     * @param url the page URL; used as the URL of every produced article
     * @param doc the parsed page
     * @return one article per {@code .article-headline} element, with empty content
     */
    @Override
    public List<Article> parse(String url, Document doc) {
        Elements headlines = doc.select(".article-headline");
        List<Article> result = new ArrayList<>();
        headlines.forEach(headline -> result.add(new Article(headline.text(), url, "")));
        return result;
    }
}

27
w10/java-cli/src/main/java/com/example/datacollect/strategy/PriorityStrategy.java

@ -0,0 +1,27 @@
package com.example.datacollect.strategy;
import java.util.regex.Pattern;
/**
 * Base class for strategies that apply to URLs matching a regular expression
 * and that are ordered by a numeric priority.
 *
 * <p>The natural ordering sorts higher priorities first. It is based on the
 * priority alone, so two distinct strategies with the same priority compare
 * as equal.
 */
public abstract class PriorityStrategy implements CrawlStrategy, Comparable<PriorityStrategy> {
    private final int priority;
    private final Pattern urlPattern;

    /**
     * @param priority     ordering weight; larger values sort earlier
     * @param regexPattern regular expression the whole URL must match
     * @throws java.util.regex.PatternSyntaxException if {@code regexPattern} is not a valid regex
     */
    public PriorityStrategy(int priority, String regexPattern) {
        this.priority = priority;
        this.urlPattern = Pattern.compile(regexPattern);
    }

    /**
     * @param url the address to test; may be {@code null}
     * @return {@code true} if the entire URL matches this strategy's pattern;
     *         {@code false} for {@code null}
     */
    @Override
    public boolean supports(String url) {
        if (url == null) {
            // Pattern.matcher(null) would throw; a missing URL simply is not supported.
            return false;
        }
        return urlPattern.matcher(url).matches();
    }

    /** Orders strategies by descending priority. */
    @Override
    public int compareTo(PriorityStrategy other) {
        return Integer.compare(other.priority, this.priority);
    }

    /**
     * @return this strategy's priority
     */
    public int getPriority() {
        return priority;
    }
}

42
w10/java-cli/src/main/java/com/example/datacollect/view/ConsoleView.java

@ -0,0 +1,42 @@
package com.example.datacollect.view;
import com.example.datacollect.model.Article;
import java.util.List;
import java.util.Scanner;
/**
 * Console front end: reads command lines from standard input and writes
 * ANSI-coloured messages and article listings to standard output.
 */
public class ConsoleView {
    private static final String ANSI_RESET = "\u001B[0m";
    private static final String ANSI_GREEN = "\u001B[32m";
    private static final String ANSI_RED = "\u001B[31m";
    private static final String ANSI_BLUE = "\u001B[34m";

    private final Scanner scanner = new Scanner(System.in);

    /**
     * Prints the {@code "> "} prompt and reads the next line from standard input.
     *
     * @return the line that was read
     */
    public String readLine() {
        System.out.print("> ");
        String line = scanner.nextLine();
        return line;
    }

    /** Prints {@code msg} in green. */
    public void printSuccess(String msg) {
        printColoured(ANSI_GREEN, msg);
    }

    /** Prints {@code msg} in red. */
    public void printError(String msg) {
        printColoured(ANSI_RED, msg);
    }

    /** Prints {@code msg} in blue. */
    public void printInfo(String msg) {
        printColoured(ANSI_BLUE, msg);
    }

    /**
     * Prints a numbered "title | url" line per article, starting at 1. For an
     * empty list, prints an informational hint that tells the user to run
     * crawl first.
     *
     * @param articles the articles to show
     */
    public void display(List<Article> articles) {
        if (articles.isEmpty()) {
            printInfo("暂无文章,请先执行 crawl。");
            return;
        }
        int position = 0;
        for (Article entry : articles) {
            position++;
            System.out.println(position + ". " + entry.getTitle() + " | " + entry.getUrl());
        }
    }

    /** Writes {@code msg} wrapped in the given colour code and a trailing reset. */
    private static void printColoured(String colour, String msg) {
        System.out.println(colour + msg + ANSI_RESET);
    }
}

BIN
w10/思考题.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 386 KiB

BIN
w10/进阶探究.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 902 KiB

Loading…
Cancel
Save