Browse Source

爬虫

main
zhuyanshuo 1 month ago
parent
commit
6cc1337ec3
  1. 23
      w10/src/main/java/com/example/datacollect/Main.java
  2. 6
      w10/src/main/java/com/example/datacollect/command/Command.java
  3. 40
      w10/src/main/java/com/example/datacollect/command/CrawlCommand.java
  4. 22
      w10/src/main/java/com/example/datacollect/command/ExitCommand.java
  5. 21
      w10/src/main/java/com/example/datacollect/command/HelpCommand.java
  6. 24
      w10/src/main/java/com/example/datacollect/command/ListCommand.java
  7. 44
      w10/src/main/java/com/example/datacollect/controller/CrawlerController.java
  8. 45
      w10/src/main/java/com/example/datacollect/model/Article.java
  9. 42
      w10/src/main/java/com/example/datacollect/view/ConsoleView.java

23
w10/src/main/java/com/example/datacollect/Main.java

@ -0,0 +1,23 @@
package com.example.datacollect;
import com.example.datacollect.controller.CrawlerController;
import com.example.datacollect.repository.ArticleRepository;
import com.example.datacollect.service.CrawlerService;
import com.example.datacollect.strategy.StrategyFactory;
import com.example.datacollect.view.ConsoleView;
/**
 * Entry point of the command-line crawler.
 *
 * <p>Builds the view, repository, strategy factory, service and controller, prints a welcome
 * banner, and then hands every line read from the view to the controller.
 */
public class Main {
    /**
     * Wires the application together and runs the interactive read/dispatch loop.
     *
     * <p>The loop ends when standard input is exhausted; a command may also terminate the JVM
     * itself while being handled.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        ConsoleView view = new ConsoleView();
        ArticleRepository repository = new ArticleRepository();
        StrategyFactory strategyFactory = new StrategyFactory();
        CrawlerService service = new CrawlerService(repository, strategyFactory);
        CrawlerController controller = new CrawlerController(view, service);
        view.printSuccess("Welcome to CLI Crawler (w10_3)! Type help for commands.");
        while (true) {
            String line;
            try {
                line = view.readLine();
            } catch (java.util.NoSuchElementException endOfInput) {
                // The view reads through a Scanner, whose nextLine() throws once stdin is
                // closed (Ctrl-D, or piped input has run out). Stop cleanly instead of
                // crashing with a stack trace. Only the read is guarded so that exceptions
                // raised while handling a command are not mistaken for end of input.
                break;
            }
            controller.handle(line);
        }
    }
}

6
w10/src/main/java/com/example/datacollect/command/Command.java

@ -0,0 +1,6 @@
package com.example.datacollect.command;
/**
 * A named action that can be triggered from the command line.
 *
 * <p>Implementations are looked up by {@link #getName()} and then run through
 * {@link #execute(String[])}.
 */
public interface Command {

    /**
     * Returns the keyword under which this command is registered.
     *
     * @return the command name
     */
    String getName();

    /**
     * Runs the command.
     *
     * @param args the whitespace-separated tokens of the input line as passed by the dispatcher
     */
    void execute(String[] args);
}

40
w10/src/main/java/com/example/datacollect/command/CrawlCommand.java

@ -0,0 +1,40 @@
package com.example.datacollect.command;
import com.example.datacollect.model.Article;
import com.example.datacollect.service.CrawlerService;
import com.example.datacollect.view.ConsoleView;
import java.util.List;
/**
 * The {@code crawl <url>} command: asks the {@link CrawlerService} to crawl a URL and reports
 * how many articles were returned.
 */
public class CrawlCommand implements Command {
    private final ConsoleView view;
    private final CrawlerService service;

    /**
     * Creates the command.
     *
     * @param view    console used for progress, success and error output
     * @param service service that performs the crawl
     */
    public CrawlCommand(ConsoleView view, CrawlerService service) {
        this.view = view;
        this.service = service;
    }

    /** {@inheritDoc} */
    @Override
    public String getName() {
        return "crawl";
    }

    /**
     * Crawls the URL given as the second token of the input line.
     *
     * <p>Prints a usage hint when no URL was supplied. Any {@link RuntimeException} raised by
     * the service is reported on the console rather than propagated.
     *
     * @param args input tokens; {@code args[1]} is taken as the URL
     */
    @Override
    public void execute(String[] args) {
        if (args.length < 2) {
            view.printError("Usage: crawl <url>");
            return;
        }
        String url = args[1];
        try {
            view.printInfo("Crawling: " + url);
            List<Article> articles = service.crawl(url);
            view.printSuccess("Crawled " + articles.size() + " articles.");
        } catch (RuntimeException e) {
            // IllegalArgumentException is itself a RuntimeException and was handled
            // identically, so a single clause covers both.
            view.printError(describe(e));
        }
    }

    /**
     * Returns a printable description of a failure: the exception message when there is one,
     * otherwise the exception's string form, so the user never sees a bare "null".
     */
    private static String describe(RuntimeException e) {
        String message = e.getMessage();
        if (message == null || message.isEmpty()) {
            return e.toString();
        }
        return message;
    }
}

22
w10/src/main/java/com/example/datacollect/command/ExitCommand.java

@ -0,0 +1,22 @@
package com.example.datacollect.command;
import com.example.datacollect.view.ConsoleView;
/**
 * The {@code exit} command: prints a farewell message and terminates the JVM.
 */
public class ExitCommand implements Command {
    /** Keyword under which this command is registered. */
    private static final String NAME = "exit";

    /** Process exit status reported on termination. */
    private static final int EXIT_STATUS = 0;

    private final ConsoleView view;

    /**
     * Creates the command.
     *
     * @param view console used to print the farewell message
     */
    public ExitCommand(ConsoleView view) {
        this.view = view;
    }

    /** {@inheritDoc} */
    @Override
    public String getName() {
        return NAME;
    }

    /**
     * Says goodbye and ends the process with status 0.
     *
     * @param args input tokens; ignored
     */
    @Override
    public void execute(String[] args) {
        this.view.printSuccess("Bye!");
        System.exit(EXIT_STATUS);
    }
}

21
w10/src/main/java/com/example/datacollect/command/HelpCommand.java

@ -0,0 +1,21 @@
package com.example.datacollect.command;
import com.example.datacollect.view.ConsoleView;
/**
 * The {@code help} command: prints a one-line summary of the available commands.
 */
public class HelpCommand implements Command {
    /** Keyword under which this command is registered. */
    private static final String NAME = "help";

    /** Summary line shown to the user. */
    private static final String SUMMARY = "Commands: crawl <url>, list, help, exit";

    private final ConsoleView view;

    /**
     * Creates the command.
     *
     * @param view console used to print the summary
     */
    public HelpCommand(ConsoleView view) {
        this.view = view;
    }

    /** {@inheritDoc} */
    @Override
    public String getName() {
        return NAME;
    }

    /**
     * Prints the command summary as an informational message.
     *
     * @param args input tokens; ignored
     */
    @Override
    public void execute(String[] args) {
        this.view.printInfo(SUMMARY);
    }
}

24
w10/src/main/java/com/example/datacollect/command/ListCommand.java

@ -0,0 +1,24 @@
package com.example.datacollect.command;
import com.example.datacollect.service.CrawlerService;
import com.example.datacollect.view.ConsoleView;
/**
 * The {@code list} command: shows every article currently known to the service.
 */
public class ListCommand implements Command {
    /** Keyword under which this command is registered. */
    private static final String NAME = "list";

    private final ConsoleView view;
    private final CrawlerService service;

    /**
     * Creates the command.
     *
     * @param view    console used to display the articles
     * @param service source of the articles to display
     */
    public ListCommand(ConsoleView view, CrawlerService service) {
        this.service = service;
        this.view = view;
    }

    /** {@inheritDoc} */
    @Override
    public String getName() {
        return NAME;
    }

    /**
     * Fetches all articles from the service and hands them to the view for display.
     *
     * @param args input tokens; ignored
     */
    @Override
    public void execute(String[] args) {
        this.view.display(this.service.getAllArticles());
    }
}

44
w10/src/main/java/com/example/datacollect/controller/CrawlerController.java

@ -0,0 +1,44 @@
package com.example.datacollect.controller;
import com.example.datacollect.command.Command;
import com.example.datacollect.command.CrawlCommand;
import com.example.datacollect.command.ExitCommand;
import com.example.datacollect.command.HelpCommand;
import com.example.datacollect.command.ListCommand;
import com.example.datacollect.service.CrawlerService;
import com.example.datacollect.view.ConsoleView;
import java.util.HashMap;
import java.util.Map;
/**
 * Parses a raw input line and dispatches it to the matching {@link Command}.
 *
 * <p>The help, list, crawl and exit commands are registered at construction time, keyed by
 * their {@link Command#getName() name}.
 */
public class CrawlerController {
    private final Map<String, Command> commands = new HashMap<>();
    private final ConsoleView view;

    /**
     * Creates the controller and registers the built-in commands.
     *
     * @param view    console used for error output and passed to every command
     * @param service crawler service passed to the commands that need it
     */
    public CrawlerController(ConsoleView view, CrawlerService service) {
        this.view = view;
        register(new HelpCommand(view));
        register(new ListCommand(view, service));
        register(new CrawlCommand(view, service));
        register(new ExitCommand(view));
    }

    /** Adds a command to the lookup table under its own name. */
    private void register(Command command) {
        commands.put(command.getName(), command);
    }

    /**
     * Handles one line of user input.
     *
     * <p>A {@code null} or blank line is ignored. Otherwise the line is split on whitespace,
     * the first token is matched case-insensitively against the registered command names, and
     * the matching command receives all tokens (including the command name at index 0).
     * An unrecognised name is reported through the view.
     *
     * @param input the raw input line; may be {@code null}
     */
    public void handle(String input) {
        String text = input == null ? "" : input.trim();
        if (text.isEmpty()) {
            return;
        }
        String[] args = text.split("\\s+");
        // Lower-case with a fixed locale: with the default locale, "EXIT" or "LIST" typed on
        // a Turkish/Azeri system would map 'I' to dotless 'ı' and miss the registered keys.
        String cmdName = args[0].toLowerCase(java.util.Locale.ROOT);
        Command command = commands.get(cmdName);
        if (command == null) {
            view.printError("Unknown command: " + cmdName);
            return;
        }
        command.execute(args);
    }
}

45
w10/src/main/java/com/example/datacollect/model/Article.java

@ -0,0 +1,45 @@
package com.example.datacollect.model;
public class Article {
private String title;
private String url;
private String content;
public Article(String title, String url, String content) {
this.title = title;
this.url = url;
this.content = content;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getUrl() {
return url;
}
public void setUrl(String url) {
this.url = url;
}
public String getContent() {
return content;
}
public void setContent(String content) {
this.content = content;
}
@Override
public String toString() {
return "Article{"
+ "title='" + title + '\''
+ ", url='" + url + '\''
+ '}';
}
}

42
w10/src/main/java/com/example/datacollect/view/ConsoleView.java

@ -0,0 +1,42 @@
package com.example.datacollect.view;
import com.example.datacollect.model.Article;
import java.util.List;
import java.util.Scanner;
/**
 * Console front end: reads input lines from standard input and writes colour-coded messages
 * and article listings to standard output.
 */
public class ConsoleView {
    private static final String ANSI_RESET = "\u001B[0m";
    private static final String ANSI_GREEN = "\u001B[32m";
    private static final String ANSI_RED = "\u001B[31m";
    private static final String ANSI_BLUE = "\u001B[34m";

    private final Scanner scanner = new Scanner(System.in);

    /**
     * Shows the {@code "> "} prompt and reads the next line from standard input.
     *
     * @return the line that was read
     */
    public String readLine() {
        System.out.print("> ");
        return scanner.nextLine();
    }

    /**
     * Prints a message in green.
     *
     * @param msg the text to print
     */
    public void printSuccess(String msg) {
        printColored(ANSI_GREEN, msg);
    }

    /**
     * Prints a message in red.
     *
     * @param msg the text to print
     */
    public void printError(String msg) {
        printColored(ANSI_RED, msg);
    }

    /**
     * Prints a message in blue.
     *
     * @param msg the text to print
     */
    public void printInfo(String msg) {
        printColored(ANSI_BLUE, msg);
    }

    /**
     * Prints a numbered list of articles, one per line, as {@code N. title | url}.
     *
     * <p>When the list is empty, an informational notice is printed instead.
     *
     * @param articles the articles to show
     */
    public void display(List<Article> articles) {
        if (articles.isEmpty()) {
            printInfo("暂无文章,请先执行 crawl。");
            return;
        }
        int position = 1;
        for (Article article : articles) {
            System.out.println(position + ". " + article.getTitle() + " | " + article.getUrl());
            position++;
        }
    }

    /** Writes one line wrapped in the given colour code followed by the reset code. */
    private void printColored(String color, String msg) {
        System.out.println(color + msg + ANSI_RESET);
    }
}
Loading…
Cancel
Save