Browse Source

better exception handling

master
283375 1 month ago
parent
commit
289be227ab
Failed to extract signature
  1. 14
      src/main/java/internal/hw/crawler/Main.java
  2. 6
      src/main/java/internal/hw/crawler/MainController.java
  3. 2
      src/main/java/internal/hw/crawler/commands/Command.java
  4. 28
      src/main/java/internal/hw/crawler/commands/CrawlCommand.java
  5. 9
      src/main/java/internal/hw/crawler/commands/ExitCommand.java
  6. 12
      src/main/java/internal/hw/crawler/commands/SaveCommand.java
  7. 27
      src/main/java/internal/hw/crawler/strategies/crawl/IthomeCrawlStrategy.java
  8. 7
      src/main/java/internal/hw/crawler/views/ConsoleView.java

14
src/main/java/internal/hw/crawler/Main.java

@ -10,7 +10,7 @@ public class Main {
MainController controller = new MainController(view);
ArticleRepository repository = new ArticleRepository();
controller.registerCommand(new ExitCommand());
controller.registerCommand(new ExitCommand(view));
controller.registerCommand(new CrawlCommand(repository, view));
controller.registerCommand(new ListCommand(repository, view));
controller.registerCommand(new SaveCommand(repository, view));
@ -18,7 +18,17 @@ public class Main {
view.printSuccess("Welcome to crawler. Type `help` for a list of available commands.");
while (true) {
controller.handleInput(view.readLine());
try {
String line = view.readLine();
// Exit when reading from stdin fails (readLine returned null)
if (line == null) {
controller.handleInput(new ExitCommand(view).getName());
break;
}
controller.handleInput(line);
} catch (Exception e) {
view.printError("Unexpected error: " + e.getMessage());
}
}
}
}

6
src/main/java/internal/hw/crawler/MainController.java

@ -43,7 +43,11 @@ public class MainController {
return;
}
command.execute(args);
try {
command.execute(args);
} catch (Exception e) {
view.printError("Command failed: " + e.getMessage());
}
}
private boolean validateArgs(Command command, String[] args) {

2
src/main/java/internal/hw/crawler/commands/Command.java

@ -9,5 +9,5 @@ public interface Command {
return List.of();
}
void execute(String[] args);
void execute(String[] args) throws Exception;
}

28
src/main/java/internal/hw/crawler/commands/CrawlCommand.java

@ -32,24 +32,22 @@ public class CrawlCommand implements Command {
}
@Override
public void execute(String[] args) {
try {
String urlRaw = args[1];
URL url = new URL(urlRaw);
CrawlStrategy strategy = crawlStrategyFactory.getStrategy(url);
if (strategy == null) {
out.error("Unsupported URL: " + urlRaw);
return;
}
public void execute(String[] args) throws Exception {
String urlRaw = args[1];
URL url = new URL(urlRaw);
CrawlStrategy strategy = crawlStrategyFactory.getStrategy(url);
if (strategy == null) {
out.error("Unsupported URL: " + urlRaw);
return;
}
Document doc = Jsoup.parse(url, 5000);
List<Article> articles = strategy.parse(url, doc);
for (Article article : articles) {
Document doc = Jsoup.parse(url, 5000);
List<Article> articles = strategy.parse(url, doc);
for (Article article : articles) {
if (article != null) {
repository.add(article);
}
out.success(String.format("Crawled %d articles from %s", articles.size(), urlRaw));
} catch (Exception e) {
throw new RuntimeException(e);
}
out.success(String.format("Crawled %d articles from %s", articles.size(), urlRaw));
}
}

9
src/main/java/internal/hw/crawler/commands/ExitCommand.java

@ -1,6 +1,14 @@
package internal.hw.crawler.commands;
import internal.hw.crawler.views.CommandOutput;
public class ExitCommand implements Command {
private final CommandOutput out;
public ExitCommand(CommandOutput out) {
this.out = out;
}
@Override
public String getName() {
return "exit";
@ -8,6 +16,7 @@ public class ExitCommand implements Command {
@Override
public void execute(String[] args) {
out.info("Goodbye.");
System.exit(0);
}
}

12
src/main/java/internal/hw/crawler/commands/SaveCommand.java

@ -5,6 +5,7 @@ import internal.hw.crawler.models.Article;
import internal.hw.crawler.repositories.ArticleRepository;
import internal.hw.crawler.views.CommandOutput;
import com.google.gson.JsonSyntaxException;
import java.io.*;
import java.util.*;
import java.util.stream.Collectors;
@ -25,7 +26,7 @@ public class SaveCommand implements Command {
}
@Override
public void execute(String[] args) {
public void execute(String[] args) throws IOException {
String filename = "articles.output.json";
List<Article> articles = getExistingArticles(filename);
@ -38,11 +39,9 @@ public class SaveCommand implements Command {
Article[] articlesToSave = articleMap.values().toArray(new Article[0]);
try (BufferedWriter writer = new BufferedWriter(new FileWriter(filename));) {
try (BufferedWriter writer = new BufferedWriter(new FileWriter(filename))) {
writer.write(gson.toJson(articlesToSave));
out.success(String.format("Wrote %d articles to %s", articlesToSave.length, filename));
} catch (IOException e) {
throw new RuntimeException(e);
}
}
@ -54,7 +53,10 @@ public class SaveCommand implements Command {
try (BufferedReader reader = new BufferedReader(new FileReader(filename))) {
Article[] articles = gson.fromJson(reader, Article[].class);
return Arrays.asList(articles);
} catch (IOException e) {
} catch (FileNotFoundException e) {
return List.of();
} catch (IOException | JsonSyntaxException e) {
out.error("Failed to read existing articles: " + e.getMessage());
return List.of();
}
}

27
src/main/java/internal/hw/crawler/strategies/crawl/IthomeCrawlStrategy.java

@ -2,6 +2,7 @@ package internal.hw.crawler.strategies.crawl;
import internal.hw.crawler.models.Article;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import java.net.URL;
import java.util.HashSet;
@ -25,17 +26,29 @@ public class IthomeCrawlStrategy implements CrawlStrategy {
private Article parseSingle(URL url, Document doc) throws CrawlException {
Matcher matcher = idRegex.matcher(url.getPath());
if (!matcher.find()) throw new CrawlException(String.format("Cannot determine id for %s", url));
if (!matcher.find()) {
throw new CrawlException(String.format("Cannot determine id for %s", url));
}
String id = String.format("%s-%s-%s", matcher.group(1), matcher.group(2), matcher.group(3));
String title = doc.selectFirst("h1").text();
String content = doc.selectFirst("#paragraph").text();
String authorRaw = doc.selectFirst("#author_baidu > strong").text();
String editorRaw = doc.selectFirst("#editor_baidu > strong").text();
Element h1 = doc.selectFirst("h1");
if (h1 == null) {
throw new CrawlException("Missing <h1> element in page: " + url);
}
String title = h1.text();
Element paragraph = doc.selectFirst("#paragraph");
if (paragraph == null) {
throw new CrawlException("Missing #paragraph element in page: " + url);
}
String content = paragraph.text();
Element authorEl = doc.selectFirst("#author_baidu > strong");
Element editorEl = doc.selectFirst("#editor_baidu > strong");
Set<String> authors = new HashSet<>();
authors.add(authorRaw);
authors.add(editorRaw);
if (authorEl != null) authors.add(authorEl.text());
if (editorEl != null) authors.add(editorEl.text());
Article article = new Article();
article.setId(id);

7
src/main/java/internal/hw/crawler/views/ConsoleView.java

@ -1,5 +1,6 @@
package internal.hw.crawler.views;
import java.util.NoSuchElementException;
import java.util.Scanner;
public class ConsoleView implements CommandOutput {
@ -12,7 +13,11 @@ public class ConsoleView implements CommandOutput {
public String readLine() {
System.out.print("> ");
return scanner.nextLine();
try {
return scanner.nextLine();
} catch (NoSuchElementException | IllegalStateException e) {
return null;
}
}
@Override

Loading…
Cancel
Save