Browse Source

improve exception handling

master
283375 4 weeks ago
parent
commit
5abdee0f76
Failed to extract signature
  1. 5
      src/main/java/internal/hw/crawler/Main.java
  2. 10
      src/main/java/internal/hw/crawler/MainController.java
  3. 48
      src/main/java/internal/hw/crawler/commands/CrawlCommand.java
  4. 11
      src/main/java/internal/hw/crawler/strategies/crawl/CrawlException.java
  5. 34
      src/main/java/internal/hw/crawler/strategies/crawl/CrawlNetworkException.java
  6. 43
      src/main/java/internal/hw/crawler/strategies/crawl/CrawlParseException.java
  7. 2
      src/main/java/internal/hw/crawler/strategies/crawl/CrawlStrategy.java
  8. 2
      src/main/java/internal/hw/crawler/strategies/crawl/CrawlStrategyFactory.java
  9. 9
      src/main/java/internal/hw/crawler/strategies/crawl/CrawlUnsupportedException.java
  10. 17
      src/main/java/internal/hw/crawler/strategies/crawl/CrawlUtils.java
  11. 31
      src/main/java/internal/hw/crawler/strategies/crawl/CrawlerException.java
  12. 21
      src/main/java/internal/hw/crawler/strategies/crawl/HttpCrawler.java
  13. 12
      src/main/java/internal/hw/crawler/strategies/crawl/IthomeCrawlStrategy.java
  14. 12
      src/main/java/internal/hw/crawler/strategies/crawl/NeteaseNewsCrawlStrategy.java
  15. 12
      src/main/java/internal/hw/crawler/strategies/crawl/PeopleCnCrawlStrategy.java

5
src/main/java/internal/hw/crawler/Main.java

@ -3,8 +3,12 @@ package internal.hw.crawler;
import internal.hw.crawler.commands.*; import internal.hw.crawler.commands.*;
import internal.hw.crawler.repositories.ArticleRepository; import internal.hw.crawler.repositories.ArticleRepository;
import internal.hw.crawler.views.ConsoleView; import internal.hw.crawler.views.ConsoleView;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class Main { public class Main {
private static final Logger log = LoggerFactory.getLogger(Main.class);
public static void main(String[] args) { public static void main(String[] args) {
ConsoleView view = new ConsoleView(); ConsoleView view = new ConsoleView();
MainController controller = new MainController(view); MainController controller = new MainController(view);
@ -27,6 +31,7 @@ public class Main {
} }
controller.handleInput(line); controller.handleInput(line);
} catch (Exception e) { } catch (Exception e) {
log.error("Unhandled exception in REPL loop", e);
view.printError("Unexpected error: " + e.getMessage()); view.printError("Unexpected error: " + e.getMessage());
} }
} }

10
src/main/java/internal/hw/crawler/MainController.java

@ -2,7 +2,10 @@ package internal.hw.crawler;
import internal.hw.crawler.commands.Command; import internal.hw.crawler.commands.Command;
import internal.hw.crawler.commands.CommandArg; import internal.hw.crawler.commands.CommandArg;
import internal.hw.crawler.strategies.crawl.CrawlerException;
import internal.hw.crawler.views.ConsoleView; import internal.hw.crawler.views.ConsoleView;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
@ -10,6 +13,7 @@ import java.util.List;
import java.util.Map; import java.util.Map;
public class MainController { public class MainController {
private static final Logger log = LoggerFactory.getLogger(MainController.class);
private final Map<String, Command> commands = new HashMap<>(); private final Map<String, Command> commands = new HashMap<>();
private final ConsoleView view; private final ConsoleView view;
@ -45,8 +49,12 @@ public class MainController {
try { try {
command.execute(args); command.execute(args);
} catch (CrawlerException e) {
log.warn("Crawler error in command '{}'", cmdName, e);
view.printError(e.getMessage());
} catch (Exception e) { } catch (Exception e) {
view.printError("Command failed: " + e.getMessage()); log.error("Unexpected error in command '{}'", cmdName, e);
view.printError("Internal error: " + e.getMessage());
} }
} }

48
src/main/java/internal/hw/crawler/commands/CrawlCommand.java

@ -5,15 +5,22 @@ import internal.hw.crawler.repositories.ArticleRepository;
import internal.hw.crawler.strategies.crawl.CrawlStrategy; import internal.hw.crawler.strategies.crawl.CrawlStrategy;
import internal.hw.crawler.strategies.crawl.CrawlStrategyFactory; import internal.hw.crawler.strategies.crawl.CrawlStrategyFactory;
import internal.hw.crawler.strategies.crawl.CrawlUtils; import internal.hw.crawler.strategies.crawl.CrawlUtils;
import internal.hw.crawler.strategies.crawl.CrawlNetworkException;
import internal.hw.crawler.strategies.crawl.CrawlParseException;
import internal.hw.crawler.strategies.crawl.CrawlUnsupportedException;
import internal.hw.crawler.views.CommandOutput; import internal.hw.crawler.views.CommandOutput;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.URL; import java.net.URL;
import java.util.List; import java.util.List;
import java.util.Objects; import java.util.Objects;
public class CrawlCommand implements Command { public class CrawlCommand implements Command {
private static final Logger log = LoggerFactory.getLogger(CrawlCommand.class);
private final ArticleRepository repository; private final ArticleRepository repository;
private final CommandOutput out; private final CommandOutput out;
private final CrawlStrategyFactory crawlStrategyFactory = new CrawlStrategyFactory(); private final CrawlStrategyFactory crawlStrategyFactory = new CrawlStrategyFactory();
@ -34,24 +41,33 @@ public class CrawlCommand implements Command {
} }
@Override @Override
public void execute(String[] args) throws Exception { public void execute(String[] args) {
String urlRaw = args[1]; String urlRaw = args[1];
URL url = new URL(urlRaw);
CrawlStrategy strategy = crawlStrategyFactory.getStrategy(url);
if (strategy == null) {
out.error("Unsupported URL: " + urlRaw);
return;
}
Document doc = Jsoup.connect(url.toString()).timeout(5000).get();
CrawlUtils.setProgressCallback(msg -> System.out.print("\r" + msg));
try { try {
List<Article> articles = strategy.parse(url, doc); URL url = new URL(urlRaw);
System.out.println(); CrawlStrategy strategy = crawlStrategyFactory.getStrategy(url);
articles.stream().filter(Objects::nonNull).forEach(repository::add); Document doc = Jsoup.connect(url.toString()).timeout(5000).get();
out.success(String.format("Crawled %d articles from %s", articles.size(), urlRaw)); CrawlUtils.setProgressCallback(msg -> System.out.print("\r" + msg));
} finally { try {
CrawlUtils.clearProgressCallback(); List<Article> articles = strategy.parse(url, doc);
System.out.println();
articles.stream().filter(Objects::nonNull).forEach(repository::add);
out.success(String.format("Crawled %d articles from %s", articles.size(), urlRaw));
} finally {
CrawlUtils.clearProgressCallback();
}
} catch (CrawlUnsupportedException e) {
out.error(e.getMessage());
log.warn("Unsupported URL: {}", urlRaw);
} catch (CrawlNetworkException e) {
out.error("Network error: " + e.getMessage());
log.error("Crawl network failure for {}", urlRaw, e);
} catch (CrawlParseException e) {
out.error("Parse error: " + e.getMessage());
log.error("Crawl parse failure for {}", urlRaw, e);
} catch (IOException e) {
out.error("I/O error: " + e.getMessage());
log.error("Crawl I/O failure for {}", urlRaw, e);
} }
} }
} }

11
src/main/java/internal/hw/crawler/strategies/crawl/CrawlException.java

@ -1,11 +0,0 @@
package internal.hw.crawler.strategies.crawl;
/**
 * Checked exception describing a crawl failure.
 *
 * <p>Carries a human-readable message and, optionally, the underlying cause.
 */
public class CrawlException extends Exception {

    /**
     * Creates an exception that wraps an underlying failure.
     *
     * @param message description of what went wrong
     * @param cause   the throwable that triggered this failure
     */
    public CrawlException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception with a message only; no cause is recorded.
     *
     * @param message description of what went wrong
     */
    public CrawlException(String message) {
        super(message);
    }
}

34
src/main/java/internal/hw/crawler/strategies/crawl/CrawlNetworkException.java

@ -0,0 +1,34 @@
package internal.hw.crawler.strategies.crawl;
import java.net.URL;
/**
 * Crawler exception for failures that occur while fetching a page.
 *
 * <p>In addition to the message, URL and cause handled by the superclass, it
 * records a status code and the number of attempts. Constructors that do not
 * take those values fall back to {@code -1} and {@code 0} respectively.
 */
public class CrawlNetworkException extends CrawlerException {

    /** Status code recorded when the caller does not supply one. */
    private static final int NO_STATUS_CODE = -1;

    /** Attempt count recorded when the caller does not supply one. */
    private static final int NO_ATTEMPTS = 0;

    // Presumably an HTTP status code; nothing in this class constrains its range.
    private final int statusCode;
    private final int attempts;

    /**
     * Full constructor; every other constructor delegates here.
     *
     * @param message    description of the failure
     * @param cause      underlying throwable, may be {@code null}
     * @param url        URL that was being fetched
     * @param statusCode status code to record
     * @param attempts   number of attempts to record
     */
    public CrawlNetworkException(String message, Throwable cause, URL url, int statusCode, int attempts) {
        super(message, url, cause);
        this.statusCode = statusCode;
        this.attempts = attempts;
    }

    /**
     * Records a cause and an attempt count; the status code defaults to {@code -1}.
     */
    public CrawlNetworkException(String message, Throwable cause, URL url, int attempts) {
        this(message, cause, url, NO_STATUS_CODE, attempts);
    }

    /**
     * Records a cause only; status code defaults to {@code -1} and attempts to {@code 0}.
     */
    public CrawlNetworkException(String message, Throwable cause, URL url) {
        this(message, cause, url, NO_STATUS_CODE, NO_ATTEMPTS);
    }

    /**
     * Records no cause; status code defaults to {@code -1} and attempts to {@code 0}.
     */
    public CrawlNetworkException(String message, URL url) {
        this(message, null, url, NO_STATUS_CODE, NO_ATTEMPTS);
    }

    /** @return the recorded number of attempts, {@code 0} if none was supplied */
    public int getAttempts() {
        return attempts;
    }

    /** @return the recorded status code, {@code -1} if none was supplied */
    public int getStatusCode() {
        return statusCode;
    }
}

43
src/main/java/internal/hw/crawler/strategies/crawl/CrawlParseException.java

@ -0,0 +1,43 @@
package internal.hw.crawler.strategies.crawl;
import java.net.URL;
/**
 * Crawler exception for failures that occur while parsing a fetched page.
 *
 * <p>Two nested specialisations carry extra detail: {@link ElementNotFoundException}
 * keeps the selector that matched nothing, and {@link IdExtractionException}
 * keeps the pattern that failed to yield an id.
 */
public class CrawlParseException extends CrawlerException {

    /**
     * @param message description of the failure
     * @param url     URL of the page being parsed
     * @param cause   underlying throwable
     */
    public CrawlParseException(String message, URL url, Throwable cause) {
        super(message, url, cause);
    }

    /**
     * @param message description of the failure
     * @param url     URL of the page being parsed
     */
    public CrawlParseException(String message, URL url) {
        super(message, url);
    }

    /**
     * @param message description of the failure
     */
    public CrawlParseException(String message) {
        super(message);
    }

    /**
     * Parse failure for a page in which a required element was missing.
     */
    public static class ElementNotFoundException extends CrawlParseException {

        private final String selector;

        /**
         * @param selector selector that was looked up and not found
         * @param url      URL of the page that lacked the element
         */
        public ElementNotFoundException(String selector, URL url) {
            super(describe(selector, url), url);
            this.selector = selector;
        }

        /** Builds the exception message from the selector and page URL. */
        private static String describe(String selector, URL url) {
            return "Missing element '" + selector + "' in page: " + url;
        }

        /** @return the selector passed at construction */
        public String getSelector() {
            return selector;
        }
    }

    /**
     * Parse failure for a URL from which no id could be determined.
     */
    public static class IdExtractionException extends CrawlParseException {

        private final String pattern;

        /**
         * @param url     URL for which the id could not be determined
         * @param pattern pattern text reported in the message
         */
        public IdExtractionException(URL url, String pattern) {
            super(describe(url, pattern), url);
            this.pattern = pattern;
        }

        /** Builds the exception message from the URL and pattern. */
        private static String describe(URL url, String pattern) {
            return "Cannot determine id for " + url + " (pattern: " + pattern + ")";
        }

        /** @return the pattern passed at construction */
        public String getPattern() {
            return pattern;
        }
    }
}

2
src/main/java/internal/hw/crawler/strategies/crawl/CrawlStrategy.java

@ -7,7 +7,7 @@ import java.net.URL;
import java.util.List; import java.util.List;
public interface CrawlStrategy { public interface CrawlStrategy {
List<Article> parse(URL url, Document doc) throws CrawlException; List<Article> parse(URL url, Document doc) throws CrawlerException;
boolean supports(URL url); boolean supports(URL url);
} }

2
src/main/java/internal/hw/crawler/strategies/crawl/CrawlStrategyFactory.java

@ -19,7 +19,7 @@ public class CrawlStrategyFactory {
return s; return s;
} }
} }
return null; throw new CrawlUnsupportedException(url);
} }
public void register(CrawlStrategy strategy) { public void register(CrawlStrategy strategy) {

9
src/main/java/internal/hw/crawler/strategies/crawl/CrawlUnsupportedException.java

@ -0,0 +1,9 @@
package internal.hw.crawler.strategies.crawl;
import java.net.URL;
/**
 * Crawler exception signalling that a URL belongs to a site that is not supported.
 *
 * <p>The message is always {@code "Unsupported site: "} followed by the URL.
 */
public class CrawlUnsupportedException extends CrawlerException {

    /**
     * @param url the URL that is not supported; also stored on the exception
     */
    public CrawlUnsupportedException(URL url) {
        super(describe(url), url);
    }

    /** Builds the exception message for the given URL. */
    private static String describe(URL url) {
        return "Unsupported site: " + url;
    }
}

17
src/main/java/internal/hw/crawler/strategies/crawl/CrawlUtils.java

@ -6,6 +6,7 @@ import org.jsoup.nodes.Element;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.net.MalformedURLException;
import java.net.URI; import java.net.URI;
import java.net.URL; import java.net.URL;
import java.util.ArrayList; import java.util.ArrayList;
@ -34,7 +35,7 @@ public class CrawlUtils {
} }
public static List<Article> parseHomepage(Document doc, Pattern idRegex, public static List<Article> parseHomepage(Document doc, Pattern idRegex,
BiFunction<URL, Document, Article> singleParser) { BiFunction<URL, Document, Article> singleParser) {
HttpCrawler crawler = new HttpCrawler(); HttpCrawler crawler = new HttpCrawler();
Consumer<String> callback = progressCallback.get(); Consumer<String> callback = progressCallback.get();
@ -48,6 +49,7 @@ public class CrawlUtils {
int total = hrefs.size(); int total = hrefs.size();
AtomicInteger done = new AtomicInteger(0); AtomicInteger done = new AtomicInteger(0);
AtomicInteger errors = new AtomicInteger(0);
ExecutorService executor = Executors.newFixedThreadPool(THREAD_POOL_SIZE); ExecutorService executor = Executors.newFixedThreadPool(THREAD_POOL_SIZE);
List<CompletableFuture<Article>> futures = new ArrayList<>(total); List<CompletableFuture<Article>> futures = new ArrayList<>(total);
@ -57,8 +59,13 @@ public class CrawlUtils {
URL articleUrl = URI.create(href).toURL(); URL articleUrl = URI.create(href).toURL();
Document articleDoc = crawler.fetch(articleUrl); Document articleDoc = crawler.fetch(articleUrl);
return singleParser.apply(articleUrl, articleDoc); return singleParser.apply(articleUrl, articleDoc);
} catch (Exception e) { } catch (CrawlerException e) {
log.warn("Failed to fetch article: {}", href, e); int failed = errors.incrementAndGet();
log.warn("Failed [{}/{}]: {} — {}", failed, total, href, e.getMessage());
return null;
} catch (MalformedURLException e) {
int failed = errors.incrementAndGet();
log.warn("Failed [{}/{}]: {} — malformed URL", failed, total, href);
return null; return null;
} finally { } finally {
int completed = done.incrementAndGet(); int completed = done.incrementAndGet();
@ -72,6 +79,10 @@ public class CrawlUtils {
executor.shutdown(); executor.shutdown();
CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).join(); CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).join();
if (errors.get() > 0) {
log.warn("Crawl completed: {}/{} articles failed", errors.get(), total);
}
return futures.stream() return futures.stream()
.map(CompletableFuture::join) .map(CompletableFuture::join)
.filter(Objects::nonNull) .filter(Objects::nonNull)

31
src/main/java/internal/hw/crawler/strategies/crawl/CrawlerException.java

@ -0,0 +1,31 @@
package internal.hw.crawler.strategies.crawl;
import java.net.URL;
/**
 * Abstract unchecked base class for crawler failures.
 *
 * <p>Besides the usual message and cause, an instance may remember the URL it
 * relates to; constructors without a URL parameter leave it {@code null}.
 */
public abstract class CrawlerException extends RuntimeException {

    /** URL this failure relates to, or {@code null} when none was given. */
    private final URL url;

    /**
     * @param message description of the failure
     * @param url     URL the failure relates to
     * @param cause   underlying throwable
     */
    public CrawlerException(String message, URL url, Throwable cause) {
        super(message, cause);
        this.url = url;
    }

    /**
     * @param message description of the failure
     * @param url     URL the failure relates to
     */
    public CrawlerException(String message, URL url) {
        super(message);
        this.url = url;
    }

    /**
     * Creates an exception with a cause but no associated URL.
     *
     * @param message description of the failure
     * @param cause   underlying throwable
     */
    public CrawlerException(String message, Throwable cause) {
        super(message, cause);
        this.url = null;
    }

    /**
     * Creates an exception with neither a cause nor an associated URL.
     *
     * @param message description of the failure
     */
    public CrawlerException(String message) {
        super(message);
        this.url = null;
    }

    /**
     * @return the URL supplied at construction, or {@code null} if there was none
     */
    public URL getUrl() {
        return url;
    }
}

21
src/main/java/internal/hw/crawler/strategies/crawl/HttpCrawler.java

@ -5,6 +5,7 @@ import org.jsoup.nodes.Document;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.URL; import java.net.URL;
import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.ThreadLocalRandom;
@ -29,26 +30,36 @@ public class HttpCrawler {
this.userAgent = userAgent; this.userAgent = userAgent;
} }
public Document fetch(URL url) throws Exception { public Document fetch(URL url) throws CrawlNetworkException {
IOException lastError;
for (int attempt = 0; attempt <= maxRetries; attempt++) { for (int attempt = 0; attempt <= maxRetries; attempt++) {
try { try {
return Jsoup.connect(url.toString()) return Jsoup.connect(url.toString())
.timeout(timeoutMillis) .timeout(timeoutMillis)
.userAgent(userAgent) .userAgent(userAgent)
.get(); .get();
} catch (Exception e) { } catch (IOException e) {
lastError = e;
if (attempt < maxRetries) { if (attempt < maxRetries) {
long delay = computeBackoff(attempt); long delay = computeBackoff(attempt);
log.warn("Failed to fetch {}, attempt {}/{}: {}. Retrying in {}ms", log.warn("Failed to fetch {}, attempt {}/{}: {}. Retrying in {}ms",
url, attempt + 1, maxRetries, e.getMessage(), delay); url, attempt + 1, maxRetries, e.getMessage(), delay);
Thread.sleep(delay); try {
Thread.sleep(delay);
} catch (InterruptedException ie) {
Thread.currentThread().interrupt();
throw new CrawlNetworkException(
"Interrupted while fetching " + url, ie, url, maxRetries + 1);
}
} else { } else {
log.error("Failed to fetch {} after {} attempts", url, maxRetries + 1); log.error("Failed to fetch {} after {} attempts", url, maxRetries + 1);
throw e; throw new CrawlNetworkException(
"Failed to fetch " + url + " after " + (maxRetries + 1) + " attempts",
lastError, url, maxRetries + 1);
} }
} }
} }
throw new RuntimeException("Unreachable"); throw new CrawlNetworkException("Unreachable", url);
} }
public void rateLimit() throws InterruptedException { public void rateLimit() throws InterruptedException {

12
src/main/java/internal/hw/crawler/strategies/crawl/IthomeCrawlStrategy.java

@ -24,12 +24,12 @@ public class IthomeCrawlStrategy implements CrawlStrategy {
} }
@Override @Override
public List<Article> parse(URL url, Document doc) throws CrawlException { public List<Article> parse(URL url, Document doc) throws CrawlParseException {
if (isHomepage(url)) { if (isHomepage(url)) {
return CrawlUtils.parseHomepage(doc, idRegex, (articleUrl, articleDoc) -> { return CrawlUtils.parseHomepage(doc, idRegex, (articleUrl, articleDoc) -> {
try { try {
return parseSingle(articleUrl, articleDoc); return parseSingle(articleUrl, articleDoc);
} catch (CrawlException e) { } catch (CrawlParseException e) {
log.warn("Failed to parse article: {}", articleUrl, e); log.warn("Failed to parse article: {}", articleUrl, e);
return null; return null;
} }
@ -44,23 +44,23 @@ public class IthomeCrawlStrategy implements CrawlStrategy {
return (path == null || path.isEmpty() || path.equals("/")); return (path == null || path.isEmpty() || path.equals("/"));
} }
private Article parseSingle(URL url, Document doc) throws CrawlException { private Article parseSingle(URL url, Document doc) throws CrawlParseException {
Matcher matcher = idRegex.matcher(url.getPath()); Matcher matcher = idRegex.matcher(url.getPath());
if (!matcher.find()) { if (!matcher.find()) {
throw new CrawlException(String.format("Cannot determine id for %s", url)); throw new CrawlParseException.IdExtractionException(url, idRegex.pattern());
} }
String id = String.format("%s-%s-%s", matcher.group(1), matcher.group(2), matcher.group(3)); String id = String.format("%s-%s-%s", matcher.group(1), matcher.group(2), matcher.group(3));
Element h1 = doc.selectFirst("h1"); Element h1 = doc.selectFirst("h1");
if (h1 == null) { if (h1 == null) {
throw new CrawlException("Missing <h1> element in page: " + url); throw new CrawlParseException.ElementNotFoundException("h1", url);
} }
String title = h1.text(); String title = h1.text();
Element paragraph = doc.selectFirst("#paragraph"); Element paragraph = doc.selectFirst("#paragraph");
if (paragraph == null) { if (paragraph == null) {
throw new CrawlException("Missing #paragraph element in page: " + url); throw new CrawlParseException.ElementNotFoundException("#paragraph", url);
} }
String content = paragraph.text(); String content = paragraph.text();

12
src/main/java/internal/hw/crawler/strategies/crawl/NeteaseNewsCrawlStrategy.java

@ -25,12 +25,12 @@ public class NeteaseNewsCrawlStrategy implements CrawlStrategy {
} }
@Override @Override
public List<Article> parse(URL url, Document doc) throws CrawlException { public List<Article> parse(URL url, Document doc) throws CrawlParseException {
if (isHomepage(url)) { if (isHomepage(url)) {
return CrawlUtils.parseHomepage(doc, idRegex, (articleUrl, articleDoc) -> { return CrawlUtils.parseHomepage(doc, idRegex, (articleUrl, articleDoc) -> {
try { try {
return parseSingle(articleUrl, articleDoc); return parseSingle(articleUrl, articleDoc);
} catch (CrawlException e) { } catch (CrawlParseException e) {
log.warn("Failed to parse article: {}", articleUrl, e); log.warn("Failed to parse article: {}", articleUrl, e);
return null; return null;
} }
@ -45,20 +45,20 @@ public class NeteaseNewsCrawlStrategy implements CrawlStrategy {
return path == null || path.isEmpty() || path.equals("/"); return path == null || path.isEmpty() || path.equals("/");
} }
private Article parseSingle(URL url, Document doc) throws CrawlException { private Article parseSingle(URL url, Document doc) throws CrawlParseException {
Matcher matcher = idRegex.matcher(url.getPath()); Matcher matcher = idRegex.matcher(url.getPath());
if (!matcher.find()) { if (!matcher.find()) {
throw new CrawlException(String.format("Cannot determine id for %s", url)); throw new CrawlParseException.IdExtractionException(url, idRegex.pattern());
} }
String id = matcher.group(1); String id = matcher.group(1);
Element titleEl = doc.selectFirst("h1.post_title"); Element titleEl = doc.selectFirst("h1.post_title");
if (titleEl == null) throw new CrawlException(String.format("Cannot find title for %s", url)); if (titleEl == null) throw new CrawlParseException.ElementNotFoundException("h1.post_title", url);
String title = titleEl.text(); String title = titleEl.text();
Element contentEl = doc.selectFirst("div.post_body"); Element contentEl = doc.selectFirst("div.post_body");
if (contentEl == null) throw new CrawlException(String.format("Cannot find content for %s", url)); if (contentEl == null) throw new CrawlParseException.ElementNotFoundException("div.post_body", url);
String content = contentEl.text(); String content = contentEl.text();
Article article = new Article(); Article article = new Article();

12
src/main/java/internal/hw/crawler/strategies/crawl/PeopleCnCrawlStrategy.java

@ -31,12 +31,12 @@ public class PeopleCnCrawlStrategy implements CrawlStrategy {
} }
@Override @Override
public List<Article> parse(URL url, Document doc) throws CrawlException { public List<Article> parse(URL url, Document doc) throws CrawlParseException {
if (isHomepage(url)) { if (isHomepage(url)) {
return CrawlUtils.parseHomepage(doc, idRegex, (articleUrl, articleDoc) -> { return CrawlUtils.parseHomepage(doc, idRegex, (articleUrl, articleDoc) -> {
try { try {
return parseSingle(articleUrl, articleDoc); return parseSingle(articleUrl, articleDoc);
} catch (CrawlException e) { } catch (CrawlParseException e) {
log.warn("Failed to parse article: {}", articleUrl, e); log.warn("Failed to parse article: {}", articleUrl, e);
return null; return null;
} }
@ -55,15 +55,15 @@ public class PeopleCnCrawlStrategy implements CrawlStrategy {
return path == null || path.isEmpty() || path.equals("/"); return path == null || path.isEmpty() || path.equals("/");
} }
private Article parseSingle(URL url, Document doc) throws CrawlException { private Article parseSingle(URL url, Document doc) throws CrawlParseException {
Matcher matcher = idRegex.matcher(url.getPath()); Matcher matcher = idRegex.matcher(url.getPath());
if (!matcher.find()) { if (!matcher.find()) {
throw new CrawlException(String.format("Cannot determine id for %s", url)); throw new CrawlParseException.IdExtractionException(url, idRegex.pattern());
} }
String id = String.format("%s%s-c%s-%s", matcher.group(1), matcher.group(2), matcher.group(3), matcher.group(4)); String id = String.format("%s%s-c%s-%s", matcher.group(1), matcher.group(2), matcher.group(3), matcher.group(4));
Element titleEl = doc.selectFirst(".layout.rm_txt h1"); Element titleEl = doc.selectFirst(".layout.rm_txt h1");
if (titleEl == null) throw new CrawlException(String.format("Cannot find title for %s", url)); if (titleEl == null) throw new CrawlParseException.ElementNotFoundException(".layout.rm_txt h1", url);
String title = titleEl.text(); String title = titleEl.text();
Set<String> authors = new HashSet<>(); Set<String> authors = new HashSet<>();
@ -73,7 +73,7 @@ public class PeopleCnCrawlStrategy implements CrawlStrategy {
} }
Element contentEl = doc.selectFirst("div#rm_txt_zw"); Element contentEl = doc.selectFirst("div#rm_txt_zw");
if (contentEl == null) throw new CrawlException(String.format("Cannot find content for %s", url)); if (contentEl == null) throw new CrawlParseException.ElementNotFoundException("div#rm_txt_zw", url);
String content = contentEl.text(); String content = contentEl.text();
Article article = new Article(); Article article = new Article();

Loading…
Cancel
Save