You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
28 lines
720 B
28 lines
720 B
package crawler;
|
|
|
|
import org.jsoup.Connection;
|
|
import org.jsoup.Jsoup;
|
|
import org.jsoup.nodes.Document;
|
|
import java.io.IOException;
|
|
|
|
public abstract class BaseCrawler {
|
|
protected static final String USER_AGENT =
|
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";
|
|
|
|
protected String url;
|
|
protected int timeout = 10;
|
|
|
|
public BaseCrawler(String url) {
|
|
this.url = url;
|
|
}
|
|
|
|
protected Document getDocument() throws IOException {
|
|
Connection connect = Jsoup.connect(url)
|
|
.userAgent(USER_AGENT)
|
|
.timeout(timeout * 1000)
|
|
.ignoreContentType(true);
|
|
return connect.get();
|
|
}
|
|
|
|
public abstract void crawl();
|
|
}
|