// You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
//
// 28 lines
// 720 B

package crawler;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import java.io.IOException;
/**
 * Base class for crawlers that download a single page with Jsoup.
 *
 * <p>Subclasses supply the crawling logic in {@link #crawl()} and can obtain
 * the parsed page through {@link #getDocument()}.
 */
public abstract class BaseCrawler {

    /** User-Agent header value sent with every request made by {@link #getDocument()}. */
    protected static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";

    /** Factor used to convert the second-based {@link #timeout} into milliseconds. */
    private static final int MILLIS_PER_SECOND = 1000;

    /** Address that {@link #getDocument()} connects to. */
    protected String url;

    /**
     * Request timeout in seconds (default 10). It is converted to milliseconds
     * before being handed to the Jsoup connection.
     */
    protected int timeout = 10;

    /**
     * Creates a crawler for the given address.
     *
     * @param url the address to fetch; stored as-is, without validation
     */
    public BaseCrawler(String url) {
        this.url = url;
    }

    /**
     * Performs an HTTP GET against {@link #url} and returns the parsed result.
     *
     * <p>The request carries {@link #USER_AGENT}, uses {@link #timeout} (in
     * seconds) as its timeout, and tells Jsoup to ignore the response's
     * content type.
     *
     * @return the document parsed from the response
     * @throws IOException if the request fails
     * @throws ArithmeticException if {@link #timeout} is too large to be
     *         expressed in milliseconds as an {@code int}
     */
    protected Document getDocument() throws IOException {
        // multiplyExact fails loudly on overflow; a plain `timeout * 1000` would
        // silently wrap to an unrelated (possibly negative) millisecond value.
        int timeoutMillis = Math.multiplyExact(timeout, MILLIS_PER_SECOND);
        Connection connect = Jsoup.connect(url)
                .userAgent(USER_AGENT)
                .timeout(timeoutMillis)
                .ignoreContentType(true);
        return connect.get();
    }

    /** Runs the crawl; the concrete behavior is defined by each subclass. */
    public abstract void crawl();
}