package crawler;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import java.io.IOException;

/**
 * Base class for crawlers that fetch a single URL through jsoup.
 *
 * <p>Holds the target URL and a timeout, and offers {@link #getDocument()} as a shared
 * helper for retrieving the page. Concrete subclasses supply the actual crawling logic
 * by implementing {@link #crawl()}.
 */
public abstract class BaseCrawler {

    /** User-Agent header value sent with every request made by {@link #getDocument()}. */
    protected static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";

    /** Factor used to convert {@link #timeout} (seconds) into milliseconds. */
    private static final int MILLIS_PER_SECOND = 1000;

    /** Address that {@link #getDocument()} connects to. */
    protected String url;

    /**
     * Request timeout in seconds (default 10); it is converted to milliseconds before
     * being handed to jsoup.
     */
    protected int timeout = 10;

    /**
     * Creates a crawler for the given address.
     *
     * @param url the address to fetch; stored as-is, without validation
     */
    public BaseCrawler(String url) {
        this.url = url;
    }

    /**
     * Issues a GET request to {@link #url} and returns the result from jsoup.
     *
     * <p>The request carries {@link #USER_AGENT}, uses {@link #timeout} (in seconds) as its
     * timeout, and tells jsoup to ignore the response content type.
     *
     * @return the document produced by jsoup for the response
     * @throws IOException if jsoup fails to fetch the URL
     */
    protected Document getDocument() throws IOException {
        // jsoup's timeout(int) takes milliseconds, while the field is kept in seconds.
        final int timeoutMillis = timeout * MILLIS_PER_SECOND;

        return Jsoup.connect(url)
                .userAgent(USER_AGENT)
                .timeout(timeoutMillis)
                .ignoreContentType(true)
                .get();
    }

    /** Runs the crawl; the behavior is defined entirely by the subclass. */
    public abstract void crawl();
}