You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

103 lines
3.2 KiB

package com.crawler.crawler;
import com.crawler.crawler.impl.*;
import com.crawler.exception.InvalidUrlException;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Pattern;
/**
 * Singleton factory that selects a {@link Crawler} implementation for a URL.
 *
 * <p>Each crawler is registered under its name together with a regular expression.
 * Patterns are tried in registration order and the first one that matches the whole
 * URL wins; when nothing matches, {@code ExampleCrawler} is used as the fallback.
 */
public class CrawlerFactory {

    /** Name of the catch-all crawler used whenever no other crawler is selected. */
    private static final String DEFAULT_CRAWLER_NAME = "ExampleCrawler";

    /**
     * Registered patterns keyed by crawler name. A {@link LinkedHashMap} is used on
     * purpose: lookup takes the first match, so iteration must follow registration order.
     */
    private final Map<String, Pattern> crawlerPatterns = new LinkedHashMap<>();

    private CrawlerFactory() {
        initPatterns();
    }

    /**
     * Holds the singleton. The JVM initializes this nested class on first access, which
     * gives lazy creation and safe publication without explicit locking.
     */
    private static final class InstanceHolder {
        private static final CrawlerFactory INSTANCE = new CrawlerFactory();
    }

    /**
     * Returns the shared factory instance.
     *
     * @return the single {@code CrawlerFactory} instance
     */
    public static CrawlerFactory getInstance() {
        return InstanceHolder.INSTANCE;
    }

    /**
     * Registers the URL pattern of every known crawler.
     *
     * <p>Order matters: the more specific {@code news.hnu.edu.cn} pattern must be
     * registered before the broader {@code hnu.edu.cn} one, and the catch-all pattern
     * must stay last, otherwise the earlier entries would be shadowed.
     */
    private void initPatterns() {
        crawlerPatterns.put("MountBladeCrawler",
                Pattern.compile(".*mountblade\\.com\\.cn.*", Pattern.CASE_INSENSITIVE));
        crawlerPatterns.put("HunanUniversityNewsCrawler",
                Pattern.compile(".*news\\.hnu\\.edu\\.cn.*", Pattern.CASE_INSENSITIVE));
        crawlerPatterns.put("HunanUniversityCrawler",
                Pattern.compile(".*hnu\\.edu\\.cn.*", Pattern.CASE_INSENSITIVE));
        crawlerPatterns.put("ChinaWeatherCrawler",
                Pattern.compile(".*weather\\.com\\.cn.*", Pattern.CASE_INSENSITIVE));
        crawlerPatterns.put(DEFAULT_CRAWLER_NAME,
                Pattern.compile(".*", Pattern.CASE_INSENSITIVE));
    }

    /**
     * Creates the crawler responsible for the given URL.
     *
     * @param url the URL to crawl; must be non-empty and start with {@code http://} or
     *            {@code https://}
     * @return the crawler of the first matching pattern, or an {@code ExampleCrawler}
     *         when no pattern matches
     * @throws InvalidUrlException if {@code url} is {@code null}, empty, or does not start
     *                             with {@code http://} or {@code https://}
     */
    public Crawler createCrawler(String url) {
        validateUrl(url);
        String crawlerName = findCrawlerName(url);
        if (crawlerName == null) {
            return new ExampleCrawler();
        }
        return createCrawlerByName(crawlerName);
    }

    /**
     * Rejects URLs that are missing or do not use the HTTP/HTTPS scheme prefix.
     *
     * @param url the URL to check
     * @throws InvalidUrlException if {@code url} is {@code null}, empty, or does not start
     *                             with {@code http://} or {@code https://}
     */
    private void validateUrl(String url) {
        if (url == null || url.isEmpty()) {
            throw new InvalidUrlException("URL不能为空", url);
        }
        if (!url.startsWith("http://") && !url.startsWith("https://")) {
            throw new InvalidUrlException("URL格式无效,必须以http://或https://开头", url);
        }
    }

    /**
     * Instantiates the crawler registered under the given name.
     *
     * @param crawlerName a name used as key in the pattern registry
     * @return a new crawler instance; unknown names yield an {@code ExampleCrawler}
     */
    private Crawler createCrawlerByName(String crawlerName) {
        switch (crawlerName) {
            case "MountBladeCrawler":
                return new MountBladeCrawler();
            case "HunanUniversityNewsCrawler":
                return new HunanUniversityNewsCrawler();
            case "HunanUniversityCrawler":
                return new HunanUniversityCrawler();
            case "ChinaWeatherCrawler":
                return new ChinaWeatherCrawler();
            case "ExampleCrawler":
            default:
                return new ExampleCrawler();
        }
    }

    /**
     * Returns the name of the crawler that would handle the given URL.
     *
     * <p>Unlike {@link #createCrawler(String)} this method never throws and does not
     * check the URL scheme.
     *
     * @param url the URL to look up; may be {@code null}
     * @return the name of the first matching crawler, or {@code "ExampleCrawler"} when
     *         {@code url} is {@code null}, empty, or matched by no pattern
     */
    public String getCrawlerName(String url) {
        if (url == null || url.isEmpty()) {
            return DEFAULT_CRAWLER_NAME;
        }
        String crawlerName = findCrawlerName(url);
        return crawlerName != null ? crawlerName : DEFAULT_CRAWLER_NAME;
    }

    /**
     * Tells whether at least one registered pattern matches the given URL.
     *
     * <p>Because a catch-all pattern is registered, this is {@code true} for practically
     * every non-empty single-line string; {@code .} does not match line terminators, so
     * a string containing one can still be unmatched. No scheme validation happens here,
     * so a {@code true} result does not guarantee that {@link #createCrawler(String)}
     * accepts the URL.
     *
     * @param url the URL to test; may be {@code null}
     * @return {@code true} if {@code url} is non-empty and matched by a registered
     *         pattern, {@code false} otherwise
     */
    public boolean isUrlSupported(String url) {
        if (url == null || url.isEmpty()) {
            return false;
        }
        // Previously this fell through to "return true", which made the lookup pointless.
        return findCrawlerName(url) != null;
    }

    /**
     * Finds the first registered crawler whose pattern matches the entire URL.
     *
     * @param url a non-null URL
     * @return the matching crawler name, or {@code null} when no pattern matches
     */
    private String findCrawlerName(String url) {
        for (Map.Entry<String, Pattern> entry : crawlerPatterns.entrySet()) {
            if (entry.getValue().matcher(url).matches()) {
                return entry.getKey();
            }
        }
        return null;
    }
}