You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

50 lines
1.6 KiB

package com.example.datacollect.strategy;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
/**
 * Chooses the {@link CrawlStrategy} to apply to a given URL.
 *
 * <p>Registered strategies are kept sorted by {@code getPriority()} in descending order.
 * A lookup returns the first strategy in that order whose {@code supports(url)} is true,
 * or the default strategy when none matches. The sort is stable, so strategies with the
 * same priority stay in the order they were registered.
 *
 * <p>NOTE(review): this class does no synchronization of its own; confirm whether
 * {@link #register(CrawlStrategy)} can be called concurrently with lookups.
 */
public class StrategyFactory {
    /** Orders strategies from the highest {@code getPriority()} to the lowest. */
    private static final Comparator<CrawlStrategy> BY_PRIORITY_DESC =
            Comparator.comparingInt(CrawlStrategy::getPriority).reversed();

    /** Registered strategies, always sorted with {@link #BY_PRIORITY_DESC}. */
    private final List<CrawlStrategy> strategies = new ArrayList<>();

    /** Fallback returned when no registered strategy supports a URL. */
    private final CrawlStrategy defaultStrategy;

    /**
     * Creates a factory pre-loaded with the built-in strategies and a {@code DefaultStrategy}
     * fallback.
     */
    public StrategyFactory() {
        strategies.add(new HnuNewsStrategy());
        strategies.add(new BlogStrategy());
        strategies.add(new NewsStrategy());
        // Presumably (pattern, priority) - confirm against RegexStrategy's constructor.
        strategies.add(new RegexStrategy(".*\\.edu\\.cn$", 80));
        strategies.add(new RegexStrategy(".*\\.com$", 60));
        strategies.sort(BY_PRIORITY_DESC);
        this.defaultStrategy = new DefaultStrategy();
    }

    /**
     * Returns the highest-priority registered strategy that supports {@code url}.
     *
     * <p>Every registered strategy is asked whether it supports the URL. When more than one
     * does, a warning naming the chosen strategy is printed to standard output.
     *
     * @param url the URL to find a strategy for; it is passed unchanged to each strategy's
     *     {@code supports} method
     * @return the first supporting strategy in priority order, or the default strategy when
     *     no registered strategy supports the URL
     */
    public CrawlStrategy getStrategy(String url) {
        // The list is already in priority order, so the first match is the winner; the
        // remaining matches only need to be counted for the ambiguity warning.
        CrawlStrategy best = null;
        int matchCount = 0;
        for (CrawlStrategy candidate : strategies) {
            if (candidate.supports(url)) {
                if (best == null) {
                    best = candidate;
                }
                matchCount++;
            }
        }
        if (best == null) {
            return defaultStrategy;
        }
        if (matchCount > 1) {
            System.out.println("WARNING: Multiple strategies matched for URL: " + url
                    + ", using highest priority: " + best.getClass().getSimpleName());
        }
        return best;
    }

    /**
     * Adds a strategy and restores the descending-priority order.
     *
     * @param strategy the strategy to add; must not be {@code null}
     * @throws NullPointerException if {@code strategy} is {@code null}
     */
    public void register(CrawlStrategy strategy) {
        // Reject null before touching the list: adding it first would leave a null entry
        // behind after the sort fails, breaking every later lookup.
        if (strategy == null) {
            throw new NullPointerException("strategy");
        }
        strategies.add(strategy);
        strategies.sort(BY_PRIORITY_DESC);
    }

    /**
     * Returns a snapshot of the registered strategies in priority order.
     *
     * @return a new list; changes to it do not affect this factory
     */
    public List<CrawlStrategy> getAllStrategies() {
        return new ArrayList<>(strategies);
    }

    /**
     * Returns the fallback strategy used when no registered strategy supports a URL.
     *
     * @return the default strategy
     */
    public CrawlStrategy getDefaultStrategy() {
        return defaultStrategy;
    }
}