You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
50 lines
1.6 KiB
50 lines
1.6 KiB
package com.example.datacollect.strategy;
|
|
|
|
import java.util.ArrayList;
|
|
import java.util.Comparator;
|
|
import java.util.List;
|
|
|
|
public class StrategyFactory {
|
|
private final List<CrawlStrategy> strategies = new ArrayList<>();
|
|
private final CrawlStrategy defaultStrategy;
|
|
|
|
public StrategyFactory() {
|
|
strategies.add(new HnuNewsStrategy());
|
|
strategies.add(new BlogStrategy());
|
|
strategies.add(new NewsStrategy());
|
|
strategies.add(new RegexStrategy(".*\\.edu\\.cn$", 80));
|
|
strategies.add(new RegexStrategy(".*\\.com$", 60));
|
|
strategies.sort(Comparator.comparingInt(CrawlStrategy::getPriority).reversed());
|
|
this.defaultStrategy = new DefaultStrategy();
|
|
}
|
|
|
|
public CrawlStrategy getStrategy(String url) {
|
|
List<CrawlStrategy> matched = new ArrayList<>();
|
|
for (CrawlStrategy s : strategies) {
|
|
if (s.supports(url)) {
|
|
matched.add(s);
|
|
}
|
|
}
|
|
if (matched.isEmpty()) {
|
|
return defaultStrategy;
|
|
}
|
|
if (matched.size() > 1) {
|
|
System.out.println("WARNING: Multiple strategies matched for URL: " + url
|
|
+ ", using highest priority: " + matched.get(0).getClass().getSimpleName());
|
|
}
|
|
return matched.get(0);
|
|
}
|
|
|
|
public void register(CrawlStrategy strategy) {
|
|
strategies.add(strategy);
|
|
strategies.sort(Comparator.comparingInt(CrawlStrategy::getPriority).reversed());
|
|
}
|
|
|
|
public List<CrawlStrategy> getAllStrategies() {
|
|
return new ArrayList<>(strategies);
|
|
}
|
|
|
|
public CrawlStrategy getDefaultStrategy() {
|
|
return defaultStrategy;
|
|
}
|
|
}
|
|
|