You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

107 lines
2.8 KiB

package com.example.datacollect.strategy;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.regex.Pattern;
/**
 * Picks the {@link CrawlStrategy} to use for a given URL.
 *
 * <p>The factory keeps a list of candidate strategies. A candidate matches a URL when its
 * {@link CrawlStrategy#getPattern() pattern} is found in the URL or, if it exposes no pattern,
 * when {@link CrawlStrategy#supports(String)} returns {@code true}. Among the matching
 * candidates the one with the highest {@link CrawlStrategy#getPriority() priority} is chosen;
 * on equal priority the candidate registered first is kept. When nothing matches, the
 * configured fallback strategy is returned.
 *
 * <p>This class performs no synchronization of its own.
 */
public class StrategyFactory {

    /** Candidate strategies in registration order. */
    private final List<CrawlStrategy> strategies = new ArrayList<>();

    /** Strategy returned by {@link #getStrategy(String)} when no candidate matches. */
    private CrawlStrategy defaultStrategy;

    /**
     * Creates a factory pre-populated with the built-in strategies and a fallback whose
     * {@code parse} method returns an empty list.
     */
    public StrategyFactory() {
        strategies.add(new HnuNewsStrategy());
        strategies.add(new BlogStrategy());
        strategies.add(new NewsStrategy());
        strategies.add(new GenericNewsStrategy());
        defaultStrategy = new DefaultStrategy();
    }

    /**
     * Returns the highest-priority registered strategy that matches {@code url}.
     *
     * @param url the URL to find a strategy for; {@code null} matches no strategy
     * @return the best matching strategy, or the fallback strategy when none matches (this is
     *     {@code null} only if {@link #setDefaultStrategy(CrawlStrategy)} was given {@code null})
     */
    public CrawlStrategy getStrategy(String url) {
        if (url == null) {
            // Nothing can match a missing URL; avoid an NPE from Pattern#matcher(null).
            return defaultStrategy;
        }

        CrawlStrategy best = null;
        int bestPriority = Integer.MIN_VALUE;
        for (CrawlStrategy candidate : strategies) {
            if (!matches(candidate, url)) {
                continue;
            }
            int priority = candidate.getPriority();
            // The first match is always accepted, so a strategy whose priority is
            // Integer.MIN_VALUE can still win when it is the only match. Later matches
            // must be strictly better, which keeps the earliest registration on ties.
            if (best == null || priority > bestPriority) {
                best = candidate;
                bestPriority = priority;
            }
        }
        return best != null ? best : defaultStrategy;
    }

    /**
     * Adds a strategy to the candidates; it competes using its own priority.
     *
     * @param strategy the strategy to add
     * @throws NullPointerException if {@code strategy} is {@code null}
     */
    public void register(CrawlStrategy strategy) {
        strategies.add(Objects.requireNonNull(strategy, "strategy"));
    }

    /**
     * Adds a strategy to the candidates with an explicit priority that replaces the value
     * reported by the strategy itself.
     *
     * @param strategy the strategy to add
     * @param priority the priority to use for this registration
     * @throws NullPointerException if {@code strategy} is {@code null}
     */
    public void register(CrawlStrategy strategy, int priority) {
        Objects.requireNonNull(strategy, "strategy");
        strategies.add(new PrioritizedStrategy(strategy, priority));
    }

    /**
     * Replaces the fallback strategy returned when no candidate matches.
     *
     * @param defaultStrategy the new fallback; if {@code null}, {@link #getStrategy(String)}
     *     returns {@code null} when nothing matches
     */
    public void setDefaultStrategy(CrawlStrategy defaultStrategy) {
        this.defaultStrategy = defaultStrategy;
    }

    /**
     * Tells whether {@code candidate} applies to {@code url}: a non-null pattern decides via
     * {@code find()}, otherwise the decision is left to {@code supports(url)}.
     */
    private static boolean matches(CrawlStrategy candidate, String url) {
        Pattern pattern = candidate.getPattern();
        if (pattern != null) {
            return pattern.matcher(url).find();
        }
        return candidate.supports(url);
    }

    /**
     * Wrapper that forwards every call to another strategy but reports a caller-supplied
     * priority instead of the wrapped strategy's own.
     */
    private static final class PrioritizedStrategy implements CrawlStrategy {

        private final CrawlStrategy delegate;
        private final int priority;

        public PrioritizedStrategy(CrawlStrategy delegate, int priority) {
            this.delegate = delegate;
            this.priority = priority;
        }

        @Override
        public List<Article> parse(String url, Document doc) {
            return delegate.parse(url, doc);
        }

        @Override
        public boolean supports(String url) {
            return delegate.supports(url);
        }

        @Override
        public int getPriority() {
            return priority;
        }

        @Override
        public Pattern getPattern() {
            return delegate.getPattern();
        }
    }

    /**
     * Fallback used when no registered strategy matches: it supports no URL, parses to an
     * empty list and reports the lowest possible priority.
     */
    // NOTE(review): getPattern() is not overridden here, so this class relies on
    // CrawlStrategy providing a default implementation - confirm against the interface.
    private static final class DefaultStrategy implements CrawlStrategy {

        @Override
        public List<Article> parse(String url, Document doc) {
            return List.of();
        }

        @Override
        public boolean supports(String url) {
            return false;
        }

        @Override
        public int getPriority() {
            return Integer.MIN_VALUE;
        }
    }
}