You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
107 lines
2.8 KiB
107 lines
2.8 KiB
package com.example.datacollect.strategy;
|
|
|
|
import java.util.ArrayList;
|
|
import java.util.List;
|
|
import java.util.regex.Pattern;
|
|
|
|
public class StrategyFactory {
|
|
private final List<CrawlStrategy> strategies = new ArrayList<>();
|
|
private CrawlStrategy defaultStrategy;
|
|
|
|
public StrategyFactory() {
|
|
strategies.add(new HnuNewsStrategy());
|
|
strategies.add(new BlogStrategy());
|
|
strategies.add(new NewsStrategy());
|
|
strategies.add(new GenericNewsStrategy());
|
|
defaultStrategy = new DefaultStrategy();
|
|
}
|
|
|
|
public CrawlStrategy getStrategy(String url) {
|
|
CrawlStrategy matched = null;
|
|
int highestPriority = Integer.MIN_VALUE;
|
|
|
|
for (CrawlStrategy s : strategies) {
|
|
boolean supports = false;
|
|
|
|
Pattern pattern = s.getPattern();
|
|
if (pattern != null) {
|
|
supports = pattern.matcher(url).find();
|
|
} else {
|
|
supports = s.supports(url);
|
|
}
|
|
|
|
if (supports) {
|
|
int priority = s.getPriority();
|
|
if (priority > highestPriority) {
|
|
highestPriority = priority;
|
|
matched = s;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (matched != null) {
|
|
return matched;
|
|
}
|
|
|
|
return defaultStrategy;
|
|
}
|
|
|
|
public void register(CrawlStrategy strategy) {
|
|
strategies.add(strategy);
|
|
}
|
|
|
|
public void register(CrawlStrategy strategy, int priority) {
|
|
strategies.add(new PrioritizedStrategy(strategy, priority));
|
|
}
|
|
|
|
public void setDefaultStrategy(CrawlStrategy defaultStrategy) {
|
|
this.defaultStrategy = defaultStrategy;
|
|
}
|
|
|
|
private static class PrioritizedStrategy implements CrawlStrategy {
|
|
private final CrawlStrategy delegate;
|
|
private final int priority;
|
|
|
|
public PrioritizedStrategy(CrawlStrategy delegate, int priority) {
|
|
this.delegate = delegate;
|
|
this.priority = priority;
|
|
}
|
|
|
|
@Override
|
|
public List<Article> parse(String url, Document doc) {
|
|
return delegate.parse(url, doc);
|
|
}
|
|
|
|
@Override
|
|
public boolean supports(String url) {
|
|
return delegate.supports(url);
|
|
}
|
|
|
|
@Override
|
|
public int getPriority() {
|
|
return priority;
|
|
}
|
|
|
|
@Override
|
|
public Pattern getPattern() {
|
|
return delegate.getPattern();
|
|
}
|
|
}
|
|
|
|
private static class DefaultStrategy implements CrawlStrategy {
|
|
@Override
|
|
public List<Article> parse(String url, Document doc) {
|
|
return List.of();
|
|
}
|
|
|
|
@Override
|
|
public boolean supports(String url) {
|
|
return false;
|
|
}
|
|
|
|
@Override
|
|
public int getPriority() {
|
|
return Integer.MIN_VALUE;
|
|
}
|
|
}
|
|
}
|