31 changed files with 1168 additions and 0 deletions
@ -0,0 +1,4 @@ |
|||||
|
*.jar |
||||
|
*.jar |
||||
|
*.class |
||||
|
*.log |
||||
@ -0,0 +1,3 @@ |
|||||
|
{ |
||||
|
"java.configuration.updateBuildConfiguration": "interactive" |
||||
|
} |
||||
@ -0,0 +1,67 @@ |
|||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0" |
||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd"> |
||||
|
<modelVersion>4.0.0</modelVersion> |
||||
|
<groupId>com.example</groupId> |
||||
|
<artifactId>datacollect-cli</artifactId> |
||||
|
<version>0.1.0</version> |
||||
|
<properties> |
||||
|
<maven.compiler.source>11</maven.compiler.source> |
||||
|
<maven.compiler.target>11</maven.compiler.target> |
||||
|
</properties> |
||||
|
<dependencies> |
||||
|
<dependency> |
||||
|
<groupId>org.jsoup</groupId> |
||||
|
<artifactId>jsoup</artifactId> |
||||
|
<version>1.17.2</version> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>ch.qos.logback</groupId> |
||||
|
<artifactId>logback-classic</artifactId> |
||||
|
<version>1.4.14</version> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>com.fasterxml.jackson.core</groupId> |
||||
|
<artifactId>jackson-databind</artifactId> |
||||
|
<version>2.15.3</version> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>com.fasterxml.jackson.datatype</groupId> |
||||
|
<artifactId>jackson-datatype-jsr310</artifactId> |
||||
|
<version>2.15.3</version> |
||||
|
</dependency> |
||||
|
</dependencies> |
||||
|
<build> |
||||
|
<plugins> |
||||
|
<plugin> |
||||
|
<groupId>org.apache.maven.plugins</groupId> |
||||
|
<artifactId>maven-compiler-plugin</artifactId> |
||||
|
<version>3.8.1</version> |
||||
|
</plugin> |
||||
|
<plugin> |
||||
|
<groupId>org.apache.maven.plugins</groupId> |
||||
|
<artifactId>maven-assembly-plugin</artifactId> |
||||
|
<version>3.3.0</version> |
||||
|
<configuration> |
||||
|
<archive> |
||||
|
<manifest> |
||||
|
<mainClass>com.example.datacollect.Main</mainClass> |
||||
|
</manifest> |
||||
|
</archive> |
||||
|
<descriptorRefs> |
||||
|
<descriptorRef>jar-with-dependencies</descriptorRef> |
||||
|
</descriptorRefs> |
||||
|
</configuration> |
||||
|
<executions> |
||||
|
<execution> |
||||
|
<id>make-assembly</id> |
||||
|
<phase>package</phase> |
||||
|
<goals> |
||||
|
<goal>single</goal> |
||||
|
</goals> |
||||
|
</execution> |
||||
|
</executions> |
||||
|
</plugin> |
||||
|
</plugins> |
||||
|
</build> |
||||
|
</project> |
||||
@ -0,0 +1,25 @@ |
|||||
|
package com.example.datacollect; |
||||
|
|
||||
|
import com.example.datacollect.controller.CrawlerController; |
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.strategy.StrategyFactory; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
|
||||
|
public class Main { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(Main.class); |
||||
|
|
||||
|
public static void main(String[] args) { |
||||
|
logger.info("Starting CLI Crawler application"); |
||||
|
ConsoleView view = new ConsoleView(); |
||||
|
ArticleRepository repository = new ArticleRepository(); |
||||
|
StrategyFactory strategyFactory = new StrategyFactory(); |
||||
|
CrawlerController controller = new CrawlerController(view, repository, strategyFactory); |
||||
|
|
||||
|
view.printSuccess("Welcome to CLI Crawler (w10_3)! Type help for commands."); |
||||
|
while (true) { |
||||
|
controller.handle(view.readLine()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,75 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.strategy.CrawlStrategy; |
||||
|
import com.example.datacollect.strategy.StrategyFactory; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import org.jsoup.Jsoup; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
|
||||
|
public class AnalyzeCommand implements Command { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(AnalyzeCommand.class); |
||||
|
|
||||
|
private final ConsoleView view; |
||||
|
private final StrategyFactory strategyFactory; |
||||
|
|
||||
|
public AnalyzeCommand(ConsoleView view, StrategyFactory strategyFactory) { |
||||
|
this.view = view; |
||||
|
this.strategyFactory = strategyFactory; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "analyze"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, ArticleRepository repository) { |
||||
|
if (args.length < 2) { |
||||
|
logger.warn("Analyze command called without URL argument"); |
||||
|
view.printError("Usage: analyze <url>"); |
||||
|
return; |
||||
|
} |
||||
|
String url = args[1]; |
||||
|
logger.info("Analyzing URL: {}", url); |
||||
|
|
||||
|
CrawlStrategy strategy = strategyFactory.getStrategy(url); |
||||
|
logger.debug("Using strategy: {}", strategy.getClass().getSimpleName()); |
||||
|
|
||||
|
try { |
||||
|
view.printInfo("Analyzing: " + url); |
||||
|
Document doc = Jsoup.connect(url).get(); |
||||
|
var articles = strategy.parse(url, doc); |
||||
|
|
||||
|
int count = articles.size(); |
||||
|
int totalTitleLength = 0; |
||||
|
int totalContentLength = 0; |
||||
|
|
||||
|
for (var article : articles) { |
||||
|
if (article.getTitle() != null) { |
||||
|
totalTitleLength += article.getTitle().length(); |
||||
|
} |
||||
|
if (article.getContent() != null) { |
||||
|
totalContentLength += article.getContent().length(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
double avgTitleLength = count > 0 ? (double) totalTitleLength / count : 0; |
||||
|
double avgContentLength = count > 0 ? (double) totalContentLength / count : 0; |
||||
|
|
||||
|
logger.info("Analysis complete - Articles: {}, Avg Title Length: {:.2f}, Avg Content Length: {:.2f}", |
||||
|
count, avgTitleLength, avgContentLength); |
||||
|
|
||||
|
view.printSuccess("Analysis Results:"); |
||||
|
view.printInfo(" Total Articles: " + count); |
||||
|
view.printInfo(" Average Title Length: " + String.format("%.2f", avgTitleLength)); |
||||
|
view.printInfo(" Average Content Length: " + String.format("%.2f", avgContentLength)); |
||||
|
view.printInfo(" Strategy Used: " + strategy.getClass().getSimpleName()); |
||||
|
} catch (Exception e) { |
||||
|
logger.error("Failed to analyze URL {}: {}", url, e.getMessage(), e); |
||||
|
view.printError("Failed to analyze: " + e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,8 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
|
||||
|
public interface Command { |
||||
|
String getName(); |
||||
|
void execute(String[] args, ArticleRepository repository); |
||||
|
} |
||||
@ -0,0 +1,88 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.exception.ParseException; |
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.strategy.CrawlStrategy; |
||||
|
import com.example.datacollect.strategy.StrategyFactory; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import org.jsoup.Jsoup; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
import java.io.IOException; |
||||
|
|
||||
|
public class CrawlCommand implements Command { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(CrawlCommand.class); |
||||
|
private static final int MAX_RETRIES = 3; |
||||
|
private static final long RETRY_DELAY_MS = 1000; |
||||
|
|
||||
|
private final ConsoleView view; |
||||
|
private final StrategyFactory strategyFactory; |
||||
|
|
||||
|
public CrawlCommand(ConsoleView view, StrategyFactory strategyFactory) { |
||||
|
this.view = view; |
||||
|
this.strategyFactory = strategyFactory; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "crawl"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, ArticleRepository repository) { |
||||
|
if (args.length < 2) { |
||||
|
logger.warn("Crawl command called without URL argument"); |
||||
|
view.printError("Usage: crawl <url>"); |
||||
|
return; |
||||
|
} |
||||
|
String url = args[1]; |
||||
|
logger.info("Starting crawl for URL: {}", url); |
||||
|
|
||||
|
CrawlStrategy strategy = strategyFactory.getStrategy(url); |
||||
|
logger.debug("Using strategy: {}", strategy.getClass().getSimpleName()); |
||||
|
|
||||
|
int retryCount = 0; |
||||
|
boolean success = false; |
||||
|
|
||||
|
while (retryCount < MAX_RETRIES && !success) { |
||||
|
try { |
||||
|
view.printInfo("Crawling: " + url + (retryCount > 0 ? " (attempt " + (retryCount + 1) + ")" : "")); |
||||
|
logger.debug("Attempt {} to fetch URL: {}", retryCount + 1, url); |
||||
|
Document doc = Jsoup.connect(url).get(); |
||||
|
var articles = strategy.parse(url, doc); |
||||
|
repository.addAll(articles); |
||||
|
logger.info("Successfully crawled {} articles from {}", articles.size(), url); |
||||
|
view.printSuccess("Crawled " + articles.size() + " articles."); |
||||
|
success = true; |
||||
|
} catch (IOException e) { |
||||
|
retryCount++; |
||||
|
logger.error("Network error on attempt {} for URL {}: {}", retryCount, url, e.getMessage()); |
||||
|
if (retryCount < MAX_RETRIES) { |
||||
|
view.printWarning("Network error: " + e.getMessage() + ", retrying..."); |
||||
|
sleep(RETRY_DELAY_MS); |
||||
|
} else { |
||||
|
logger.error("Failed to crawl URL {} after {} attempts", url, MAX_RETRIES); |
||||
|
view.printError("Failed to crawl after " + MAX_RETRIES + " attempts: " + e.getMessage()); |
||||
|
} |
||||
|
} catch (ParseException e) { |
||||
|
logger.error("Parse error for URL {}: {}", url, e.getMessage()); |
||||
|
view.printError("Parse error: " + e.getMessage()); |
||||
|
break; |
||||
|
} catch (Exception e) { |
||||
|
logger.error("Unexpected error for URL {}: {}", url, e.getMessage(), e); |
||||
|
view.printError("Unexpected error: " + e.getMessage()); |
||||
|
break; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private void sleep(long millis) { |
||||
|
try { |
||||
|
Thread.sleep(millis); |
||||
|
} catch (InterruptedException e) { |
||||
|
Thread.currentThread().interrupt(); |
||||
|
logger.warn("Sleep interrupted"); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,28 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
|
||||
|
public class ExitCommand implements Command { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(ExitCommand.class); |
||||
|
|
||||
|
private final ConsoleView view; |
||||
|
|
||||
|
public ExitCommand(ConsoleView view) { |
||||
|
this.view = view; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "exit"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, ArticleRepository repository) { |
||||
|
logger.info("Exiting application"); |
||||
|
view.printSuccess("Bye!"); |
||||
|
System.exit(0); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,27 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
|
||||
|
public class HelpCommand implements Command { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(HelpCommand.class); |
||||
|
|
||||
|
private final ConsoleView view; |
||||
|
|
||||
|
public HelpCommand(ConsoleView view) { |
||||
|
this.view = view; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "help"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, ArticleRepository repository) { |
||||
|
logger.debug("Displaying help information"); |
||||
|
view.printInfo("Commands: crawl <url>, analyze <url>, list, export [--format json], import <filename>, help, exit"); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,84 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.model.Article; |
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import com.fasterxml.jackson.databind.ObjectMapper; |
||||
|
import com.fasterxml.jackson.databind.SerializationFeature; |
||||
|
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
|
||||
|
import java.io.FileWriter; |
||||
|
import java.io.IOException; |
||||
|
import java.nio.file.Path; |
||||
|
import java.nio.file.Paths; |
||||
|
import java.util.HashMap; |
||||
|
import java.util.List; |
||||
|
import java.util.Map; |
||||
|
|
||||
|
public class JsonExporterCommand implements Command { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(JsonExporterCommand.class); |
||||
|
private static final String DEFAULT_FILENAME = "articles.json"; |
||||
|
|
||||
|
private final ConsoleView view; |
||||
|
|
||||
|
public JsonExporterCommand(ConsoleView view) { |
||||
|
this.view = view; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "export"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, ArticleRepository repository) { |
||||
|
String filename = DEFAULT_FILENAME; |
||||
|
String format = null; |
||||
|
|
||||
|
for (int i = 1; i < args.length; i++) { |
||||
|
if (args[i].equals("--format") && i + 1 < args.length) { |
||||
|
format = args[i + 1]; |
||||
|
i++; |
||||
|
} else if (!args[i].startsWith("-")) { |
||||
|
filename = args[i]; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
if (format != null && !format.equals("json")) { |
||||
|
logger.warn("Unsupported export format: {}", format); |
||||
|
view.printError("Unsupported format: " + format + ". Only 'json' is supported."); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
List<Article> articles = repository.getAll(); |
||||
|
if (articles.isEmpty()) { |
||||
|
logger.warn("Attempted to export empty repository"); |
||||
|
view.printWarning("No articles to export."); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
logger.info("Exporting {} articles to JSON file: {}", articles.size(), filename); |
||||
|
|
||||
|
ObjectMapper mapper = new ObjectMapper(); |
||||
|
mapper.registerModule(new JavaTimeModule()); |
||||
|
mapper.enable(SerializationFeature.INDENT_OUTPUT); |
||||
|
mapper.disable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS); |
||||
|
|
||||
|
Map<String, Object> exportData = new HashMap<>(); |
||||
|
exportData.put("articles", articles); |
||||
|
exportData.put("count", articles.size()); |
||||
|
exportData.put("exportedAt", java.time.LocalDateTime.now().toString()); |
||||
|
|
||||
|
Path path = Paths.get(filename); |
||||
|
try (FileWriter writer = new FileWriter(path.toFile())) { |
||||
|
mapper.writeValue(writer, exportData); |
||||
|
logger.info("Successfully exported articles to {}", path.toAbsolutePath()); |
||||
|
view.printSuccess("Exported " + articles.size() + " articles to " + filename); |
||||
|
} catch (IOException e) { |
||||
|
logger.error("Failed to export articles to {}: {}", filename, e.getMessage()); |
||||
|
view.printError("Failed to export: " + e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,118 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.model.Article; |
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import com.fasterxml.jackson.databind.ObjectMapper; |
||||
|
import com.fasterxml.jackson.databind.SerializationFeature; |
||||
|
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
|
||||
|
import java.io.IOException; |
||||
|
import java.nio.file.Files; |
||||
|
import java.nio.file.Path; |
||||
|
import java.nio.file.Paths; |
||||
|
import java.util.HashSet; |
||||
|
import java.util.List; |
||||
|
import java.util.Map; |
||||
|
import java.util.Set; |
||||
|
|
||||
|
public class JsonImporterCommand implements Command { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(JsonImporterCommand.class); |
||||
|
|
||||
|
private final ConsoleView view; |
||||
|
|
||||
|
public JsonImporterCommand(ConsoleView view) { |
||||
|
this.view = view; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "import"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, ArticleRepository repository) { |
||||
|
if (args.length < 2) { |
||||
|
logger.warn("Import command called without filename argument"); |
||||
|
view.printError("Usage: import <filename>"); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
String filename = args[1]; |
||||
|
Path path = Paths.get(filename); |
||||
|
|
||||
|
if (!Files.exists(path)) { |
||||
|
logger.error("Import file does not exist: {}", filename); |
||||
|
view.printError("File not found: " + filename); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
ObjectMapper mapper = new ObjectMapper(); |
||||
|
mapper.registerModule(new JavaTimeModule()); |
||||
|
mapper.disable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS); |
||||
|
|
||||
|
try { |
||||
|
String content = Files.readString(path); |
||||
|
Map<String, Object> data = mapper.readValue(content, Map.class); |
||||
|
|
||||
|
List<Map<String, Object>> articlesList = (List<Map<String, Object>>) data.get("articles"); |
||||
|
|
||||
|
if (articlesList == null || articlesList.isEmpty()) { |
||||
|
logger.warn("No articles found in import file"); |
||||
|
view.printWarning("No articles found in file."); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
Set<String> existingUrls = new HashSet<>(); |
||||
|
for (Article article : repository.getAll()) { |
||||
|
existingUrls.add(article.getUrl()); |
||||
|
} |
||||
|
|
||||
|
int importedCount = 0; |
||||
|
int skippedCount = 0; |
||||
|
|
||||
|
for (Map<String, Object> articleMap : articlesList) { |
||||
|
String title = (String) articleMap.get("title"); |
||||
|
String url = (String) articleMap.get("url"); |
||||
|
String contentStr = (String) articleMap.get("content"); |
||||
|
|
||||
|
if (title == null || url == null) { |
||||
|
logger.warn("Skipping article with missing title or url"); |
||||
|
skippedCount++; |
||||
|
continue; |
||||
|
} |
||||
|
|
||||
|
if (existingUrls.contains(url)) { |
||||
|
logger.debug("Skipping duplicate article with url: {}", url); |
||||
|
skippedCount++; |
||||
|
continue; |
||||
|
} |
||||
|
|
||||
|
Article article; |
||||
|
if (articleMap.containsKey("crawledAt")) { |
||||
|
String crawledAtStr = (String) articleMap.get("crawledAt"); |
||||
|
java.time.LocalDateTime crawledAt = java.time.LocalDateTime.parse(crawledAtStr); |
||||
|
article = new Article(title, url, contentStr, crawledAt); |
||||
|
} else { |
||||
|
article = new Article(title, url, contentStr); |
||||
|
} |
||||
|
|
||||
|
repository.add(article); |
||||
|
existingUrls.add(url); |
||||
|
importedCount++; |
||||
|
} |
||||
|
|
||||
|
logger.info("Imported {} articles, skipped {} duplicates", importedCount, skippedCount); |
||||
|
view.printSuccess("Imported " + importedCount + " articles, skipped " + skippedCount + " duplicates."); |
||||
|
|
||||
|
} catch (IOException e) { |
||||
|
logger.error("Failed to import articles from {}: {}", filename, e.getMessage()); |
||||
|
view.printError("Failed to import: " + e.getMessage()); |
||||
|
} catch (Exception e) { |
||||
|
logger.error("Error parsing import file {}: {}", filename, e.getMessage()); |
||||
|
view.printError("Invalid JSON format: " + e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,27 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
|
||||
|
public class ListCommand implements Command { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(ListCommand.class); |
||||
|
|
||||
|
private final ConsoleView view; |
||||
|
|
||||
|
public ListCommand(ConsoleView view) { |
||||
|
this.view = view; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "list"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, ArticleRepository repository) { |
||||
|
logger.debug("Listing {} articles", repository.size()); |
||||
|
view.display(repository.getAll()); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,61 @@ |
|||||
|
package com.example.datacollect.controller; |
||||
|
|
||||
|
import com.example.datacollect.command.AnalyzeCommand; |
||||
|
import com.example.datacollect.command.Command; |
||||
|
import com.example.datacollect.command.CrawlCommand; |
||||
|
import com.example.datacollect.command.ExitCommand; |
||||
|
import com.example.datacollect.command.HelpCommand; |
||||
|
import com.example.datacollect.command.JsonExporterCommand; |
||||
|
import com.example.datacollect.command.JsonImporterCommand; |
||||
|
import com.example.datacollect.command.ListCommand; |
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.strategy.StrategyFactory; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
import java.util.HashMap; |
||||
|
import java.util.Map; |
||||
|
|
||||
|
public class CrawlerController { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(CrawlerController.class); |
||||
|
|
||||
|
private final Map<String, Command> commands = new HashMap<>(); |
||||
|
private final ConsoleView view; |
||||
|
private final ArticleRepository repository; |
||||
|
|
||||
|
public CrawlerController(ConsoleView view, ArticleRepository repository, StrategyFactory strategyFactory) { |
||||
|
this.view = view; |
||||
|
this.repository = repository; |
||||
|
register(new HelpCommand(view)); |
||||
|
register(new ListCommand(view)); |
||||
|
register(new CrawlCommand(view, strategyFactory)); |
||||
|
register(new AnalyzeCommand(view, strategyFactory)); |
||||
|
register(new ExitCommand(view)); |
||||
|
register(new JsonExporterCommand(view)); |
||||
|
register(new JsonImporterCommand(view)); |
||||
|
logger.info("CrawlerController initialized with {} commands", commands.size()); |
||||
|
} |
||||
|
|
||||
|
private void register(Command command) { |
||||
|
commands.put(command.getName(), command); |
||||
|
logger.debug("Registered command: {}", command.getName()); |
||||
|
} |
||||
|
|
||||
|
public void handle(String input) { |
||||
|
String text = input == null ? "" : input.trim(); |
||||
|
if (text.isEmpty()) { |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
String[] args = text.split("\\s+"); |
||||
|
String cmdName = args[0].toLowerCase(); |
||||
|
Command command = commands.get(cmdName); |
||||
|
if (command == null) { |
||||
|
logger.warn("Unknown command: {}", cmdName); |
||||
|
view.printError("Unknown command: " + cmdName); |
||||
|
return; |
||||
|
} |
||||
|
logger.info("Executing command: {}", cmdName); |
||||
|
command.execute(args, repository); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,11 @@ |
|||||
|
package com.example.datacollect.exception; |
||||
|
|
||||
|
/**
 * Checked base type for the crawler's own exceptions.
 */
public class CrawlerException extends Exception {

    /**
     * @param message description of the failure
     */
    public CrawlerException(String message) {
        super(message);
    }

    /**
     * @param message description of the failure
     * @param cause   underlying exception, kept for the stack trace
     */
    public CrawlerException(String message, Throwable cause) {
        super(message, cause);
    }
}
||||
@ -0,0 +1,11 @@ |
|||||
|
package com.example.datacollect.exception; |
||||
|
|
||||
|
public class NetworkException extends CrawlerException { |
||||
|
public NetworkException(String message) { |
||||
|
super(message); |
||||
|
} |
||||
|
|
||||
|
public NetworkException(String message, Throwable cause) { |
||||
|
super(message, cause); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,11 @@ |
|||||
|
package com.example.datacollect.exception; |
||||
|
|
||||
|
public class ParseException extends CrawlerException { |
||||
|
public ParseException(String message) { |
||||
|
super(message); |
||||
|
} |
||||
|
|
||||
|
public ParseException(String message, Throwable cause) { |
||||
|
super(message, cause); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,65 @@ |
|||||
|
package com.example.datacollect.model; |
||||
|
|
||||
|
import java.time.LocalDateTime; |
||||
|
|
||||
|
public class Article { |
||||
|
private String title; |
||||
|
private String url; |
||||
|
private String content; |
||||
|
private LocalDateTime crawledAt; |
||||
|
|
||||
|
public Article(String title, String url, String content) { |
||||
|
this.title = title; |
||||
|
this.url = url; |
||||
|
this.content = content; |
||||
|
this.crawledAt = LocalDateTime.now(); |
||||
|
} |
||||
|
|
||||
|
public Article(String title, String url, String content, LocalDateTime crawledAt) { |
||||
|
this.title = title; |
||||
|
this.url = url; |
||||
|
this.content = content; |
||||
|
this.crawledAt = crawledAt; |
||||
|
} |
||||
|
|
||||
|
public String getTitle() { |
||||
|
return title; |
||||
|
} |
||||
|
|
||||
|
public void setTitle(String title) { |
||||
|
this.title = title; |
||||
|
} |
||||
|
|
||||
|
public String getUrl() { |
||||
|
return url; |
||||
|
} |
||||
|
|
||||
|
public void setUrl(String url) { |
||||
|
this.url = url; |
||||
|
} |
||||
|
|
||||
|
public String getContent() { |
||||
|
return content; |
||||
|
} |
||||
|
|
||||
|
public void setContent(String content) { |
||||
|
this.content = content; |
||||
|
} |
||||
|
|
||||
|
public LocalDateTime getCrawledAt() { |
||||
|
return crawledAt; |
||||
|
} |
||||
|
|
||||
|
public void setCrawledAt(LocalDateTime crawledAt) { |
||||
|
this.crawledAt = crawledAt; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String toString() { |
||||
|
return "Article{" |
||||
|
+ "title='" + title + '\'' |
||||
|
+ ", url='" + url + '\'' |
||||
|
+ ", crawledAt=" + crawledAt |
||||
|
+ '}'; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,76 @@ |
|||||
|
package com.example.datacollect.repository; |
||||
|
|
||||
|
import com.example.datacollect.model.Article; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.Collections; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class ArticleRepository { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(ArticleRepository.class); |
||||
|
|
||||
|
private final List<Article> articles = new ArrayList<>(); |
||||
|
|
||||
|
public void add(Article article) { |
||||
|
if (article == null) { |
||||
|
logger.error("Attempted to add null article"); |
||||
|
throw new IllegalArgumentException("Article cannot be null"); |
||||
|
} |
||||
|
if (article.getTitle() == null || article.getTitle().trim().isEmpty()) { |
||||
|
logger.warn("Attempted to add article with empty title"); |
||||
|
throw new IllegalArgumentException("Article title cannot be null or empty"); |
||||
|
} |
||||
|
if (article.getUrl() == null || article.getUrl().trim().isEmpty()) { |
||||
|
logger.warn("Attempted to add article with empty URL"); |
||||
|
throw new IllegalArgumentException("Article URL cannot be null or empty"); |
||||
|
} |
||||
|
articles.add(article); |
||||
|
logger.debug("Added article: {}", article.getTitle()); |
||||
|
} |
||||
|
|
||||
|
public void addAll(List<Article> articleList) { |
||||
|
if (articleList == null) { |
||||
|
logger.error("Attempted to add null article list"); |
||||
|
throw new IllegalArgumentException("Article list cannot be null"); |
||||
|
} |
||||
|
if (articleList.isEmpty()) { |
||||
|
logger.debug("Attempted to add empty article list"); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
for (int i = 0; i < articleList.size(); i++) { |
||||
|
Article article = articleList.get(i); |
||||
|
if (article == null) { |
||||
|
logger.warn("Skipping null article at index {}", i); |
||||
|
throw new IllegalArgumentException("Article in list cannot be null at index " + i); |
||||
|
} |
||||
|
if (article.getTitle() == null || article.getTitle().trim().isEmpty()) { |
||||
|
logger.warn("Skipping article with empty title at index {}", i); |
||||
|
throw new IllegalArgumentException("Article title cannot be null or empty at index " + i); |
||||
|
} |
||||
|
if (article.getUrl() == null || article.getUrl().trim().isEmpty()) { |
||||
|
logger.warn("Skipping article with empty URL at index {}", i); |
||||
|
throw new IllegalArgumentException("Article URL cannot be null or empty at index " + i); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
articles.addAll(articleList); |
||||
|
logger.info("Added {} articles to repository", articleList.size()); |
||||
|
} |
||||
|
|
||||
|
public List<Article> getAll() { |
||||
|
logger.debug("Retrieving all articles, count: {}", articles.size()); |
||||
|
return Collections.unmodifiableList(articles); |
||||
|
} |
||||
|
|
||||
|
public int size() { |
||||
|
return articles.size(); |
||||
|
} |
||||
|
|
||||
|
public void clear() { |
||||
|
int size = articles.size(); |
||||
|
articles.clear(); |
||||
|
logger.info("Cleared repository, removed {} articles", size); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,28 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
import com.example.datacollect.exception.ParseException; |
||||
|
import com.example.datacollect.model.Article; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.jsoup.nodes.Element; |
||||
|
import org.jsoup.select.Elements; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class BlogStrategy extends PriorityStrategy { |
||||
|
private static final int PRIORITY = 100; |
||||
|
private static final String URL_PATTERN = ".*blog\\.example\\.com.*"; |
||||
|
|
||||
|
public BlogStrategy() { |
||||
|
super(PRIORITY, URL_PATTERN); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public List<Article> parse(String url, Document doc) throws ParseException { |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
Elements titles = doc.select(".post-title"); |
||||
|
for (Element e : titles) { |
||||
|
articles.add(new Article(e.text(), url, "")); |
||||
|
} |
||||
|
return articles; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,11 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
import com.example.datacollect.exception.ParseException; |
||||
|
import com.example.datacollect.model.Article; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public interface CrawlStrategy { |
||||
|
List<Article> parse(String url, Document doc) throws ParseException; |
||||
|
boolean supports(String url); |
||||
|
} |
||||
@ -0,0 +1,38 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
import com.example.datacollect.exception.ParseException; |
||||
|
import com.example.datacollect.model.Article; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.jsoup.nodes.Element; |
||||
|
import org.jsoup.select.Elements; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class DefaultStrategy implements CrawlStrategy { |
||||
|
|
||||
|
@Override |
||||
|
public boolean supports(String url) { |
||||
|
return true; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public List<Article> parse(String url, Document doc) throws ParseException { |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
|
||||
|
Elements links = doc.select("a[href]"); |
||||
|
for (Element link : links) { |
||||
|
String title = link.text().trim(); |
||||
|
String href = link.attr("abs:href"); |
||||
|
|
||||
|
if (!title.isEmpty() && title.length() > 5) { |
||||
|
articles.add(new Article(title, href.isEmpty() ? url : href, "")); |
||||
|
} |
||||
|
|
||||
|
if (articles.size() >= 20) { |
||||
|
break; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
return articles; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,52 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
import com.example.datacollect.exception.ParseException; |
||||
|
import com.example.datacollect.model.Article; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.jsoup.nodes.Element; |
||||
|
import org.jsoup.select.Elements; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class HnuNewsStrategy extends PriorityStrategy { |
||||
|
private static final int PRIORITY = 200; |
||||
|
private static final String URL_PATTERN = ".*news\\.hnu\\.edu\\.cn.*"; |
||||
|
|
||||
|
public HnuNewsStrategy() { |
||||
|
super(PRIORITY, URL_PATTERN); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public List<Article> parse(String url, Document doc) throws ParseException { |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
Elements listItems = doc.select("ul.list11 li"); |
||||
|
|
||||
|
for (Element li : listItems) { |
||||
|
Element link = li.selectFirst("a"); |
||||
|
if (link == null) continue; |
||||
|
|
||||
|
String articleUrl = link.attr("href"); |
||||
|
if (!articleUrl.startsWith("http")) { |
||||
|
articleUrl = "https://news.hnu.edu.cn" + articleUrl.replace("..", ""); |
||||
|
} |
||||
|
|
||||
|
String title = ""; |
||||
|
Element titleEl = link.selectFirst("h4.l2.h4s2"); |
||||
|
if (titleEl != null) { |
||||
|
title = titleEl.text().trim(); |
||||
|
} |
||||
|
|
||||
|
String content = ""; |
||||
|
Element contentEl = link.selectFirst("p.l3.ps3"); |
||||
|
if (contentEl != null) { |
||||
|
content = contentEl.text().trim(); |
||||
|
} |
||||
|
|
||||
|
if (!title.isEmpty()) { |
||||
|
articles.add(new Article(title, articleUrl, content)); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
return articles; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,28 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
import com.example.datacollect.exception.ParseException; |
||||
|
import com.example.datacollect.model.Article; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.jsoup.nodes.Element; |
||||
|
import org.jsoup.select.Elements; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class NewsStrategy extends PriorityStrategy { |
||||
|
private static final int PRIORITY = 100; |
||||
|
private static final String URL_PATTERN = ".*news\\.example\\.com.*"; |
||||
|
|
||||
|
public NewsStrategy() { |
||||
|
super(PRIORITY, URL_PATTERN); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public List<Article> parse(String url, Document doc) throws ParseException { |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
Elements items = doc.select(".article-headline"); |
||||
|
for (Element e : items) { |
||||
|
articles.add(new Article(e.text(), url, "")); |
||||
|
} |
||||
|
return articles; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,27 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
import java.util.regex.Pattern; |
||||
|
|
||||
|
public abstract class PriorityStrategy implements CrawlStrategy, Comparable<PriorityStrategy> { |
||||
|
private final int priority; |
||||
|
private final Pattern urlPattern; |
||||
|
|
||||
|
public PriorityStrategy(int priority, String regexPattern) { |
||||
|
this.priority = priority; |
||||
|
this.urlPattern = Pattern.compile(regexPattern); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public boolean supports(String url) { |
||||
|
return urlPattern.matcher(url).matches(); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public int compareTo(PriorityStrategy other) { |
||||
|
return Integer.compare(other.priority, this.priority); |
||||
|
} |
||||
|
|
||||
|
public int getPriority() { |
||||
|
return priority; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,49 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.Collections; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class StrategyFactory { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(StrategyFactory.class); |
||||
|
|
||||
|
private final List<PriorityStrategy> strategies = new ArrayList<>(); |
||||
|
private final CrawlStrategy defaultStrategy; |
||||
|
|
||||
|
public StrategyFactory() { |
||||
|
strategies.add(new HnuNewsStrategy()); |
||||
|
strategies.add(new BlogStrategy()); |
||||
|
strategies.add(new NewsStrategy()); |
||||
|
Collections.sort(strategies); |
||||
|
this.defaultStrategy = new DefaultStrategy(); |
||||
|
logger.info("StrategyFactory initialized with {} strategies", strategies.size()); |
||||
|
} |
||||
|
|
||||
|
public CrawlStrategy getStrategy(String url) { |
||||
|
if (url == null || url.trim().isEmpty()) { |
||||
|
logger.debug("Empty URL provided, using default strategy"); |
||||
|
return defaultStrategy; |
||||
|
} |
||||
|
|
||||
|
for (PriorityStrategy s : strategies) { |
||||
|
if (s.supports(url)) { |
||||
|
logger.debug("URL {} matched strategy: {}", url, s.getClass().getSimpleName()); |
||||
|
return s; |
||||
|
} |
||||
|
} |
||||
|
logger.debug("URL {} did not match any specific strategy, using default", url); |
||||
|
return defaultStrategy; |
||||
|
} |
||||
|
|
||||
|
public void register(PriorityStrategy strategy) { |
||||
|
strategies.add(strategy); |
||||
|
Collections.sort(strategies); |
||||
|
logger.info("Registered new strategy: {}", strategy.getClass().getSimpleName()); |
||||
|
} |
||||
|
|
||||
|
public CrawlStrategy getDefaultStrategy() { |
||||
|
return defaultStrategy; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,47 @@ |
|||||
|
package com.example.datacollect.view; |
||||
|
|
||||
|
import com.example.datacollect.model.Article; |
||||
|
import java.util.List; |
||||
|
import java.util.Scanner; |
||||
|
|
||||
|
public class ConsoleView { |
||||
|
private static final String ANSI_RESET = "\u001B[0m"; |
||||
|
private static final String ANSI_GREEN = "\u001B[32m"; |
||||
|
private static final String ANSI_RED = "\u001B[31m"; |
||||
|
private static final String ANSI_BLUE = "\u001B[34m"; |
||||
|
private static final String ANSI_YELLOW = "\u001B[33m"; |
||||
|
|
||||
|
private final Scanner scanner = new Scanner(System.in); |
||||
|
|
||||
|
public String readLine() { |
||||
|
System.out.print("> "); |
||||
|
return scanner.nextLine(); |
||||
|
} |
||||
|
|
||||
|
public void printSuccess(String msg) { |
||||
|
System.out.println(ANSI_GREEN + msg + ANSI_RESET); |
||||
|
} |
||||
|
|
||||
|
public void printError(String msg) { |
||||
|
System.out.println(ANSI_RED + msg + ANSI_RESET); |
||||
|
} |
||||
|
|
||||
|
public void printInfo(String msg) { |
||||
|
System.out.println(ANSI_BLUE + msg + ANSI_RESET); |
||||
|
} |
||||
|
|
||||
|
public void printWarning(String msg) { |
||||
|
System.out.println(ANSI_YELLOW + msg + ANSI_RESET); |
||||
|
} |
||||
|
|
||||
|
public void display(List<Article> articles) { |
||||
|
if (articles.isEmpty()) { |
||||
|
printInfo("暂无文章,请先执行 crawl。"); |
||||
|
return; |
||||
|
} |
||||
|
for (int i = 0; i < articles.size(); i++) { |
||||
|
Article a = articles.get(i); |
||||
|
System.out.println((i + 1) + ". " + a.getTitle() + " | " + a.getUrl()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,26 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?>
<!-- Logback setup: everything at INFO and above goes to the console and to a daily-rolled
     file; loggers under com.example.datacollect additionally emit DEBUG. -->
<configuration>
    <!-- Single definition of the line layout shared by both appenders. -->
    <property name="LOG_PATTERN"
              value="%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n" />

    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
        <encoder>
            <pattern>${LOG_PATTERN}</pattern>
        </encoder>
    </appender>

    <!-- Active file is logs/crawler.log; it is rolled once per day and 30 days are kept. -->
    <appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <file>logs/crawler.log</file>
        <encoder>
            <pattern>${LOG_PATTERN}</pattern>
        </encoder>
        <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
            <fileNamePattern>logs/crawler.%d{yyyy-MM-dd}.log</fileNamePattern>
            <maxHistory>30</maxHistory>
        </rollingPolicy>
    </appender>

    <root level="INFO">
        <appender-ref ref="STDOUT" />
        <appender-ref ref="FILE" />
    </root>

    <!-- Application packages log at DEBUG. -->
    <logger name="com.example.datacollect" level="DEBUG" />
</configuration>
||||
@ -0,0 +1,26 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?>
<!-- Logback setup: everything at INFO and above goes to the console and to a daily-rolled
     file; loggers under com.example.datacollect additionally emit DEBUG. -->
<configuration>
    <!-- Single definition of the line layout shared by both appenders. -->
    <property name="LOG_PATTERN"
              value="%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n" />

    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
        <encoder>
            <pattern>${LOG_PATTERN}</pattern>
        </encoder>
    </appender>

    <!-- Active file is logs/crawler.log; it is rolled once per day and 30 days are kept. -->
    <appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <file>logs/crawler.log</file>
        <encoder>
            <pattern>${LOG_PATTERN}</pattern>
        </encoder>
        <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
            <fileNamePattern>logs/crawler.%d{yyyy-MM-dd}.log</fileNamePattern>
            <maxHistory>30</maxHistory>
        </rollingPolicy>
    </appender>

    <root level="INFO">
        <appender-ref ref="STDOUT" />
        <appender-ref ref="FILE" />
    </root>

    <!-- Application packages log at DEBUG. -->
    <logger name="com.example.datacollect" level="DEBUG" />
</configuration>
||||
@ -0,0 +1,3 @@ |
|||||
|
artifactId=datacollect-cli |
||||
|
groupId=com.example |
||||
|
version=0.1.0 |
||||
@ -0,0 +1,21 @@ |
|||||
|
com\example\datacollect\strategy\DefaultStrategy.class |
||||
|
com\example\datacollect\strategy\PriorityStrategy.class |
||||
|
com\example\datacollect\command\ListCommand.class |
||||
|
com\example\datacollect\command\CrawlCommand.class |
||||
|
com\example\datacollect\strategy\BlogStrategy.class |
||||
|
com\example\datacollect\repository\ArticleRepository.class |
||||
|
com\example\datacollect\Main.class |
||||
|
com\example\datacollect\view\ConsoleView.class |
||||
|
com\example\datacollect\command\ExitCommand.class |
||||
|
com\example\datacollect\command\HelpCommand.class |
||||
|
com\example\datacollect\strategy\NewsStrategy.class |
||||
|
com\example\datacollect\command\Command.class |
||||
|
com\example\datacollect\controller\CrawlerController.class |
||||
|
com\example\datacollect\exception\CrawlerException.class |
||||
|
com\example\datacollect\exception\NetworkException.class |
||||
|
com\example\datacollect\command\AnalyzeCommand.class |
||||
|
com\example\datacollect\strategy\StrategyFactory.class |
||||
|
com\example\datacollect\strategy\HnuNewsStrategy.class |
||||
|
com\example\datacollect\exception\ParseException.class |
||||
|
com\example\datacollect\strategy\CrawlStrategy.class |
||||
|
com\example\datacollect\model\Article.class |
||||
@ -0,0 +1,23 @@ |
|||||
|
D:\桌面\java-cli - 副本 - 副本\src\main\java\com\example\datacollect\exception\ParseException.java |
||||
|
D:\桌面\java-cli - 副本 - 副本\src\main\java\com\example\datacollect\strategy\CrawlStrategy.java |
||||
|
D:\桌面\java-cli - 副本 - 副本\src\main\java\com\example\datacollect\command\JsonImporterCommand.java |
||||
|
D:\桌面\java-cli - 副本 - 副本\src\main\java\com\example\datacollect\command\CrawlCommand.java |
||||
|
D:\桌面\java-cli - 副本 - 副本\src\main\java\com\example\datacollect\strategy\BlogStrategy.java |
||||
|
D:\桌面\java-cli - 副本 - 副本\src\main\java\com\example\datacollect\command\JsonExporterCommand.java |
||||
|
D:\桌面\java-cli - 副本 - 副本\src\main\java\com\example\datacollect\command\AnalyzeCommand.java |
||||
|
D:\桌面\java-cli - 副本 - 副本\src\main\java\com\example\datacollect\strategy\HnuNewsStrategy.java |
||||
|
D:\桌面\java-cli - 副本 - 副本\src\main\java\com\example\datacollect\command\HelpCommand.java |
||||
|
D:\桌面\java-cli - 副本 - 副本\src\main\java\com\example\datacollect\command\ExitCommand.java |
||||
|
D:\桌面\java-cli - 副本 - 副本\src\main\java\com\example\datacollect\strategy\DefaultStrategy.java |
||||
|
D:\桌面\java-cli - 副本 - 副本\src\main\java\com\example\datacollect\strategy\NewsStrategy.java |
||||
|
D:\桌面\java-cli - 副本 - 副本\src\main\java\com\example\datacollect\strategy\StrategyFactory.java |
||||
|
D:\桌面\java-cli - 副本 - 副本\src\main\java\com\example\datacollect\Main.java |
||||
|
D:\桌面\java-cli - 副本 - 副本\src\main\java\com\example\datacollect\controller\CrawlerController.java |
||||
|
D:\桌面\java-cli - 副本 - 副本\src\main\java\com\example\datacollect\strategy\PriorityStrategy.java |
||||
|
D:\桌面\java-cli - 副本 - 副本\src\main\java\com\example\datacollect\exception\CrawlerException.java |
||||
|
D:\桌面\java-cli - 副本 - 副本\src\main\java\com\example\datacollect\exception\NetworkException.java |
||||
|
D:\桌面\java-cli - 副本 - 副本\src\main\java\com\example\datacollect\command\ListCommand.java |
||||
|
D:\桌面\java-cli - 副本 - 副本\src\main\java\com\example\datacollect\command\Command.java |
||||
|
D:\桌面\java-cli - 副本 - 副本\src\main\java\com\example\datacollect\model\Article.java |
||||
|
D:\桌面\java-cli - 副本 - 副本\src\main\java\com\example\datacollect\repository\ArticleRepository.java |
||||
|
D:\桌面\java-cli - 副本 - 副本\src\main\java\com\example\datacollect\view\ConsoleView.java |
||||
Loading…
Reference in new issue