diff --git a/.gitignore b/.gitignore
index 1c5ca92..3c66885 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,8 @@ target/
!**/src/test/**/target/
.kotlin
+*.output.json
+
### IntelliJ IDEA ###
.idea/
diff --git a/pom.xml b/pom.xml
index d657eb4..d4b92f0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -18,5 +18,11 @@
jsoup
1.22.2
+
+ com.google.code.gson
+ gson
+ 2.14.0
+ compile
+
\ No newline at end of file
diff --git a/src/main/java/internal/hw/crawler/Main.java b/src/main/java/internal/hw/crawler/Main.java
index a9a03ed..feef9a9 100644
--- a/src/main/java/internal/hw/crawler/Main.java
+++ b/src/main/java/internal/hw/crawler/Main.java
@@ -1,9 +1,6 @@
package internal.hw.crawler;
-import internal.hw.crawler.commands.CrawlCommand;
-import internal.hw.crawler.commands.ExitCommand;
-import internal.hw.crawler.commands.HelpCommand;
-import internal.hw.crawler.commands.ListCommand;
+import internal.hw.crawler.commands.*;
import internal.hw.crawler.repositories.ArticleRepository;
import internal.hw.crawler.views.ConsoleView;
@@ -16,6 +13,7 @@ public class Main {
controller.registerCommand(new ExitCommand());
controller.registerCommand(new CrawlCommand(repository, view));
controller.registerCommand(new ListCommand(repository, view));
+ controller.registerCommand(new SaveCommand(repository, view));
controller.registerCommand(new HelpCommand(controller.getCommands(), view));
view.printSuccess("Welcome to crawler. Type `help` for a list of available commands.");
diff --git a/src/main/java/internal/hw/crawler/commands/SaveCommand.java b/src/main/java/internal/hw/crawler/commands/SaveCommand.java
new file mode 100644
index 0000000..e906395
--- /dev/null
+++ b/src/main/java/internal/hw/crawler/commands/SaveCommand.java
@@ -0,0 +1,103 @@
+package internal.hw.crawler.commands;
+
+import com.google.gson.Gson;
+import internal.hw.crawler.models.Article;
+import internal.hw.crawler.repositories.ArticleRepository;
+import internal.hw.crawler.views.CommandOutput;
+
+import java.io.*;
+import java.nio.charset.StandardCharsets;
+import java.util.*;
+import java.util.stream.Collectors;
+
+/**
+ * Console command {@code save}: merges the articles held by the repository into
+ * the JSON file {@code articles.output.json} and rewrites that file.
+ *
+ * <p>Articles already present in the file are kept unless the repository holds an
+ * article with the same source and id, in which case the repository version wins.
+ */
+public class SaveCommand implements Command {
+    /** File, relative to the working directory, that articles are persisted to. */
+    private static final String OUTPUT_FILE = "articles.output.json";
+
+    private final Gson gson = new Gson();
+    private final ArticleRepository articleRepository;
+    private final CommandOutput out;
+
+    /**
+     * @param articleRepository source of the articles to persist
+     * @param out               sink for the confirmation message
+     */
+    public SaveCommand(ArticleRepository articleRepository, CommandOutput out) {
+        this.articleRepository = articleRepository;
+        this.out = out;
+    }
+
+    @Override
+    public String getName() {
+        return "save";
+    }
+
+    /**
+     * Merges repository articles over the previously saved ones and writes the result.
+     *
+     * @param args command arguments; not used
+     * @throws UncheckedIOException if the output file cannot be read or written
+     */
+    @Override
+    public void execute(String[] args) {
+        // LinkedHashMap keeps the file order stable between saves; the merge function
+        // tolerates duplicate keys already present in the file (last one wins).
+        Map<String, Article> articleMap = getExistingArticles(OUTPUT_FILE).stream()
+                .filter(Objects::nonNull)
+                .collect(Collectors.toMap(this::articleMapId, it -> it,
+                        (older, newer) -> newer, LinkedHashMap::new));
+
+        // Update existing articles with new articles
+        for (Article article : articleRepository.getAll()) {
+            articleMap.put(articleMapId(article), article);
+        }
+
+        Article[] articlesToSave = articleMap.values().toArray(new Article[0]);
+
+        try (Writer writer = new BufferedWriter(
+                new OutputStreamWriter(new FileOutputStream(OUTPUT_FILE), StandardCharsets.UTF_8))) {
+            writer.write(gson.toJson(articlesToSave));
+        } catch (IOException e) {
+            throw new UncheckedIOException("Failed to write articles to " + OUTPUT_FILE, e);
+        }
+
+        // Report success only after the writer has been flushed and closed.
+        out.success(String.format("Wrote %d articles to %s", articlesToSave.length, OUTPUT_FILE));
+    }
+
+    /** Builds the key that identifies an article across saves: {@code <source>-<id>}. */
+    private String articleMapId(Article article) {
+        return String.format("%s-%s", article.getSource(), article.getId());
+    }
+
+    /**
+     * Reads the articles stored by a previous save.
+     *
+     * @param filename path of the JSON file to read
+     * @return the stored articles, or an empty list when the file is missing or empty
+     * @throws UncheckedIOException if the file exists but cannot be read
+     */
+    private List<Article> getExistingArticles(String filename) {
+        File file = new File(filename);
+        if (!file.isFile()) {
+            // Nothing has been saved yet.
+            return List.of();
+        }
+
+        try (Reader reader = new BufferedReader(
+                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
+            // Gson returns null for an empty document; malformed JSON still raises its parse exception.
+            Article[] articles = gson.fromJson(reader, Article[].class);
+            return articles == null ? List.of() : Arrays.asList(articles);
+        } catch (IOException e) {
+            throw new UncheckedIOException("Failed to read articles from " + filename, e);
+        }
+    }
+}
diff --git a/src/main/java/internal/hw/crawler/models/Article.java b/src/main/java/internal/hw/crawler/models/Article.java
index bc9be98..ff5e4f8 100644
--- a/src/main/java/internal/hw/crawler/models/Article.java
+++ b/src/main/java/internal/hw/crawler/models/Article.java
@@ -5,6 +5,7 @@ import java.util.Set;
public class Article {
private String id;
+ private String source;
private URL url;
private String title;
private Set authors;
@@ -18,6 +19,14 @@ public class Article {
this.id = id;
}
+ public String getSource() {
+ return source;
+ }
+
+ public void setSource(String source) {
+ this.source = source;
+ }
+
public URL getUrl() {
return url;
}
diff --git a/src/main/java/internal/hw/crawler/strategies/crawl/IthomeCrawlStrategy.java b/src/main/java/internal/hw/crawler/strategies/crawl/IthomeCrawlStrategy.java
index 86ef862..0ea4f6a 100644
--- a/src/main/java/internal/hw/crawler/strategies/crawl/IthomeCrawlStrategy.java
+++ b/src/main/java/internal/hw/crawler/strategies/crawl/IthomeCrawlStrategy.java
@@ -39,6 +39,7 @@ public class IthomeCrawlStrategy implements CrawlStrategy {
Article article = new Article();
article.setId(id);
+ article.setSource("ithome");
article.setUrl(url);
article.setTitle(title);
article.setAuthors(authors);