diff --git a/.gitignore b/.gitignore index 1c5ca92..3c66885 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,8 @@ target/ !**/src/test/**/target/ .kotlin +*.output.json + ### IntelliJ IDEA ### .idea/ diff --git a/pom.xml b/pom.xml index d657eb4..d4b92f0 100644 --- a/pom.xml +++ b/pom.xml @@ -18,5 +18,11 @@ jsoup 1.22.2 + + com.google.code.gson + gson + 2.14.0 + compile + \ No newline at end of file diff --git a/src/main/java/internal/hw/crawler/Main.java b/src/main/java/internal/hw/crawler/Main.java index a9a03ed..feef9a9 100644 --- a/src/main/java/internal/hw/crawler/Main.java +++ b/src/main/java/internal/hw/crawler/Main.java @@ -1,9 +1,6 @@ package internal.hw.crawler; -import internal.hw.crawler.commands.CrawlCommand; -import internal.hw.crawler.commands.ExitCommand; -import internal.hw.crawler.commands.HelpCommand; -import internal.hw.crawler.commands.ListCommand; +import internal.hw.crawler.commands.*; import internal.hw.crawler.repositories.ArticleRepository; import internal.hw.crawler.views.ConsoleView; @@ -16,6 +13,7 @@ public class Main { controller.registerCommand(new ExitCommand()); controller.registerCommand(new CrawlCommand(repository, view)); controller.registerCommand(new ListCommand(repository, view)); + controller.registerCommand(new SaveCommand(repository, view)); controller.registerCommand(new HelpCommand(controller.getCommands(), view)); view.printSuccess("Welcome to crawler. 
Type `help` for a list of available commands.");
diff --git a/src/main/java/internal/hw/crawler/commands/SaveCommand.java b/src/main/java/internal/hw/crawler/commands/SaveCommand.java
new file mode 100644
index 0000000..e906395
--- /dev/null
+++ b/src/main/java/internal/hw/crawler/commands/SaveCommand.java
@@ -0,0 +1,113 @@
+package internal.hw.crawler.commands;
+
+import com.google.gson.Gson;
+import internal.hw.crawler.models.Article;
+import internal.hw.crawler.repositories.ArticleRepository;
+import internal.hw.crawler.views.CommandOutput;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.UncheckedIOException;
+import java.io.Writer;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Console command {@code save}: merges the repository's articles into a JSON file.
+ *
+ * <p>Articles already in the file are kept unless the repository holds an article with
+ * the same source and id, in which case the repository's copy replaces the stored one.
+ */
+public class SaveCommand implements Command {
+    private static final String OUTPUT_FILE = "articles.output.json";
+
+    private final Gson gson = new Gson();
+    private final ArticleRepository articleRepository;
+    private final CommandOutput out;
+
+    /**
+     * @param articleRepository supplies the articles to persist
+     * @param out receives the confirmation message after a successful save
+     */
+    public SaveCommand(ArticleRepository articleRepository, CommandOutput out) {
+        this.articleRepository = articleRepository;
+        this.out = out;
+    }
+
+    @Override
+    public String getName() {
+        return "save";
+    }
+
+    /**
+     * Merges the repository's articles with those already on disk and rewrites the file.
+     *
+     * <p>The existing file is parsed before it is reopened for writing, so a file that Gson
+     * rejects as malformed is left untouched (Gson's unchecked exception propagates).
+     *
+     * @param args not used; the output file name is fixed
+     * @throws UncheckedIOException if the output file cannot be read or written
+     */
+    @Override
+    public void execute(String[] args) {
+        Path file = Paths.get(OUTPUT_FILE);
+
+        // Insertion-ordered, so the written file has a stable article order.
+        Map<List<String>, Article> merged = new LinkedHashMap<>();
+        for (Article stored : readStoredArticles(file)) {
+            if (stored != null) { // tolerate null entries in a hand-edited file
+                merged.put(articleKey(stored), stored);
+            }
+        }
+        // Repository articles replace stored ones that share a key.
+        for (Article article : articleRepository.getAll()) {
+            merged.put(articleKey(article), article);
+        }
+
+        Article[] articlesToSave = merged.values().toArray(new Article[0]);
+
+        try (Writer writer = Files.newBufferedWriter(file, StandardCharsets.UTF_8)) {
+            writer.write(gson.toJson(articlesToSave));
+        } catch (IOException e) {
+            throw new UncheckedIOException("Failed to write " + file, e);
+        }
+
+        // Report success only after the writer was flushed and closed without error.
+        out.success(String.format("Wrote %d articles to %s", articlesToSave.length, OUTPUT_FILE));
+    }
+
+    /** Merge key of an article: its source paired with its id. */
+    private List<String> articleKey(Article article) {
+        // A pair, not a joined string, so "a-b"/"c" and "a"/"b-c" cannot collide.
+        return Arrays.asList(article.getSource(), article.getId());
+    }
+
+    /**
+     * Reads the articles previously written to {@code file}.
+     *
+     * @return the stored articles; empty when the file does not exist or holds no array
+     * @throws UncheckedIOException if the file exists but cannot be read
+     */
+    private List<Article> readStoredArticles(Path file) {
+        if (Files.notExists(file)) {
+            return Collections.emptyList();
+        }
+        try (Reader reader = Files.newBufferedReader(file, StandardCharsets.UTF_8)) {
+            // Gson returns null when the document is empty.
+            Article[] stored = gson.fromJson(reader, Article[].class);
+            if (stored == null) {
+                return Collections.emptyList();
+            }
+            return Arrays.asList(stored);
+        } catch (IOException e) {
+            throw new UncheckedIOException("Failed to read " + file, e);
+        }
+    }
+}
diff --git a/src/main/java/internal/hw/crawler/models/Article.java b/src/main/java/internal/hw/crawler/models/Article.java
index bc9be98..ff5e4f8 100644
--- a/src/main/java/internal/hw/crawler/models/Article.java
+++ b/src/main/java/internal/hw/crawler/models/Article.java
@@ -5,6 +5,7 @@ import java.util.Set;
 
 public class Article {
     private String id;
+    private String source;
     private URL url;
     private String title;
     private Set authors;
@@ -18,6 +19,14 @@ public class Article {
         this.id = id;
     }
 
+    public String getSource() {
+        return source;
+    }
+
+    public void setSource(String source) {
+        this.source = source;
+    }
+
     public URL getUrl() {
         return url;
     }
diff --git a/src/main/java/internal/hw/crawler/strategies/crawl/IthomeCrawlStrategy.java b/src/main/java/internal/hw/crawler/strategies/crawl/IthomeCrawlStrategy.java
index 86ef862..0ea4f6a 100644
--- a/src/main/java/internal/hw/crawler/strategies/crawl/IthomeCrawlStrategy.java
+++ b/src/main/java/internal/hw/crawler/strategies/crawl/IthomeCrawlStrategy.java
@@ -39,6 +39,7 @@ public class IthomeCrawlStrategy implements CrawlStrategy {
 
         Article article = new Article();
         article.setId(id);
+        article.setSource("ithome");
         article.setUrl(url);
         article.setTitle(title);
         article.setAuthors(authors);