Browse Source

basic saving command

master
283375 1 month ago
parent
commit
2549d03287
Failed to extract signature
  1. 2
      .gitignore
  2. 6
      pom.xml
  3. 6
      src/main/java/internal/hw/crawler/Main.java
  4. 61
      src/main/java/internal/hw/crawler/commands/SaveCommand.java
  5. 9
      src/main/java/internal/hw/crawler/models/Article.java
  6. 1
      src/main/java/internal/hw/crawler/strategies/crawl/IthomeCrawlStrategy.java

2
.gitignore

@ -4,6 +4,8 @@ target/
!**/src/test/**/target/ !**/src/test/**/target/
.kotlin .kotlin
*.output.json
### IntelliJ IDEA ### ### IntelliJ IDEA ###
.idea/ .idea/

6
pom.xml

@ -18,5 +18,11 @@
<artifactId>jsoup</artifactId> <artifactId>jsoup</artifactId>
<version>1.22.2</version> <version>1.22.2</version>
</dependency> </dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.14.0</version>
<scope>compile</scope>
</dependency>
</dependencies> </dependencies>
</project> </project>

6
src/main/java/internal/hw/crawler/Main.java

@ -1,9 +1,6 @@
package internal.hw.crawler; package internal.hw.crawler;
import internal.hw.crawler.commands.CrawlCommand; import internal.hw.crawler.commands.*;
import internal.hw.crawler.commands.ExitCommand;
import internal.hw.crawler.commands.HelpCommand;
import internal.hw.crawler.commands.ListCommand;
import internal.hw.crawler.repositories.ArticleRepository; import internal.hw.crawler.repositories.ArticleRepository;
import internal.hw.crawler.views.ConsoleView; import internal.hw.crawler.views.ConsoleView;
@ -16,6 +13,7 @@ public class Main {
controller.registerCommand(new ExitCommand()); controller.registerCommand(new ExitCommand());
controller.registerCommand(new CrawlCommand(repository, view)); controller.registerCommand(new CrawlCommand(repository, view));
controller.registerCommand(new ListCommand(repository, view)); controller.registerCommand(new ListCommand(repository, view));
controller.registerCommand(new SaveCommand(repository, view));
controller.registerCommand(new HelpCommand(controller.getCommands(), view)); controller.registerCommand(new HelpCommand(controller.getCommands(), view));
view.printSuccess("Welcome to crawler. Type `help` for a list of available commands."); view.printSuccess("Welcome to crawler. Type `help` for a list of available commands.");

61
src/main/java/internal/hw/crawler/commands/SaveCommand.java

@ -0,0 +1,61 @@
package internal.hw.crawler.commands;
import com.google.gson.Gson;
import internal.hw.crawler.models.Article;
import internal.hw.crawler.repositories.ArticleRepository;
import internal.hw.crawler.views.CommandOutput;
import java.io.*;
import java.util.*;
import java.util.stream.Collectors;
/**
 * Console command {@code save}: merges the articles currently held by the
 * {@link ArticleRepository} into {@code articles.output.json} and rewrites that
 * file as a single JSON array.
 *
 * <p>Articles are identified by the key {@code "<source>-<id>"}. An article from
 * the repository replaces a previously saved article with the same key; saved
 * articles that are not in the repository are kept.
 */
public class SaveCommand implements Command {
    /** File the articles are merged into; relative to the working directory. */
    private static final String OUTPUT_FILENAME = "articles.output.json";

    /**
     * Charset used for both reading and writing. Fixed to UTF-8 so the file does
     * not depend on the platform default charset (which FileReader/FileWriter
     * use on JDKs before 18) and non-ASCII article text round-trips intact.
     */
    private static final java.nio.charset.Charset FILE_CHARSET =
            java.nio.charset.StandardCharsets.UTF_8;

    private final Gson gson = new Gson();
    private final ArticleRepository articleRepository;
    private final CommandOutput out;

    /**
     * @param articleRepository repository whose articles are written on {@code save}
     * @param out               sink for the success message
     */
    public SaveCommand(ArticleRepository articleRepository, CommandOutput out) {
        this.articleRepository = articleRepository;
        this.out = out;
    }

    @Override
    public String getName() {
        return "save";
    }

    /**
     * Loads the previously saved articles, overlays the repository's articles on
     * top of them and writes the merged result back to the output file.
     *
     * @param args command arguments; not used
     * @throws UncheckedIOException if the output file exists but cannot be read,
     *                              or cannot be written
     */
    @Override
    public void execute(String[] args) {
        String filename = OUTPUT_FILENAME;

        // LinkedHashMap keeps the file order stable across saves, and put()
        // tolerates duplicate keys already present in the file (last one wins)
        // where Collectors.toMap would throw IllegalStateException.
        Map<String, Article> articleMap = new LinkedHashMap<>();
        for (Article article : getExistingArticles(filename)) {
            if (article != null) { // a JSON "null" array element deserializes to null
                articleMap.put(articleMapId(article), article);
            }
        }

        // Update existing articles with new articles
        for (Article article : articleRepository.getAll()) {
            articleMap.put(articleMapId(article), article);
        }

        Article[] articlesToSave = articleMap.values().toArray(new Article[0]);

        // Serialize before opening the file: opening truncates it, so a
        // serialization failure must not leave an empty file behind.
        String json = gson.toJson(articlesToSave);

        try (Writer writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(filename), FILE_CHARSET))) {
            writer.write(json);
        } catch (IOException e) {
            throw new UncheckedIOException("Failed to write " + filename, e);
        }

        // Report success only after the writer has been flushed and closed.
        out.success(String.format("Wrote %d articles to %s", articlesToSave.length, filename));
    }

    /** Builds the merge key {@code "<source>-<id>"} for an article. */
    private String articleMapId(Article article) {
        return String.format("%s-%s", article.getSource(), article.getId());
    }

    /**
     * Reads the articles stored in {@code filename}.
     *
     * @return the stored articles, or an empty list when the file does not exist
     *         or is empty
     * @throws UncheckedIOException if the file exists but cannot be opened or
     *                              closed; failing here prevents {@link #execute}
     *                              from overwriting saved data it could not read
     */
    private List<Article> getExistingArticles(String filename) {
        File file = new File(filename);
        if (!file.isFile()) {
            // Nothing has been saved yet.
            return List.of();
        }
        try (Reader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), FILE_CHARSET))) {
            Article[] articles = gson.fromJson(reader, Article[].class);
            // Gson returns null when the input is at EOF (empty file).
            return articles == null ? List.of() : Arrays.asList(articles);
        } catch (IOException e) {
            throw new UncheckedIOException("Failed to read " + filename, e);
        }
    }
}

9
src/main/java/internal/hw/crawler/models/Article.java

@ -5,6 +5,7 @@ import java.util.Set;
public class Article { public class Article {
private String id; private String id;
private String source;
private URL url; private URL url;
private String title; private String title;
private Set<String> authors; private Set<String> authors;
@ -18,6 +19,14 @@ public class Article {
this.id = id; this.id = id;
} }
/**
 * Returns the source label stored on this article.
 *
 * @return the value of the {@code source} field, e.g. {@code "ithome"}
 */
public String getSource() {
    return this.source;
}
/**
 * Sets the source label of this article.
 *
 * @param source label identifying where the article came from; the ithome
 *               crawl strategy passes {@code "ithome"}
 */
public void setSource(String source) {
this.source = source;
}
public URL getUrl() { public URL getUrl() {
return url; return url;
} }

1
src/main/java/internal/hw/crawler/strategies/crawl/IthomeCrawlStrategy.java

@ -39,6 +39,7 @@ public class IthomeCrawlStrategy implements CrawlStrategy {
Article article = new Article(); Article article = new Article();
article.setId(id); article.setId(id);
article.setSource("ithome");
article.setUrl(url); article.setUrl(url);
article.setTitle(title); article.setTitle(title);
article.setAuthors(authors); article.setAuthors(authors);

Loading…
Cancel
Save