You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
138 lines
4.2 KiB
138 lines
4.2 KiB
package com.scraper.controller;
|
|
|
|
import com.google.gson.Gson;
|
|
import com.google.gson.GsonBuilder;
|
|
import com.scraper.exception.StorageException;
|
|
import com.scraper.exception.StrategyException;
|
|
import com.scraper.model.ScrapedData;
|
|
import com.scraper.strategy.BooksScraperStrategy;
|
|
import com.scraper.strategy.NewsScraperStrategy;
|
|
import com.scraper.strategy.ScraperStrategy;
|
|
import com.scraper.strategy.TechNewsScraperStrategy;
|
|
|
|
import java.io.File;
|
|
import java.io.FileWriter;
|
|
import java.io.IOException;
|
|
import java.nio.file.Files;
|
|
import java.nio.file.Path;
|
|
import java.nio.file.Paths;
|
|
import java.time.LocalDateTime;
|
|
import java.time.format.DateTimeFormatter;
|
|
import java.util.ArrayList;
|
|
import java.util.HashMap;
|
|
import java.util.List;
|
|
import java.util.Map;
|
|
|
|
/**
|
|
* MVC Controller for the scraper application
|
|
*/
|
|
public class ScraperController {
|
|
|
|
private String outputDir;
|
|
private Map<String, ScraperStrategy> strategies;
|
|
private Gson gson;
|
|
|
|
public ScraperController() {
|
|
this("data");
|
|
}
|
|
|
|
public ScraperController(String outputDir) {
|
|
this.outputDir = outputDir;
|
|
this.strategies = new HashMap<>();
|
|
this.gson = new GsonBuilder().setPrettyPrinting().create();
|
|
registerDefaultStrategies();
|
|
}
|
|
|
|
private void registerDefaultStrategies() {
|
|
registerStrategy(new NewsScraperStrategy());
|
|
registerStrategy(new BooksScraperStrategy());
|
|
registerStrategy(new TechNewsScraperStrategy());
|
|
}
|
|
|
|
public void registerStrategy(ScraperStrategy strategy) {
|
|
strategies.put(strategy.getName(), strategy);
|
|
}
|
|
|
|
public List<Map<String, String>> listStrategies() {
|
|
List<Map<String, String>> result = new ArrayList<>();
|
|
for (ScraperStrategy strategy : strategies.values()) {
|
|
Map<String, String> info = new HashMap<>();
|
|
info.put("name", strategy.getName());
|
|
info.put("source", strategy.getSource());
|
|
result.add(info);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
public ScrapedData executeScrape(String strategyName) throws StrategyException {
|
|
ScraperStrategy strategy = strategies.get(strategyName);
|
|
if (strategy == null) {
|
|
String available = String.join(", ", strategies.keySet());
|
|
throw new StrategyException(
|
|
"Strategy '" + strategyName + "' not found. Available: " + available,
|
|
strategyName,
|
|
null
|
|
);
|
|
}
|
|
|
|
try {
|
|
return strategy.scrape();
|
|
} catch (Exception e) {
|
|
if (e instanceof StrategyException) {
|
|
throw (StrategyException) e;
|
|
}
|
|
throw new StrategyException(
|
|
"Error executing strategy: " + strategyName,
|
|
strategyName,
|
|
e
|
|
);
|
|
}
|
|
}
|
|
|
|
public String saveData(ScrapedData data, String strategyName) throws StorageException {
|
|
try {
|
|
String folderPath = outputDir + File.separator + strategyName;
|
|
Path folder = Paths.get(folderPath);
|
|
Files.createDirectories(folder);
|
|
|
|
String timestamp = LocalDateTime.now().format(
|
|
DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss")
|
|
);
|
|
String filename = "scraped_data_" + timestamp + ".json";
|
|
String filePath = folderPath + File.separator + filename;
|
|
|
|
try (FileWriter writer = new FileWriter(filePath)) {
|
|
gson.toJson(data, writer);
|
|
}
|
|
|
|
return filePath;
|
|
} catch (IOException e) {
|
|
throw new StorageException(
|
|
"Failed to save data to: " + outputDir,
|
|
outputDir,
|
|
e
|
|
);
|
|
}
|
|
}
|
|
|
|
public boolean deleteData(String filePath) throws StorageException {
|
|
try {
|
|
Path path = Paths.get(filePath);
|
|
if (Files.exists(path)) {
|
|
Files.delete(path);
|
|
return true;
|
|
}
|
|
return false;
|
|
} catch (IOException e) {
|
|
throw new StorageException(
|
|
"Failed to delete file: " + filePath,
|
|
filePath,
|
|
e
|
|
);
|
|
}
|
|
}
|
|
|
|
public void setOutputDir(String outputDir) {
|
|
this.outputDir = outputDir;
|
|
}
|
|
}
|
|
|