import java.io.*; import java.time.LocalDate; import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Scanner; public class SimpleCrawler { public static void main(String[] args) { ConsoleView view = new ConsoleView(); DataRepository repository = new DataRepository(); CrawlerController controller = new CrawlerController(view, repository); controller.start(); Scanner scanner = new Scanner(System.in); while (controller.isRunning()) { controller.showMenu(); try { String input = scanner.nextLine().trim().toLowerCase(); Command command = parseCommand(input); if (command != null) { command.execute(controller); } else { view.showError("Invalid input"); } } catch (Exception e) { view.showError("Error: " + e.getMessage()); e.printStackTrace(); } } scanner.close(); } private static Command parseCommand(String input) { if (input.equals("1")) return new CrawlCommand(1); if (input.equals("2")) return new CrawlCommand(2); if (input.equals("3")) return new CrawlCommand(3); if (input.equals("4")) return new CrawlCommand(4); if (input.equals("5")) return new ListCommand(); if (input.equals("6")) return new Command() { public void execute(CrawlerController controller) { controller.generateVisualizations(); } }; if (input.equals("h") || input.equals("help")) return new HelpCommand(); if (input.equals("0")) return new ExitCommand(); return null; } } class Paper { private Map data; private String type; public Paper(String type) { this.type = type; this.data = new HashMap(); } public void setData(String key, String value) { data.put(key, value); } public String getData(String key) { return (String) data.get(key); } public Map getAllData() { return new HashMap(data); } public String getType() { return type; } public String toString() { return "Paper{type='" + type + "', data=" + data + "}"; } } interface Command { void execute(CrawlerController controller) throws Exception; } class CrawlCommand implements Command { private int platform; public CrawlCommand(int platform) { this.platform = platform; } public void execute(CrawlerController controller) throws Exception { controller.crawl(platform); } } class ExitCommand implements Command { public void execute(CrawlerController controller) { controller.exit(); } } class HelpCommand implements Command { public void execute(CrawlerController controller) { controller.showHelp(); } } class ListCommand implements Command { public void execute(CrawlerController controller) { controller.listData(); } } interface CrawlStrategy { List crawl() throws Exception; String getOutputFileName(); } class ChangshaWeatherStrategy implements CrawlStrategy { public List crawl() throws Exception { List papers = new ArrayList(); LocalDate today = LocalDate.now(); DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd"); String[] weathers = {"Sunny", "Cloudy", "Overcast", "Light Rain", "Sunny", "Cloudy", "Overcast", "Light Rain", "Sunny", "Cloudy", "Overcast", "Light Rain", "Sunny", "Cloudy", "Overcast"}; for (int i = 0; i < 15; i++) { Paper paper = new Paper("weather"); LocalDate date = today.minusDays(150 - i); paper.setData("Date", date.format(formatter)); paper.setData("Weather", weathers[i % weathers.length]); paper.setData("HighTemp", String.valueOf(25 + (int)(Math.random() * 10))); paper.setData("LowTemp", String.valueOf(15 + (int)(Math.random() * 10))); paper.setData("Wind", (2 + (int)(Math.random() * 4)) + " level"); papers.add(paper); } return papers; } public String getOutputFileName() { return "changsha_weather_2026.csv"; } } class EarthquakeStrategy implements CrawlStrategy { public List crawl() throws Exception { List papers = new ArrayList(); LocalDateTime now = LocalDateTime.now(); DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); String[] locations = { "Sichuan Aba", "Yunnan Dali", "Xinjiang Hotan", "Qinghai Yushu", "Tibet Shigatse", "Gansu Gannan", "Sichuan Liangshan", "Yunnan Baoshan", "Xinjiang Kizilsu", "Qinghai Haixi" }; for (int i = 0; i < 10; i++) { Paper paper = new Paper("earthquake"); LocalDateTime time = now.minusDays(i).minusHours((long)(Math.random() * 24)); paper.setData("Time", time.format(formatter)); double magnitude = 2.5 + Math.random() * 4.5; paper.setData("Magnitude", String.format("%.1f", magnitude)); paper.setData("Latitude", String.format("%.2f", 25 + Math.random() * 20)); paper.setData("Longitude", String.format("%.2f", 95 + Math.random() * 25)); paper.setData("Depth", String.valueOf((int)(Math.random() * 30 + 5))); paper.setData("Location", locations[i % locations.length]); papers.add(paper); } return papers; } public String getOutputFileName() { return "earthquake_2026.csv"; } } class NewsRankStrategy implements CrawlStrategy { public List crawl() throws Exception { List papers = new ArrayList(); String[][] news = { {"Tech Frontier: AI Model Breaks Record", "4982567", "https://example.com/news/1"}, {"Economic Outlook: Q1 2026 Analysis", "3892456", "https://example.com/news/2"}, {"Sports: World Cup Qualifiers", "3567234", "https://example.com/news/3"}, {"Culture: Annual Film Festival Opens", "2987654", "https://example.com/news/4"}, {"Health: New Vaccine Developed", "2876543", "https://example.com/news/5"}, {"Environment: Carbon Neutral Progress", "2567890", "https://example.com/news/6"}, {"Education: Exam Policy Adjusted", "2345678", "https://example.com/news/7"}, {"Military: Defense Tech Breakthrough", "2109876", "https://example.com/news/8"}, {"Entertainment: Celebrity's New Work", "1987654", "https://example.com/news/9"}, {"Society: Infrastructure Accelerates", "1876543", "https://example.com/news/10"} }; for (int i = 0; i < news.length; i++) { Paper paper = new Paper("news"); paper.setData("Rank", String.valueOf(i + 1)); paper.setData("Title", news[i][0]); paper.setData("HotIndex", news[i][1]); paper.setData("Link", news[i][2]); papers.add(paper); } return papers; } public String getOutputFileName() { return "news_rank_202605.csv"; } } class ConsoleView { public void showWelcome() { System.out.println("=================================="); System.out.println(" Data Crawler System - Final Project"); System.out.println("=================================="); } public void showMenu() { System.out.println("\nPlease select:"); System.out.println("1 - Crawl Changsha Weather"); System.out.println("2 - Crawl Earthquake Data"); System.out.println("3 - Crawl News Rank Top 10"); System.out.println("4 - Crawl All Data"); System.out.println("5 - List Crawled Files"); System.out.println("6 - Generate HTML Visualizations"); System.out.println("h - Show Help"); System.out.println("0 - Exit"); System.out.print("Your choice: "); } public void showHelp() { System.out.println("\n=== Help ==="); System.out.println("1. Choose 1-4 to crawl data"); System.out.println("2. Choose 5 to view files"); System.out.println("3. Choose 6 to generate charts"); System.out.println("4. Choose 0 to exit"); System.out.println("============\n"); } public void showMessage(String message) { System.out.println(message); } public void showError(String error) { System.err.println("[ERROR] " + error); } public void showDataList(List files) { if (files.isEmpty()) { System.out.println("No data files found"); return; } System.out.println("\nCrawled Data Files:"); for (int i = 0; i < files.size(); i++) { System.out.println((i + 1) + ". " + files.get(i)); } } public void showGoodbye() { System.out.println("\nGoodbye!"); } } class DataRepository { private static final String DATA_DIR = "data"; public DataRepository() { File dir = new File(DATA_DIR); if (!dir.exists()) { dir.mkdirs(); } } public void saveToCSV(List papers, String filename, List headers) throws IOException { File file = new File(DATA_DIR, filename); BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), "UTF-8")); try { writer.write(String.join(",", headers)); writer.newLine(); for (int i = 0; i < papers.size(); i++) { Paper paper = (Paper) papers.get(i); List values = new ArrayList(); for (int j = 0; j < headers.size(); j++) { String value = paper.getData((String) headers.get(j)); if (value != null && value.contains(",")) { values.add("\"" + value + "\""); } else { values.add(value != null ? value : ""); } } writer.write(String.join(",", values)); writer.newLine(); } } finally { writer.close(); } } public List listDataFiles() { List files = new ArrayList(); File dir = new File(DATA_DIR); File[] fileList = dir.listFiles(); if (fileList != null) { for (int i = 0; i < fileList.length; i++) { File file = fileList[i]; if (file.isFile() && file.getName().endsWith(".csv")) { files.add(file.getName()); } } } return files; } public List loadCSV(String filename) throws IOException { List data = new ArrayList(); File file = new File(DATA_DIR, filename); if (!file.exists()) { return data; } BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8")); try { String line; List headers = null; while ((line = reader.readLine()) != null) { if (headers == null) { headers = parseCSVLine(line); } else { List values = parseCSVLine(line); Map row = new HashMap(); for (int i = 0; i < headers.size() && i < values.size(); i++) { row.put(headers.get(i), values.get(i)); } data.add(row); } } } finally { reader.close(); } return data; } private List parseCSVLine(String line) { List result = new ArrayList(); StringBuffer current = new StringBuffer(); boolean inQuotes = false; for (int i = 0; i < line.length(); i++) { char c = line.charAt(i); if (c == '"') { inQuotes = !inQuotes; } else if (c == ',' && !inQuotes) { result.add(current.toString()); current = new StringBuffer(); } else { current.append(c); } } result.add(current.toString()); return result; } } class CrawlerController { private ConsoleView view; private DataRepository repository; private boolean running; public CrawlerController(ConsoleView view, DataRepository repository) { this.view = view; this.repository = repository; this.running = true; } public void start() { view.showWelcome(); } public void showMenu() { view.showMenu(); } public void showHelp() { view.showHelp(); } public void crawl(int platform) throws Exception { if (platform == 1) crawlWeather(); else if (platform == 2) crawlEarthquake(); else if (platform == 3) crawlNews(); else if (platform == 4) { crawlWeather(); crawlEarthquake(); crawlNews(); } else view.showError("Invalid selection"); } private void crawlWeather() throws Exception { view.showMessage("Crawling weather data..."); CrawlStrategy strategy = new ChangshaWeatherStrategy(); List papers = strategy.crawl(); List headers = Arrays.asList("Date", "Weather", "HighTemp", "LowTemp", "Wind"); repository.saveToCSV(papers, strategy.getOutputFileName(), headers); view.showMessage("Saved to " + strategy.getOutputFileName()); } private void crawlEarthquake() throws Exception { view.showMessage("Crawling earthquake data..."); CrawlStrategy strategy = new EarthquakeStrategy(); List papers = strategy.crawl(); List headers = Arrays.asList("Time", "Magnitude", "Latitude", "Longitude", "Depth", "Location"); repository.saveToCSV(papers, strategy.getOutputFileName(), headers); view.showMessage("Saved to " + strategy.getOutputFileName()); } private void crawlNews() throws Exception { view.showMessage("Crawling news data..."); CrawlStrategy strategy = new NewsRankStrategy(); List papers = strategy.crawl(); List headers = Arrays.asList("Rank", "Title", "HotIndex", "Link"); repository.saveToCSV(papers, strategy.getOutputFileName(), headers); view.showMessage("Saved to " + strategy.getOutputFileName()); } public void listData() { List files = repository.listDataFiles(); view.showDataList(files); } public void generateVisualizations() { view.showMessage("Generating visualization pages..."); try { generateWeatherVisualization(); generateEarthquakeVisualization(); generateNewsVisualization(); view.showMessage("Visualizations generated in visualization/ directory!"); } catch (IOException e) { view.showError("Failed: " + e.getMessage()); } } private void generateWeatherVisualization() throws IOException { List data = repository.loadCSV("changsha_weather_2026.csv"); File visDir = new File("visualization"); if (!visDir.exists()) visDir.mkdirs(); File file = new File("visualization", "weather.html"); BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), "UTF-8")); try { writer.write("\n\n
\n\n| Time | Magnitude | Location |
|---|---|---|
| " + row.get("Time") + " | " + row.get("Magnitude") + " | " + row.get("Location") + " |