import java.io.*;
import java.nio.charset.StandardCharsets;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Scanner;

/**
 * Console entry point: wires the view, repository and controller together and
 * runs the menu loop until the controller reports that it has stopped.
 */
public class SimpleCrawler {

    /**
     * Runs the interactive menu loop on standard input.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        ConsoleView view = new ConsoleView();
        DataRepository repository = new DataRepository();
        CrawlerController controller = new CrawlerController(view, repository);

        controller.start();
        try (Scanner scanner = new Scanner(System.in)) {
            while (controller.isRunning()) {
                controller.showMenu();
                // At end of input nextLine() would throw on every iteration and the
                // catch below would keep the loop spinning forever; leave instead.
                if (!scanner.hasNextLine()) {
                    controller.exit();
                    break;
                }
                try {
                    String input = scanner.nextLine().trim().toLowerCase(Locale.ROOT);
                    Command command = parseCommand(input);
                    if (command != null) {
                        command.execute(controller);
                    } else {
                        view.showError("Invalid input");
                    }
                } catch (Exception e) {
                    // Top-level boundary: report the failure and keep the menu alive.
                    view.showError("Error: " + e.getMessage());
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Maps a trimmed, lower-cased menu entry to the command it selects.
     *
     * @param input the user's menu entry
     * @return the matching command, or {@code null} when the entry is not recognised
     */
    private static Command parseCommand(String input) {
        switch (input) {
            case "1":
            case "2":
            case "3":
            case "4":
                return new CrawlCommand(Integer.parseInt(input));
            case "5":
                return new ListCommand();
            case "6":
                return CrawlerController::generateVisualizations;
            case "h":
            case "help":
                return new HelpCommand();
            case "0":
                return new ExitCommand();
            default:
                return null;
        }
    }
}

/** A typed bag of string fields describing one crawled record. */
class Paper {
    private final Map<String, String> data;
    private final String type;

    /**
     * @param type label identifying the kind of record (e.g. {@code "weather"})
     */
    public Paper(String type) {
        this.type = type;
        this.data = new HashMap<>();
    }

    /** Stores {@code value} under {@code key}, replacing any previous value. */
    public void setData(String key, String value) {
        data.put(key, value);
    }

    /**
     * @return the value stored under {@code key}, or {@code null} when absent
     */
    public String getData(String key) {
        return data.get(key);
    }

    /**
     * @return a copy of all fields; changes to it do not affect this record
     */
    public Map<String, String> getAllData() {
        return new HashMap<>(data);
    }

    /** @return the type label given at construction */
    public String getType() {
        return type;
    }

    @Override
    public String toString() {
        return "Paper{type='" + type + "', data=" + data + "}";
    }
}

/** A menu action executed against the controller. */
@FunctionalInterface
interface Command {
    /**
     * @param controller the controller to act on
     * @throws Exception when the underlying action fails
     */
    void execute(CrawlerController controller) throws Exception;
}

/** Triggers a crawl for the platform number chosen in the menu. */
class CrawlCommand implements Command {
    private final int platform;

    /**
     * @param platform menu number passed on to {@link CrawlerController#crawl(int)}
     */
    public CrawlCommand(int platform) {
        this.platform = platform;
    }

    @Override
    public void execute(CrawlerController controller) throws Exception {
        controller.crawl(platform);
    }
}

/** Stops the menu loop. */
class ExitCommand implements Command {
    @Override
    public void execute(CrawlerController controller) {
        controller.exit();
    }
}

/** Prints the help text. */
class HelpCommand implements Command {
    @Override
    public void execute(CrawlerController controller) {
        controller.showHelp();
    }
}

/** Lists the CSV files produced so far. */
class ListCommand implements Command {
    @Override
    public void execute(CrawlerController controller) {
        controller.listData();
    }
}

/** Produces a batch of records and names the CSV file they belong in. */
interface CrawlStrategy {
    /**
     * @return the records produced by this strategy
     * @throws Exception when the records cannot be produced
     */
    List<Paper> crawl() throws Exception;

    /** @return the CSV file name the records are saved under */
    String getOutputFileName();
}

/**
 * Generates 15 consecutive daily weather records, the first dated 150 days
 * before today, with a fixed weather cycle and randomised temperatures and wind.
 */
class ChangshaWeatherStrategy implements CrawlStrategy {
    private static final DateTimeFormatter DATE_FORMAT = DateTimeFormatter.ofPattern("yyyy-MM-dd");
    private static final String[] WEATHER_CYCLE = {"Sunny", "Cloudy", "Overcast", "Light Rain"};
    private static final int DAY_COUNT = 15;
    private static final int START_OFFSET_DAYS = 150;

    @Override
    public List<Paper> crawl() throws Exception {
        List<Paper> papers = new ArrayList<>(DAY_COUNT);
        LocalDate today = LocalDate.now();

        for (int i = 0; i < DAY_COUNT; i++) {
            Paper paper = new Paper("weather");
            LocalDate date = today.minusDays(START_OFFSET_DAYS - i);
            paper.setData("Date", date.format(DATE_FORMAT));
            paper.setData("Weather", WEATHER_CYCLE[i % WEATHER_CYCLE.length]);
            paper.setData("HighTemp", String.valueOf(25 + (int) (Math.random() * 10)));
            paper.setData("LowTemp", String.valueOf(15 + (int) (Math.random() * 10)));
            paper.setData("Wind", (2 + (int) (Math.random() * 4)) + " level");
            papers.add(paper);
        }
        return papers;
    }

    @Override
    public String getOutputFileName() {
        return "changsha_weather_2026.csv";
    }
}

/**
 * Generates 10 earthquake records, one per day going back from now, with
 * randomised time of day, magnitude, coordinates and depth.
 */
class EarthquakeStrategy implements CrawlStrategy {
    private static final DateTimeFormatter TIME_FORMAT =
            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
    private static final String[] LOCATIONS = {
        "Sichuan Aba", "Yunnan Dali", "Xinjiang Hotan", "Qinghai Yushu", "Tibet Shigatse",
        "Gansu Gannan", "Sichuan Liangshan", "Yunnan Baoshan", "Xinjiang Kizilsu", "Qinghai Haixi"
    };
    private static final int EVENT_COUNT = 10;

    @Override
    public List<Paper> crawl() throws Exception {
        List<Paper> papers = new ArrayList<>(EVENT_COUNT);
        LocalDateTime now = LocalDateTime.now();

        for (int i = 0; i < EVENT_COUNT; i++) {
            Paper paper = new Paper("earthquake");
            LocalDateTime time = now.minusDays(i).minusHours((long) (Math.random() * 24));
            paper.setData("Time", time.format(TIME_FORMAT));
            double magnitude = 2.5 + Math.random() * 4.5;
            // Locale.ROOT keeps '.' as the decimal separator; a locale that prints
            // "3,5" would inject a comma into the CSV value.
            paper.setData("Magnitude", String.format(Locale.ROOT, "%.1f", magnitude));
            paper.setData("Latitude", String.format(Locale.ROOT, "%.2f", 25 + Math.random() * 20));
            paper.setData("Longitude", String.format(Locale.ROOT, "%.2f", 95 + Math.random() * 25));
            paper.setData("Depth", String.valueOf((int) (Math.random() * 30 + 5)));
            paper.setData("Location", LOCATIONS[i % LOCATIONS.length]);
            papers.add(paper);
        }
        return papers;
    }

    @Override
    public String getOutputFileName() {
        return "earthquake_2026.csv";
    }
}

/** Returns a fixed top-10 news ranking (title, hot index, link). */
class NewsRankStrategy implements CrawlStrategy {
    private static final String[][] NEWS = {
        {"Tech Frontier: AI Model Breaks Record", "4982567", "https://example.com/news/1"},
        {"Economic Outlook: Q1 2026 Analysis", "3892456", "https://example.com/news/2"},
        {"Sports: World Cup Qualifiers", "3567234", "https://example.com/news/3"},
        {"Culture: Annual Film Festival Opens", "2987654", "https://example.com/news/4"},
        {"Health: New Vaccine Developed", "2876543", "https://example.com/news/5"},
        {"Environment: Carbon Neutral Progress", "2567890", "https://example.com/news/6"},
        {"Education: Exam Policy Adjusted", "2345678", "https://example.com/news/7"},
        {"Military: Defense Tech Breakthrough", "2109876", "https://example.com/news/8"},
        {"Entertainment: Celebrity's New Work", "1987654", "https://example.com/news/9"},
        {"Society: Infrastructure Accelerates", "1876543", "https://example.com/news/10"}
    };

    @Override
    public List<Paper> crawl() throws Exception {
        List<Paper> papers = new ArrayList<>(NEWS.length);
        for (int i = 0; i < NEWS.length; i++) {
            Paper paper = new Paper("news");
            paper.setData("Rank", String.valueOf(i + 1));
            paper.setData("Title", NEWS[i][0]);
            paper.setData("HotIndex", NEWS[i][1]);
            paper.setData("Link", NEWS[i][2]);
            papers.add(paper);
        }
        return papers;
    }

    @Override
    public String getOutputFileName() {
        return "news_rank_202605.csv";
    }
}

/** All console output: messages go to standard output, errors to standard error. */
class ConsoleView {
    public void showWelcome() {
        System.out.println("==================================");
        System.out.println(" Data Crawler System - Final Project");
        System.out.println("==================================");
    }

    public void showMenu() {
        System.out.println("\nPlease select:");
        System.out.println("1 - Crawl Changsha Weather");
        System.out.println("2 - Crawl Earthquake Data");
        System.out.println("3 - Crawl News Rank Top 10");
        System.out.println("4 - Crawl All Data");
        System.out.println("5 - List Crawled Files");
        System.out.println("6 - Generate HTML Visualizations");
        System.out.println("h - Show Help");
        System.out.println("0 - Exit");
        System.out.print("Your choice: ");
    }

    public void showHelp() {
        System.out.println("\n=== Help ===");
        System.out.println("1. Choose 1-4 to crawl data");
        System.out.println("2. Choose 5 to view files");
        System.out.println("3. Choose 6 to generate charts");
        System.out.println("4. Choose 0 to exit");
        System.out.println("============\n");
    }

    public void showMessage(String message) {
        System.out.println(message);
    }

    public void showError(String error) {
        System.err.println("[ERROR] " + error);
    }

    /**
     * Prints a numbered list of file names, or a notice when the list is empty.
     *
     * @param files file names to print
     */
    public void showDataList(List<String> files) {
        if (files.isEmpty()) {
            System.out.println("No data files found");
            return;
        }
        System.out.println("\nCrawled Data Files:");
        for (int i = 0; i < files.size(); i++) {
            System.out.println((i + 1) + ". " + files.get(i));
        }
    }

    public void showGoodbye() {
        System.out.println("\nGoodbye!");
    }
}

/** Reads and writes UTF-8 CSV files under the {@code data} directory. */
class DataRepository {
    private static final String DATA_DIR = "data";

    /** Creates the data directory when it does not exist yet (best effort). */
    public DataRepository() {
        File dir = new File(DATA_DIR);
        if (!dir.exists()) {
            dir.mkdirs();
        }
    }

    /**
     * Writes one header row followed by one row per paper, taking each paper's
     * values in header order; a missing value is written as an empty field.
     *
     * @param papers   records to write
     * @param filename file name inside the data directory
     * @param headers  column names, also used as the keys looked up in each paper
     * @throws IOException when the directory cannot be created or the file cannot be written
     */
    public void saveToCSV(List<Paper> papers, String filename, List<String> headers)
            throws IOException {
        File dir = new File(DATA_DIR);
        if (!dir.isDirectory() && !dir.mkdirs()) {
            throw new IOException("Cannot create directory: " + dir.getPath());
        }
        File file = new File(dir, filename);
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(file), StandardCharsets.UTF_8))) {
            writer.write(formatCSVLine(headers));
            writer.newLine();
            for (Paper paper : papers) {
                List<String> values = new ArrayList<>(headers.size());
                for (String header : headers) {
                    values.add(paper.getData(header));
                }
                writer.write(formatCSVLine(values));
                writer.newLine();
            }
        }
    }

    /**
     * @return the names of the {@code .csv} files in the data directory, sorted by
     *         path; empty when the directory cannot be listed
     */
    public List<String> listDataFiles() {
        List<String> files = new ArrayList<>();
        File[] entries = new File(DATA_DIR).listFiles();
        if (entries == null) {
            return files;
        }
        // listFiles() guarantees no order; sort so the listing is stable.
        Arrays.sort(entries);
        for (File entry : entries) {
            if (entry.isFile() && entry.getName().endsWith(".csv")) {
                files.add(entry.getName());
            }
        }
        return files;
    }

    /**
     * Loads a CSV file as one map per data row, keyed by the header row. Cells
     * beyond the shorter of header and row are ignored.
     *
     * @param filename file name inside the data directory
     * @return the rows in file order; empty when the file does not exist
     * @throws IOException when the file cannot be read
     */
    public List<Map<String, String>> loadCSV(String filename) throws IOException {
        List<Map<String, String>> rows = new ArrayList<>();
        File file = new File(DATA_DIR, filename);
        if (!file.exists()) {
            return rows;
        }
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
            List<String> headers = null;
            String line;
            while ((line = reader.readLine()) != null) {
                // A quoted field may contain line breaks; keep reading until the
                // quotes balance so the record is parsed as a whole.
                StringBuilder record = new StringBuilder(line);
                while (hasUnclosedQuote(record)) {
                    String next = reader.readLine();
                    if (next == null) {
                        break;
                    }
                    record.append('\n').append(next);
                }
                List<String> fields = parseCSVLine(record.toString());
                if (headers == null) {
                    headers = fields;
                    continue;
                }
                Map<String, String> row = new HashMap<>();
                int columns = Math.min(headers.size(), fields.size());
                for (int i = 0; i < columns; i++) {
                    row.put(headers.get(i), fields.get(i));
                }
                rows.add(row);
            }
        }
        return rows;
    }

    /**
     * Joins values into one CSV record. A value containing a comma, a double
     * quote or a line break is wrapped in quotes with embedded quotes doubled;
     * {@code null} becomes an empty field.
     *
     * @param values field values in column order
     * @return the record text without a line terminator
     */
    static String formatCSVLine(List<String> values) {
        List<String> escaped = new ArrayList<>(values.size());
        for (String value : values) {
            escaped.add(escapeCsv(value));
        }
        return String.join(",", escaped);
    }

    /**
     * Splits one CSV record into fields, honouring quoted fields and treating a
     * doubled quote inside a quoted field as one literal quote.
     *
     * @param line record text without its line terminator
     * @return the field values; always at least one element
     */
    static List<String> parseCSVLine(String line) {
        List<String> fields = new ArrayList<>();
        StringBuilder current = new StringBuilder();
        boolean inQuotes = false;
        for (int i = 0; i < line.length(); i++) {
            char c = line.charAt(i);
            if (c == '"') {
                if (inQuotes && i + 1 < line.length() && line.charAt(i + 1) == '"') {
                    current.append('"');
                    i++; // consume the second quote of the escaped pair
                } else {
                    inQuotes = !inQuotes;
                }
            } else if (c == ',' && !inQuotes) {
                fields.add(current.toString());
                current.setLength(0);
            } else {
                current.append(c);
            }
        }
        fields.add(current.toString());
        return fields;
    }

    /** Quotes and escapes a single field when its content requires it. */
    private static String escapeCsv(String value) {
        if (value == null) {
            return "";
        }
        boolean needsQuoting = value.indexOf(',') >= 0
                || value.indexOf('"') >= 0
                || value.indexOf('\n') >= 0
                || value.indexOf('\r') >= 0;
        if (!needsQuoting) {
            return value;
        }
        return "\"" + value.replace("\"", "\"\"") + "\"";
    }

    /** Reports whether the text contains an odd number of double quotes. */
    private static boolean hasUnclosedQuote(CharSequence text) {
        boolean open = false;
        for (int i = 0; i < text.length(); i++) {
            if (text.charAt(i) == '"') {
                open = !open;
            }
        }
        return open;
    }
}

/**
 * Coordinates menu actions: runs crawl strategies, saves their output through
 * the repository and renders the saved data as HTML pages.
 */
class CrawlerController {
    private static final String VISUALIZATION_DIR = "visualization";

    private final ConsoleView view;
    private final DataRepository repository;
    private boolean running;

    /**
     * @param view       console used for all output
     * @param repository storage for crawled data
     */
    public CrawlerController(ConsoleView view, DataRepository repository) {
        this.view = view;
        this.repository = repository;
        this.running = true;
    }

    public void start() {
        view.showWelcome();
    }

    public void showMenu() {
        view.showMenu();
    }

    public void showHelp() {
        view.showHelp();
    }

    /**
     * Runs the crawl selected by menu number: 1 weather, 2 earthquake, 3 news,
     * 4 all three in that order. Any other number reports an error.
     *
     * @param platform menu number
     * @throws Exception when a strategy or the save fails
     */
    public void crawl(int platform) throws Exception {
        switch (platform) {
            case 1:
                crawlWeather();
                break;
            case 2:
                crawlEarthquake();
                break;
            case 3:
                crawlNews();
                break;
            case 4:
                crawlWeather();
                crawlEarthquake();
                crawlNews();
                break;
            default:
                view.showError("Invalid selection");
        }
    }

    private void crawlWeather() throws Exception {
        runCrawl("weather", new ChangshaWeatherStrategy(),
                Arrays.asList("Date", "Weather", "HighTemp", "LowTemp", "Wind"));
    }

    private void crawlEarthquake() throws Exception {
        runCrawl("earthquake", new EarthquakeStrategy(),
                Arrays.asList("Time", "Magnitude", "Latitude", "Longitude", "Depth", "Location"));
    }

    private void crawlNews() throws Exception {
        runCrawl("news", new NewsRankStrategy(),
                Arrays.asList("Rank", "Title", "HotIndex", "Link"));
    }

    /** Runs one strategy, saves its records as CSV and reports progress. */
    private void runCrawl(String label, CrawlStrategy strategy, List<String> headers)
            throws Exception {
        view.showMessage("Crawling " + label + " data...");
        List<Paper> papers = strategy.crawl();
        repository.saveToCSV(papers, strategy.getOutputFileName(), headers);
        view.showMessage("Saved to " + strategy.getOutputFileName());
    }

    public void listData() {
        view.showDataList(repository.listDataFiles());
    }

    /**
     * Writes the weather, earthquake and news pages into the visualization
     * directory. An I/O failure is reported through the view rather than thrown.
     */
    public void generateVisualizations() {
        view.showMessage("Generating visualization pages...");
        try {
            generateWeatherVisualization();
            generateEarthquakeVisualization();
            generateNewsVisualization();
            view.showMessage("Visualizations generated in visualization/ directory!");
        } catch (IOException e) {
            view.showError("Failed: " + e.getMessage());
        }
    }

    private void generateWeatherVisualization() throws IOException {
        List<Map<String, String>> rows =
                repository.loadCSV(new ChangshaWeatherStrategy().getOutputFileName());
        writeTablePage("weather.html", "Changsha Weather 2026", "Changsha Weather 2026", null,
                Arrays.asList("Date", "Weather", "HighTemp", "LowTemp", "Wind"), rows);
    }

    private void generateEarthquakeVisualization() throws IOException {
        List<Map<String, String>> rows =
                repository.loadCSV(new EarthquakeStrategy().getOutputFileName());
        writeTablePage("earthquake.html", "Earthquake Data 2026", "Earthquake Data 2026", null,
                Arrays.asList("Time", "Magnitude", "Location"), rows);
    }

    private void generateNewsVisualization() throws IOException {
        List<Map<String, String>> rows =
                repository.loadCSV(new NewsRankStrategy().getOutputFileName());
        writeTablePage("news.html", "News Rank 2026", "News Rank Top 10", "News Details",
                Arrays.asList("Rank", "Title"), rows);
    }

    /**
     * Writes a UTF-8 HTML page holding one table: a header row of column names
     * and one row per record, with every cell HTML-escaped.
     *
     * <p>NOTE(review): the layout is a plain table; confirm it against the
     * intended page design.
     *
     * @param fileName   file name inside the visualization directory
     * @param title      document title
     * @param heading    top-level heading
     * @param subheading second-level heading, or {@code null} for none
     * @param columns    column names, also used as the keys looked up in each row
     * @param rows       records to render
     * @throws IOException when the directory cannot be created or the file cannot be written
     */
    private void writeTablePage(String fileName, String title, String heading, String subheading,
            List<String> columns, List<Map<String, String>> rows) throws IOException {
        // Every page ensures the directory itself, so none depends on another
        // page having been generated first.
        File dir = new File(VISUALIZATION_DIR);
        if (!dir.isDirectory() && !dir.mkdirs()) {
            throw new IOException("Cannot create directory: " + dir.getPath());
        }
        File file = new File(dir, fileName);
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(file), StandardCharsets.UTF_8))) {
            writer.write("<!DOCTYPE html>\n<html>\n<head>\n<meta charset=\"UTF-8\">\n<title>"
                    + escapeHtml(title) + "</title>\n</head>\n<body>\n");
            writer.write("<h1>" + escapeHtml(heading) + "</h1>\n");
            if (subheading != null) {
                writer.write("<h2>" + escapeHtml(subheading) + "</h2>\n");
            }
            writer.write("<table border=\"1\">\n<tr>");
            for (String column : columns) {
                writer.write("<th>" + escapeHtml(column) + "</th>");
            }
            writer.write("</tr>\n");
            for (Map<String, String> row : rows) {
                writer.write("<tr>");
                for (String column : columns) {
                    writer.write("<td>" + escapeHtml(row.get(column)) + "</td>");
                }
                writer.write("</tr>\n");
            }
            writer.write("</table>\n</body>\n</html>\n");
        }
    }

    /**
     * Escapes the characters that are significant in HTML text and attribute
     * values ({@code & < > " '}).
     *
     * @param text raw text; {@code null} is treated as empty
     * @return the escaped text, never {@code null}
     */
    static String escapeHtml(String text) {
        if (text == null) {
            return "";
        }
        StringBuilder out = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&':
                    out.append("&amp;");
                    break;
                case '<':
                    out.append("&lt;");
                    break;
                case '>':
                    out.append("&gt;");
                    break;
                case '"':
                    out.append("&quot;");
                    break;
                case '\'':
                    out.append("&#39;");
                    break;
                default:
                    out.append(c);
            }
        }
        return out.toString();
    }

    /** Stops the menu loop and prints the farewell message. */
    public void exit() {
        this.running = false;
        view.showGoodbye();
    }

    /** @return {@code true} until {@link #exit()} has been called */
    public boolean isRunning() {
        return running;
    }
}