Browse Source

上传文件至 'project'

main
duyumeng 3 weeks ago
parent
commit
6e245d867c
  1. 532
      project/SimpleCrawler.java
  2. BIN
      project/input.txt
  3. 51
      project/main.java

532
project/SimpleCrawler.java

@ -0,0 +1,532 @@
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Scanner;
import java.util.regex.Pattern;
/**
 * Console entry point of the crawler: shows the menu, reads one choice per line from
 * standard input and runs the matching {@link Command} against the controller.
 */
public class SimpleCrawler {

    /**
     * Runs the interactive loop until the controller stops running or standard input ends.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        ConsoleView view = new ConsoleView();
        DataRepository repository = new DataRepository();
        CrawlerController controller = new CrawlerController(view, repository);
        controller.start();
        try (Scanner scanner = new Scanner(System.in)) {
            while (controller.isRunning()) {
                controller.showMenu();
                // At end of input nextLine() throws on every call; the catch-all below would
                // swallow it and the loop would spin forever, so leave the loop cleanly instead.
                if (!scanner.hasNextLine()) {
                    controller.exit();
                    break;
                }
                try {
                    String input = scanner.nextLine().trim().toLowerCase();
                    Command command = parseCommand(input);
                    if (command != null) {
                        command.execute(controller);
                    } else {
                        view.showError("Invalid input");
                    }
                } catch (Exception e) {
                    // Some exceptions carry no message; fall back to the exception's own text.
                    String detail = e.getMessage() != null ? e.getMessage() : e.toString();
                    view.showError("Error: " + detail);
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Maps a trimmed, lower-cased menu choice to the command that implements it.
     *
     * @param input the user's choice
     * @return the matching command, or {@code null} when the choice is not recognised
     */
    private static Command parseCommand(String input) {
        switch (input) {
            case "1":
                return new CrawlCommand(1);
            case "2":
                return new CrawlCommand(2);
            case "3":
                return new CrawlCommand(3);
            case "4":
                return new CrawlCommand(4);
            case "5":
                return new ListCommand();
            case "6":
                return CrawlerController::generateVisualizations;
            case "h":
            case "help":
                return new HelpCommand();
            case "0":
                return new ExitCommand();
            default:
                return null;
        }
    }
}
/**
 * One crawled record: a type tag plus a set of named string fields.
 */
class Paper {
    /** Field name to field value; never exposed directly, callers get copies. */
    private final Map<String, String> data = new HashMap<>();
    private final String type;

    /**
     * @param type the tag identifying what kind of record this is
     */
    public Paper(String type) {
        this.type = type;
    }

    /**
     * Stores a field, replacing any value previously stored under the same key.
     *
     * @param key   the field name
     * @param value the field value
     */
    public void setData(String key, String value) {
        data.put(key, value);
    }

    /**
     * @param key the field name
     * @return the stored value, or {@code null} when no value is stored under {@code key}
     */
    public String getData(String key) {
        return data.get(key);
    }

    /**
     * @return a copy of all fields; changing the copy does not affect this record
     */
    public Map<String, String> getAllData() {
        return new HashMap<>(data);
    }

    /**
     * @return the type tag given at construction
     */
    public String getType() {
        return type;
    }

    @Override
    public String toString() {
        return "Paper{type='" + type + "', data=" + data + "}";
    }
}
/**
 * A user-selectable action that is carried out against a {@link CrawlerController}.
 */
interface Command {

    /**
     * Performs this action.
     *
     * @param controller the controller the action operates on
     * @throws Exception if the action fails
     */
    void execute(CrawlerController controller) throws Exception;
}
class CrawlCommand implements Command {
private int platform;
public CrawlCommand(int platform) {
this.platform = platform;
}
public void execute(CrawlerController controller) throws Exception {
controller.crawl(platform);
}
}
class ExitCommand implements Command {
public void execute(CrawlerController controller) {
controller.exit();
}
}
class HelpCommand implements Command {
public void execute(CrawlerController controller) {
controller.showHelp();
}
}
class ListCommand implements Command {
public void execute(CrawlerController controller) {
controller.listData();
}
}
/**
 * A data source: produces a list of records and names the file they are stored in.
 */
interface CrawlStrategy {

    /**
     * Produces the records of this source.
     *
     * @return the records; the implementations in this file return {@code Paper} objects
     * @throws Exception if the records cannot be produced
     */
    List crawl() throws Exception;

    /**
     * @return the name of the file this source's records are written to
     */
    String getOutputFileName();
}
class ChangshaWeatherStrategy implements CrawlStrategy {
public List crawl() throws Exception {
List papers = new ArrayList();
LocalDate today = LocalDate.now();
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd");
String[] weathers = {"Sunny", "Cloudy", "Overcast", "Light Rain", "Sunny", "Cloudy", "Overcast", "Light Rain", "Sunny", "Cloudy", "Overcast", "Light Rain", "Sunny", "Cloudy", "Overcast"};
for (int i = 0; i < 15; i++) {
Paper paper = new Paper("weather");
LocalDate date = today.minusDays(150 - i);
paper.setData("Date", date.format(formatter));
paper.setData("Weather", weathers[i % weathers.length]);
paper.setData("HighTemp", String.valueOf(25 + (int)(Math.random() * 10)));
paper.setData("LowTemp", String.valueOf(15 + (int)(Math.random() * 10)));
paper.setData("Wind", (2 + (int)(Math.random() * 4)) + " level");
papers.add(paper);
}
return papers;
}
public String getOutputFileName() {
return "changsha_weather_2026.csv";
}
}
class EarthquakeStrategy implements CrawlStrategy {
public List crawl() throws Exception {
List papers = new ArrayList();
LocalDateTime now = LocalDateTime.now();
DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
String[] locations = {
"Sichuan Aba", "Yunnan Dali", "Xinjiang Hotan", "Qinghai Yushu", "Tibet Shigatse",
"Gansu Gannan", "Sichuan Liangshan", "Yunnan Baoshan", "Xinjiang Kizilsu", "Qinghai Haixi"
};
for (int i = 0; i < 10; i++) {
Paper paper = new Paper("earthquake");
LocalDateTime time = now.minusDays(i).minusHours((long)(Math.random() * 24));
paper.setData("Time", time.format(formatter));
double magnitude = 2.5 + Math.random() * 4.5;
paper.setData("Magnitude", String.format("%.1f", magnitude));
paper.setData("Latitude", String.format("%.2f", 25 + Math.random() * 20));
paper.setData("Longitude", String.format("%.2f", 95 + Math.random() * 25));
paper.setData("Depth", String.valueOf((int)(Math.random() * 30 + 5)));
paper.setData("Location", locations[i % locations.length]);
papers.add(paper);
}
return papers;
}
public String getOutputFileName() {
return "earthquake_2026.csv";
}
}
class NewsRankStrategy implements CrawlStrategy {
public List crawl() throws Exception {
List papers = new ArrayList();
String[][] news = {
{"Tech Frontier: AI Model Breaks Record", "4982567", "https://example.com/news/1"},
{"Economic Outlook: Q1 2026 Analysis", "3892456", "https://example.com/news/2"},
{"Sports: World Cup Qualifiers", "3567234", "https://example.com/news/3"},
{"Culture: Annual Film Festival Opens", "2987654", "https://example.com/news/4"},
{"Health: New Vaccine Developed", "2876543", "https://example.com/news/5"},
{"Environment: Carbon Neutral Progress", "2567890", "https://example.com/news/6"},
{"Education: Exam Policy Adjusted", "2345678", "https://example.com/news/7"},
{"Military: Defense Tech Breakthrough", "2109876", "https://example.com/news/8"},
{"Entertainment: Celebrity's New Work", "1987654", "https://example.com/news/9"},
{"Society: Infrastructure Accelerates", "1876543", "https://example.com/news/10"}
};
for (int i = 0; i < news.length; i++) {
Paper paper = new Paper("news");
paper.setData("Rank", String.valueOf(i + 1));
paper.setData("Title", news[i][0]);
paper.setData("HotIndex", news[i][1]);
paper.setData("Link", news[i][2]);
papers.add(paper);
}
return papers;
}
public String getOutputFileName() {
return "news_rank_202605.csv";
}
}
/**
 * Text user interface: regular output goes to standard output, errors to standard error.
 */
class ConsoleView {

    /** Prints the start-up banner. */
    public void showWelcome() {
        final String rule = "==================================";
        System.out.println(rule);
        System.out.println(" Data Crawler System - Final Project");
        System.out.println(rule);
    }

    /** Prints the menu and leaves the cursor after the prompt, without a trailing newline. */
    public void showMenu() {
        final String[] entries = {
            "\nPlease select:",
            "1 - Crawl Changsha Weather",
            "2 - Crawl Earthquake Data",
            "3 - Crawl News Rank Top 10",
            "4 - Crawl All Data",
            "5 - List Crawled Files",
            "6 - Generate HTML Visualizations",
            "h - Show Help",
            "0 - Exit"
        };
        for (String entry : entries) {
            System.out.println(entry);
        }
        System.out.print("Your choice: ");
    }

    /** Prints a short explanation of the menu choices. */
    public void showHelp() {
        final String[] lines = {
            "\n=== Help ===",
            "1. Choose 1-4 to crawl data",
            "2. Choose 5 to view files",
            "3. Choose 6 to generate charts",
            "4. Choose 0 to exit",
            "============\n"
        };
        for (String line : lines) {
            System.out.println(line);
        }
    }

    /**
     * Prints an informational line.
     *
     * @param message the text to print
     */
    public void showMessage(String message) {
        System.out.println(message);
    }

    /**
     * Prints an error line, prefixed with {@code [ERROR]}, to standard error.
     *
     * @param error the error text
     */
    public void showError(String error) {
        System.err.println("[ERROR] " + error);
    }

    /**
     * Prints the given entries as a list numbered from 1, or a notice when there are none.
     *
     * @param files the entries to print
     */
    public void showDataList(List files) {
        if (!files.isEmpty()) {
            System.out.println("\nCrawled Data Files:");
            int position = 1;
            for (Object name : files) {
                System.out.println(position + ". " + name);
                position++;
            }
            return;
        }
        System.out.println("No data files found");
    }

    /** Prints the farewell line. */
    public void showGoodbye() {
        System.out.println("\nGoodbye!");
    }
}
/**
 * Stores records as UTF-8 CSV files in the {@code data} directory and reads them back.
 *
 * <p>Fields are quoted following RFC 4180: a field containing a comma, a double quote or a
 * line break is wrapped in double quotes and embedded double quotes are doubled. Reading is
 * line-based, so a quoted field that spans several lines is not reassembled by
 * {@link #loadCSV(String)}.
 */
class DataRepository {
    private static final String DATA_DIR = "data";

    /** Creates the data directory if it does not exist yet. */
    public DataRepository() {
        ensureDataDir();
    }

    /**
     * Writes the records to a CSV file, replacing any existing file of that name.
     *
     * @param papers   the records to write; every element must be a {@code Paper}
     * @param filename the file name inside the data directory
     * @param headers  the column names (strings); they form the first line and select which
     *                 field of each record goes into each column. A record without a value
     *                 for a column gets an empty cell.
     * @throws IOException if the file cannot be written
     */
    public void saveToCSV(List<?> papers, String filename, List<?> headers) throws IOException {
        // The directory may have been removed since construction; recreate it before writing.
        ensureDataDir();
        File file = new File(DATA_DIR, filename);
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(file), StandardCharsets.UTF_8))) {
            List<String> headerCells = new ArrayList<>(headers.size());
            for (Object header : headers) {
                headerCells.add(escapeCsv((String) header));
            }
            writer.write(String.join(",", headerCells));
            writer.newLine();
            for (Object item : papers) {
                Paper paper = (Paper) item;
                List<String> cells = new ArrayList<>(headers.size());
                for (Object header : headers) {
                    cells.add(escapeCsv(paper.getData((String) header)));
                }
                writer.write(String.join(",", cells));
                writer.newLine();
            }
        }
    }

    /**
     * @return the names of the regular files in the data directory ending in {@code .csv},
     *         in the order the file system reports them; empty if the directory cannot be
     *         listed
     */
    public List<String> listDataFiles() {
        List<String> names = new ArrayList<>();
        File[] entries = new File(DATA_DIR).listFiles();
        if (entries != null) {
            for (File entry : entries) {
                if (entry.isFile() && entry.getName().endsWith(".csv")) {
                    names.add(entry.getName());
                }
            }
        }
        return names;
    }

    /**
     * Reads a CSV file written by {@link #saveToCSV}.
     *
     * @param filename the file name inside the data directory
     * @return one map per data line, keyed by the column names of the first line; empty
     *         when the file does not exist. Cells beyond the header count are ignored.
     * @throws IOException if the file cannot be read
     */
    public List<Map<String, String>> loadCSV(String filename) throws IOException {
        List<Map<String, String>> rows = new ArrayList<>();
        File file = new File(DATA_DIR, filename);
        if (!file.exists()) {
            return rows;
        }
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
            List<String> headers = null;
            String line;
            while ((line = reader.readLine()) != null) {
                List<String> cells = parseCSVLine(line);
                if (headers == null) {
                    headers = cells;
                    continue;
                }
                Map<String, String> row = new HashMap<>();
                int columns = Math.min(headers.size(), cells.size());
                for (int i = 0; i < columns; i++) {
                    row.put(headers.get(i), cells.get(i));
                }
                rows.add(row);
            }
        }
        return rows;
    }

    /** Creates the data directory when missing; failure surfaces later when a file is opened. */
    private static void ensureDataDir() {
        File dir = new File(DATA_DIR);
        if (!dir.exists()) {
            dir.mkdirs();
        }
    }

    /** Returns the CSV representation of one cell; {@code null} becomes an empty cell. */
    private static String escapeCsv(String value) {
        if (value == null) {
            return "";
        }
        boolean needsQuoting = value.indexOf(',') >= 0 || value.indexOf('"') >= 0
                || value.indexOf('\n') >= 0 || value.indexOf('\r') >= 0;
        if (!needsQuoting) {
            return value;
        }
        return "\"" + value.replace("\"", "\"\"") + "\"";
    }

    /** Splits one CSV line into cells, honouring quoted cells and doubled quotes inside them. */
    private List<String> parseCSVLine(String line) {
        List<String> cells = new ArrayList<>();
        StringBuilder current = new StringBuilder();
        boolean inQuotes = false;
        for (int i = 0; i < line.length(); i++) {
            char c = line.charAt(i);
            if (c == '"') {
                if (inQuotes && i + 1 < line.length() && line.charAt(i + 1) == '"') {
                    // A doubled quote inside a quoted cell stands for one literal quote.
                    current.append('"');
                    i++;
                } else {
                    inQuotes = !inQuotes;
                }
            } else if (c == ',' && !inQuotes) {
                cells.add(current.toString());
                current.setLength(0);
            } else {
                current.append(c);
            }
        }
        cells.add(current.toString());
        return cells;
    }
}
/**
 * Coordinates the view, the data repository and the crawl strategies: runs crawls, lists
 * stored files and renders the stored CSV data as HTML pages with Chart.js charts.
 *
 * <p>Values read from the CSV files are escaped before being placed into HTML text or
 * JavaScript string literals, and only plain decimal numbers are emitted as JavaScript
 * numbers (anything else becomes {@code null}). Missing text values are rendered as empty
 * text.
 */
class CrawlerController {
    private static final String VISUALIZATION_DIR = "visualization";
    /** Longest title shown as a chart label before it is cut and "..." is appended. */
    private static final int CHART_TITLE_LIMIT = 25;
    /** Plain decimal number: optional minus sign, digits, optional fraction. */
    private static final Pattern DECIMAL_NUMBER = Pattern.compile("-?\\d+(\\.\\d+)?");

    private final ConsoleView view;
    private final DataRepository repository;
    private boolean running;

    /**
     * @param view       the view used for all user-facing output
     * @param repository the repository used to store and load CSV data
     */
    public CrawlerController(ConsoleView view, DataRepository repository) {
        this.view = view;
        this.repository = repository;
        this.running = true;
    }

    /** Shows the welcome banner. */
    public void start() {
        view.showWelcome();
    }

    /** Shows the menu. */
    public void showMenu() {
        view.showMenu();
    }

    /** Shows the help text. */
    public void showHelp() {
        view.showHelp();
    }

    /**
     * Runs the crawl for the given platform number.
     *
     * @param platform 1 = weather, 2 = earthquake, 3 = news, 4 = all three in that order;
     *                 any other value only reports an error through the view
     * @throws Exception if a strategy fails or the result cannot be saved
     */
    public void crawl(int platform) throws Exception {
        switch (platform) {
            case 1:
                crawlWeather();
                break;
            case 2:
                crawlEarthquake();
                break;
            case 3:
                crawlNews();
                break;
            case 4:
                crawlWeather();
                crawlEarthquake();
                crawlNews();
                break;
            default:
                view.showError("Invalid selection");
        }
    }

    private void crawlWeather() throws Exception {
        runCrawl("weather", new ChangshaWeatherStrategy(),
                Arrays.asList("Date", "Weather", "HighTemp", "LowTemp", "Wind"));
    }

    private void crawlEarthquake() throws Exception {
        runCrawl("earthquake", new EarthquakeStrategy(),
                Arrays.asList("Time", "Magnitude", "Latitude", "Longitude", "Depth", "Location"));
    }

    private void crawlNews() throws Exception {
        runCrawl("news", new NewsRankStrategy(),
                Arrays.asList("Rank", "Title", "HotIndex", "Link"));
    }

    /** Runs one strategy, saves its records under the strategy's file name and reports progress. */
    private void runCrawl(String label, CrawlStrategy strategy, List<String> headers) throws Exception {
        view.showMessage("Crawling " + label + " data...");
        List<?> papers = strategy.crawl();
        repository.saveToCSV(papers, strategy.getOutputFileName(), headers);
        view.showMessage("Saved to " + strategy.getOutputFileName());
    }

    /** Shows the CSV files currently stored by the repository. */
    public void listData() {
        List<?> files = repository.listDataFiles();
        view.showDataList(files);
    }

    /**
     * Renders the weather, earthquake and news pages into the visualization directory.
     * An I/O failure is reported through the view instead of being thrown.
     */
    public void generateVisualizations() {
        view.showMessage("Generating visualization pages...");
        try {
            generateWeatherVisualization();
            generateEarthquakeVisualization();
            generateNewsVisualization();
            view.showMessage("Visualizations generated in visualization/ directory!");
        } catch (IOException e) {
            view.showError("Failed: " + e.getMessage());
        }
    }

    /** Writes weather.html: a line chart of high and low temperature per date. */
    private void generateWeatherVisualization() throws IOException {
        // Load before opening the output so a read failure does not truncate an existing page.
        List<?> rows = repository.loadCSV(new ChangshaWeatherStrategy().getOutputFileName());
        try (BufferedWriter writer = openHtmlWriter("weather.html")) {
            writer.write("<!DOCTYPE html>\n<html>\n<head>\n<meta charset='UTF-8'>\n"
                    + "<title>Changsha Weather 2026</title>\n"
                    + "<script src='https://cdn.jsdelivr.net/npm/chart.js'></script>\n"
                    + "<style>\n"
                    + "body{font-family:Arial;margin:0;padding:20px;background:linear-gradient(135deg,#667eea 0%,#764ba2 100%);}\n"
                    + ".container{max-width:1200px;margin:0 auto;background:#fff;padding:30px;border-radius:15px;box-shadow:0 10px 40px rgba(0,0,0,0.2);}\n"
                    + "h1{text-align:center;color:#333;margin-bottom:30px;}\n"
                    + "</style></head><body><div class='container'><h1>Changsha Weather 2026</h1><canvas id='weatherChart'></canvas></div>"
                    + "<script>const ctx=document.getElementById('weatherChart').getContext('2d');const labels=[");
            writer.write(jsStrings(rows, "Date", Integer.MAX_VALUE));
            writer.write("];const highTemps=[");
            writer.write(jsNumbers(rows, "HighTemp"));
            writer.write("];const lowTemps=[");
            writer.write(jsNumbers(rows, "LowTemp"));
            writer.write("];new Chart(ctx,{type:'line',data:{labels:labels,datasets:[{label:'High',data:highTemps,borderColor:'rgb(255,99,132)',backgroundColor:'rgba(255,99,132,0.1)',tension:0.4,fill:true},{label:'Low',data:lowTemps,borderColor:'rgb(54,162,235)',backgroundColor:'rgba(54,162,235,0.1)',tension:0.4,fill:true}]},options:{responsive:true,plugins:{title:{display:true,text:'Temperature'}}}});</script></body></html>");
        }
    }

    /** Writes earthquake.html: a bar chart of magnitudes plus a table of time, magnitude and location. */
    private void generateEarthquakeVisualization() throws IOException {
        List<?> rows = repository.loadCSV(new EarthquakeStrategy().getOutputFileName());
        try (BufferedWriter writer = openHtmlWriter("earthquake.html")) {
            writer.write("<!DOCTYPE html>\n<html>\n<head>\n<meta charset='UTF-8'>\n"
                    + "<title>Earthquake Data 2026</title>\n"
                    + "<script src='https://cdn.jsdelivr.net/npm/chart.js'></script>\n"
                    + "<style>\n"
                    + "body{font-family:Arial;margin:0;padding:20px;background:linear-gradient(135deg,#1a2a6c 0%,#b21f1f 50%,#fdbb2d 100%);}\n"
                    + ".container{max-width:1400px;margin:0 auto;}\n"
                    + ".panel{background:#fff;padding:30px;border-radius:15px;box-shadow:0 10px 40px rgba(0,0,0,0.2);margin-bottom:30px;}\n"
                    + "h1{text-align:center;color:#fff;margin-bottom:30px;}\n"
                    + "table{width:100%;border-collapse:collapse;margin-top:20px;}\n"
                    + "th,td{padding:12px;border:1px solid #ddd;text-align:center;}\n"
                    + "th{background:#f5f5f5;}\n"
                    + "</style></head><body><div class='container'><h1>Earthquake Data 2026</h1><div class='panel'><canvas id='magnitudeChart'></canvas></div>"
                    + "<div class='panel'><table><tr><th>Time</th><th>Magnitude</th><th>Location</th></tr>");
            for (Object row : rows) {
                writer.write("<tr><td>" + htmlEscape(cell(row, "Time"))
                        + "</td><td>" + htmlEscape(cell(row, "Magnitude"))
                        + "</td><td>" + htmlEscape(cell(row, "Location")) + "</td></tr>");
            }
            writer.write("</table></div></div><script>const magnitudes=[");
            writer.write(jsNumbers(rows, "Magnitude"));
            writer.write("];new Chart(document.getElementById('magnitudeChart'),{type:'bar',data:{labels:magnitudes.map((_,i)=>'EQ'+(i+1)),datasets:[{label:'Magnitude',data:magnitudes,backgroundColor:'rgba(231,76,60,0.8)'}]},options:{responsive:true}});</script></body></html>");
        }
    }

    /** Writes news.html: a horizontal bar chart of hot index per title plus a ranked list. */
    private void generateNewsVisualization() throws IOException {
        List<?> rows = repository.loadCSV(new NewsRankStrategy().getOutputFileName());
        try (BufferedWriter writer = openHtmlWriter("news.html")) {
            writer.write("<!DOCTYPE html>\n<html>\n<head>\n<meta charset='UTF-8'>\n"
                    + "<title>News Rank 2026</title>\n"
                    + "<script src='https://cdn.jsdelivr.net/npm/chart.js'></script>\n"
                    + "<style>\n"
                    + "body{font-family:Arial;margin:0;padding:20px;background:linear-gradient(135deg,#f093fb 0%,#f5576c 100%);}\n"
                    + ".container{max-width:1200px;margin:0 auto;background:#fff;padding:30px;border-radius:15px;box-shadow:0 10px 40px rgba(0,0,0,0.2);}\n"
                    + "h1{text-align:center;color:#333;margin-bottom:30px;}\n"
                    + ".item{padding:20px;margin:15px 0;border-radius:10px;background:linear-gradient(135deg,#ffecd2 0%,#fcb69f 100%);}\n"
                    + ".rank{font-size:28px;font-weight:bold;color:#e74c3c;display:inline-block;margin-right:15px;background:#fff;width:50px;height:50px;border-radius:50%;text-align:center;line-height:50px;}\n"
                    + ".title{font-size:18px;font-weight:bold;color:#333;}\n"
                    + "</style></head><body><div class='container'><h1>News Rank Top 10</h1><canvas id='newsChart'></canvas>"
                    + "<h2 style='margin-top:40px;'>News Details</h2>");
            for (Object row : rows) {
                writer.write("<div class='item'><span class='rank'>" + htmlEscape(cell(row, "Rank"))
                        + "</span><span class='title'>" + htmlEscape(cell(row, "Title")) + "</span></div>");
            }
            writer.write("</div><script>const titles=[");
            // Titles may contain apostrophes (one of the built-in titles does); written
            // unescaped they end the JavaScript string early and break the whole script.
            writer.write(jsStrings(rows, "Title", CHART_TITLE_LIMIT));
            writer.write("];const hots=[");
            writer.write(jsNumbers(rows, "HotIndex"));
            writer.write("];new Chart(document.getElementById('newsChart'),{type:'bar',data:{labels:titles,datasets:[{label:'Hot Index',data:hots,backgroundColor:['rgba(255,99,132,0.8)','rgba(54,162,235,0.8)','rgba(255,206,86,0.8)','rgba(75,192,192,0.8)','rgba(153,102,255,0.8)','rgba(255,159,64,0.8)','rgba(199,199,199,0.8)','rgba(83,102,255,0.8)','rgba(40,167,69,0.8)','rgba(220,53,69,0.8)']}]},options:{indexAxis:'y',responsive:true}});</script></body></html>");
        }
    }

    /** Opens a UTF-8 writer for a page in the visualization directory, creating the directory if needed. */
    private static BufferedWriter openHtmlWriter(String fileName) throws IOException {
        File dir = new File(VISUALIZATION_DIR);
        if (!dir.isDirectory() && !dir.mkdirs()) {
            throw new IOException("Cannot create directory: " + dir.getPath());
        }
        File file = new File(dir, fileName);
        return new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), StandardCharsets.UTF_8));
    }

    /** Returns the text stored under {@code column} in a loaded CSV row, or "" when absent. */
    private static String cell(Object row, String column) {
        Object value = ((Map<?, ?>) row).get(column);
        return value == null ? "" : value.toString();
    }

    /** Builds a comma-separated list of JavaScript string literals, one per row. */
    private static String jsStrings(List<?> rows, String column, int maxLength) {
        List<String> items = new ArrayList<>(rows.size());
        for (Object row : rows) {
            String text = cell(row, column);
            if (text.length() > maxLength) {
                text = text.substring(0, maxLength) + "...";
            }
            items.add(jsString(text));
        }
        return String.join(",", items);
    }

    /** Builds a comma-separated list of JavaScript numbers, one per row; non-numbers become null. */
    private static String jsNumbers(List<?> rows, String column) {
        List<String> items = new ArrayList<>(rows.size());
        for (Object row : rows) {
            String text = cell(row, column).trim();
            items.add(DECIMAL_NUMBER.matcher(text).matches() ? text : "null");
        }
        return String.join(",", items);
    }

    /** Returns {@code text} as a single-quoted JavaScript string literal. */
    private static String jsString(String text) {
        StringBuilder literal = new StringBuilder(text.length() + 2).append('\'');
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '\\':
                    literal.append("\\\\");
                    break;
                case '\'':
                    literal.append("\\'");
                    break;
                case '\n':
                    literal.append("\\n");
                    break;
                case '\r':
                    literal.append("\\r");
                    break;
                case '<':
                    // Keeps a "</script>" inside the data from closing the script element.
                    literal.append("\\x3C");
                    break;
                default:
                    literal.append(c);
            }
        }
        return literal.append('\'').toString();
    }

    /** Escapes the characters that are significant in HTML text and attribute values. */
    private static String htmlEscape(String text) {
        StringBuilder escaped = new StringBuilder(text.length());
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '"':
                    escaped.append("&quot;");
                    break;
                case '\'':
                    escaped.append("&#39;");
                    break;
                default:
                    escaped.append(c);
            }
        }
        return escaped.toString();
    }

    /** Marks the controller as stopped and shows the farewell message. */
    public void exit() {
        this.running = false;
        view.showGoodbye();
    }

    /**
     * @return {@code true} until {@link #exit()} has been called
     */
    public boolean isRunning() {
        return running;
    }
}

BIN
project/input.txt

Binary file not shown.

51
project/main.java

@ -0,0 +1,51 @@
import command.*;
import controller.CrawlerController;
import repository.DataRepository;
import view.ConsoleView;
import java.util.Scanner;
/**
 * Console entry point: shows the menu, reads one choice per line from standard input and
 * runs the matching {@link Command} against the controller.
 */
public class Main {

    /**
     * Runs the interactive loop until the controller stops running or standard input ends.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        ConsoleView view = new ConsoleView();
        DataRepository repository = new DataRepository();
        CrawlerController controller = new CrawlerController(view, repository);
        controller.start();
        try (Scanner scanner = new Scanner(System.in)) {
            while (controller.isRunning()) {
                controller.showMenu();
                // At end of input nextLine() throws on every call; the catch-all below would
                // swallow it and the loop would spin forever, so leave the loop cleanly instead.
                if (!scanner.hasNextLine()) {
                    controller.exit();
                    break;
                }
                try {
                    String input = scanner.nextLine().trim().toLowerCase();
                    Command command = parseCommand(input);
                    if (command != null) {
                        command.execute(controller);
                    } else {
                        view.showError("Invalid input");
                    }
                } catch (Exception e) {
                    // Some exceptions carry no message; fall back to the exception's own text.
                    String detail = e.getMessage() != null ? e.getMessage() : e.toString();
                    view.showError("Error: " + detail);
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Maps a trimmed, lower-cased menu choice to the command that implements it.
     *
     * @param input the user's choice
     * @return the matching command, or {@code null} when the choice is not recognised
     */
    private static Command parseCommand(String input) {
        switch (input) {
            case "1":
                return new CrawlCommand(1);
            case "2":
                return new CrawlCommand(2);
            case "3":
                return new CrawlCommand(3);
            case "4":
                return new CrawlCommand(4);
            case "5":
                return new ListCommand();
            case "6":
                return new Command() {
                    public void execute(CrawlerController controller) {
                        controller.generateVisualizations();
                    }
                };
            case "h":
            case "help":
                return new HelpCommand();
            case "0":
                return new ExitCommand();
            default:
                return null;
        }
    }
}
Loading…
Cancel
Save