Compare commits

...

No commits in common. 'main' and 'master' have entirely different histories.
main ... master

  1. 28
      .gitignore
  2. BIN
      202529010113-陈全文-期末实验报告.docx
  3. 2
      README.md
  4. BIN
      W1-陈全文-202529010113/TemperatureConverter.class
  5. 47
      W1-陈全文-202529010113/TemperatureConverter.java
  6. 61
      output/books.json
  7. 1001
      output/countries.json
  8. 31
      output/quotes.json
  9. 76
      pom.xml
  10. 109
      src/main/java/com/scraper/Main.java
  11. 45
      src/main/java/com/scraper/command/CrawlAllCommand.java
  12. 29
      src/main/java/com/scraper/command/CrawlBooksCommand.java
  13. 29
      src/main/java/com/scraper/command/CrawlCountriesCommand.java
  14. 29
      src/main/java/com/scraper/command/CrawlQuotesCommand.java
  15. 7
      src/main/java/com/scraper/command/CrawlerCommand.java
  16. 7
      src/main/java/com/scraper/exception/CrawlerException.java
  17. 7
      src/main/java/com/scraper/exception/NetworkException.java
  18. 7
      src/main/java/com/scraper/exception/ParseException.java
  19. 7
      src/main/java/com/scraper/exception/StorageException.java
  20. 24
      src/main/java/com/scraper/model/Book.java
  21. 30
      src/main/java/com/scraper/model/Country.java
  22. 24
      src/main/java/com/scraper/model/Quote.java
  23. 8
      src/main/java/com/scraper/strategy/CrawlStrategy.java
  24. 51
      src/main/java/com/scraper/strategy/SiteABooksStrategy.java
  25. 50
      src/main/java/com/scraper/strategy/SiteBQuotesStrategy.java
  26. 51
      src/main/java/com/scraper/strategy/SiteCCountriesStrategy.java
  27. 26
      src/main/java/com/scraper/view/ConsoleView.java
  28. 31
      src/main/java/com/scraper/view/FileSaver.java
  29. BIN
      w2陈全文202529010113/DataCleaner.class
  30. 33
      w2陈全文202529010113/DataCleaner.java

28
.gitignore

@ -0,0 +1,28 @@
# Maven
target/
pom.xml.tag
pom.xml.releaseBackup
pom.xml.versionsBackup
pom.xml.next
release.properties
dependency-reduced-pom.xml
buildNumber.properties
.mvn/timing.properties
.mvn/wrapper/maven-wrapper.jar
# IDE
.idea/
*.iml
*.ipr
*.iws
.project
.classpath
.settings/
.vscode/
# OS
.DS_Store
Thumbs.db
# Logs
*.log

BIN
202529010113-陈全文-期末实验报告.docx

Binary file not shown.

2
README.md

@ -1,2 +0,0 @@
# java

BIN
W1-陈全文-202529010113/TemperatureConverter.class

Binary file not shown.

47
W1-陈全文-202529010113/TemperatureConverter.java

@ -1,47 +0,0 @@
/**
* 温度转换程序
* 支持摄氏度和华氏度的互相转换
* @author 陈全文
* @version 1.0
*/
import java.util.Scanner;
/**
 * Console utility that converts temperatures between Celsius and Fahrenheit.
 *
 * <p>The interactive entry point reads a conversion direction and a value from
 * standard input and prints the converted value to standard output.
 */
public class TemperatureConverter {

    /**
     * Converts a temperature from Celsius to Fahrenheit.
     *
     * @param celsius temperature in degrees Celsius
     * @return the same temperature in degrees Fahrenheit
     */
    public static double celsiusToFahrenheit(double celsius) {
        return celsius * 9.0 / 5.0 + 32.0;
    }

    /**
     * Converts a temperature from Fahrenheit to Celsius.
     *
     * @param fahrenheit temperature in degrees Fahrenheit
     * @return the same temperature in degrees Celsius
     */
    public static double fahrenheitToCelsius(double fahrenheit) {
        return (fahrenheit - 32.0) * 5.0 / 9.0;
    }

    /**
     * Runs the interactive conversion dialogue.
     *
     * <p>Invalid menu choices, non-numeric values and a missing input line are
     * reported with a message instead of terminating with an exception.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // try-with-resources closes the scanner on every exit path.
        try (Scanner scanner = new Scanner(System.in)) {
            System.out.println("===== 温度转换程序 =====");
            System.out.println("1. 摄氏度 -> 华氏度");
            System.out.println("2. 华氏度 -> 摄氏度");
            System.out.print("请选择转换类型 (1/2): ");
            // An exhausted input stream is treated like an invalid choice.
            String choice = scanner.hasNextLine() ? scanner.nextLine().trim() : "";
            if (choice.equals("1")) {
                System.out.print("请输入摄氏度: ");
                if (hasNumber(scanner)) {
                    double celsius = scanner.nextDouble();
                    double fahrenheit = celsiusToFahrenheit(celsius);
                    System.out.println(celsius + "°C = " + fahrenheit + "°F");
                }
            } else if (choice.equals("2")) {
                System.out.print("请输入华氏度: ");
                if (hasNumber(scanner)) {
                    double fahrenheit = scanner.nextDouble();
                    double celsius = fahrenheitToCelsius(fahrenheit);
                    System.out.println(fahrenheit + "°F = " + celsius + "°C");
                }
            } else {
                System.out.println("无效选择,请输入1或2");
            }
        }
    }

    /** Returns whether the next token is a number; prints an error message when it is not. */
    private static boolean hasNumber(Scanner scanner) {
        if (scanner.hasNextDouble()) {
            return true;
        }
        System.out.println("无效输入,请输入数字");
        return false;
    }
}

61
output/books.json

@ -0,0 +1,61 @@
[ {
"title" : "A Light in the Attic",
"price" : "51.77"
}, {
"title" : "Tipping the Velvet",
"price" : "53.74"
}, {
"title" : "Soumission",
"price" : "50.10"
}, {
"title" : "Sharp Objects",
"price" : "47.82"
}, {
"title" : "Sapiens: A Brief History of Humankind",
"price" : "54.23"
}, {
"title" : "The Requiem Red",
"price" : "22.65"
}, {
"title" : "The Dirty Little Secrets of Getting Your Dream Job",
"price" : "33.34"
}, {
"title" : "The Coming Woman: A Novel Based on the Life of the Infamous Feminist, Victoria Woodhull",
"price" : "17.93"
}, {
"title" : "The Boys in the Boat: Nine Americans and Their Epic Quest for Gold at the 1936 Berlin Olympics",
"price" : "22.60"
}, {
"title" : "The Black Maria",
"price" : "52.15"
}, {
"title" : "Starving Hearts (Triangular Trade Trilogy, #1)",
"price" : "13.99"
}, {
"title" : "Shakespeare's Sonnets",
"price" : "20.66"
}, {
"title" : "Set Me Free",
"price" : "17.46"
}, {
"title" : "Scott Pilgrim's Precious Little Life (Scott Pilgrim #1)",
"price" : "52.29"
}, {
"title" : "Rip it Up and Start Again",
"price" : "35.02"
}, {
"title" : "Our Band Could Be Your Life: Scenes from the American Indie Underground, 1981-1991",
"price" : "57.25"
}, {
"title" : "Olio",
"price" : "23.88"
}, {
"title" : "Mesaerion: The Best Science Fiction Stories 1800-1849",
"price" : "37.59"
}, {
"title" : "Libertarianism for Beginners",
"price" : "51.33"
}, {
"title" : "It's Only the Himalayas",
"price" : "45.17"
} ]

1001
output/countries.json

File diff suppressed because it is too large

31
output/quotes.json

@ -0,0 +1,31 @@
[ {
"text" : "“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”",
"author" : "Albert Einstein"
}, {
"text" : "“It is our choices, Harry, that show what we truly are, far more than our abilities.”",
"author" : "J.K. Rowling"
}, {
"text" : "“There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.”",
"author" : "Albert Einstein"
}, {
"text" : "“The person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.”",
"author" : "Jane Austen"
}, {
"text" : "“Imperfection is beauty, madness is genius and it's better to be absolutely ridiculous than absolutely boring.”",
"author" : "Marilyn Monroe"
}, {
"text" : "“Try not to become a man of success. Rather become a man of value.”",
"author" : "Albert Einstein"
}, {
"text" : "“It is better to be hated for what you are than to be loved for what you are not.”",
"author" : "André Gide"
}, {
"text" : "“I have not failed. I've just found 10,000 ways that won't work.”",
"author" : "Thomas A. Edison"
}, {
"text" : "“A woman is like a tea bag; you never know how strong it is until it's in hot water.”",
"author" : "Eleanor Roosevelt"
}, {
"text" : "“A day without sunshine is like, you know, night.”",
"author" : "Steve Martin"
} ]

76
pom.xml

@ -0,0 +1,76 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.scraper</groupId>
<artifactId>web-scraper</artifactId>
<version>1.0-SNAPSHOT</version>
<packaging>jar</packaging>
<name>Web Scraper</name>
<description>A web scraping application</description>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>11</maven.compiler.source>
<maven.compiler.target>11</maven.compiler.target>
</properties>
<dependencies>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.17.2</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents.client5</groupId>
<artifactId>httpclient5</artifactId>
<version>5.4.1</version>
</dependency>
<dependency>
<groupId>info.picocli</groupId>
<artifactId>picocli</artifactId>
<version>4.7.6</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.17.2</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
<version>2.17.2</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
<version>2.17.2</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.13.0</version>
<configuration>
<source>11</source>
<target>11</target>
</configuration>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>3.1.0</version>
<configuration>
<mainClass>com.scraper.Main</mainClass>
<commandlineArgs>--site all --output ./output</commandlineArgs>
</configuration>
</plugin>
</plugins>
</build>
</project>

109
src/main/java/com/scraper/Main.java

@ -0,0 +1,109 @@
package com.scraper;
import com.scraper.command.CrawlerCommand;
import com.scraper.command.CrawlAllCommand;
import com.scraper.command.CrawlBooksCommand;
import com.scraper.command.CrawlCountriesCommand;
import com.scraper.command.CrawlQuotesCommand;
import com.scraper.exception.CrawlerException;
import com.scraper.exception.NetworkException;
import com.scraper.exception.ParseException;
import com.scraper.exception.StorageException;
import com.scraper.strategy.SiteABooksStrategy;
import com.scraper.strategy.SiteBQuotesStrategy;
import com.scraper.strategy.SiteCCountriesStrategy;
import picocli.CommandLine;
import picocli.CommandLine.Command;
import picocli.CommandLine.Option;
/**
 * Command-line entry point of the scraper; options are parsed by picocli.
 *
 * <p>Depending on {@code --site}, the program crawls books, quotes, countries,
 * or all three, and passes the output directory to the selected command.
 */
@Command(name = "webscraper", mixinStandardHelpOptions = true, version = "1.0",
        description = "网页爬虫程序,支持爬取书籍、名言和国家信息。")
public class Main implements Runnable {

    /**
     * Site to crawl: books, quotes, countries or all (case-insensitive).
     * Defaults to all; any unrecognised value is also treated as all.
     */
    @Option(names = {"-s", "--site"}, description = "要爬取的网站类型:books、quotes、countries、all(默认:all)")
    private String site = "all";

    /** Output directory; defaults to "./output". */
    @Option(names = {"-o", "--output"}, description = "输出目录(默认:./output)")
    private String outputDir = "./output";

    /**
     * Program entry point: parses the arguments, runs the command and exits
     * with the resulting exit code.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        int exitCode = new CommandLine(new Main()).execute(args);
        System.exit(exitCode);
    }

    /**
     * Builds the command selected by {@code --site} and executes it.
     *
     * <p>Any {@link CrawlerException} is reported on standard error with a
     * category-specific prefix, after which the JVM exits with status 1.
     */
    @Override
    public void run() {
        try {
            createCommand().execute();
        } catch (NetworkException e) {
            reportAndExit("网络错误:", e);
        } catch (ParseException e) {
            reportAndExit("解析失败:", e);
        } catch (StorageException e) {
            reportAndExit("存储异常:", e);
        } catch (CrawlerException e) {
            reportAndExit("爬取异常:", e);
        }
    }

    /** Maps the {@code --site} value to the command that crawls it. */
    private CrawlerCommand createCommand() {
        // Locale.ROOT keeps the match independent of the default locale
        // (a Turkish locale lower-cases 'I' to a dotless 'ı').
        switch (site.toLowerCase(java.util.Locale.ROOT)) {
            case "books":
                return new CrawlBooksCommand(new SiteABooksStrategy(), outputDir);
            case "quotes":
                return new CrawlQuotesCommand(new SiteBQuotesStrategy(), outputDir);
            case "countries":
                return new CrawlCountriesCommand(new SiteCCountriesStrategy(), outputDir);
            case "all":
            default:
                return new CrawlAllCommand(
                        new SiteABooksStrategy(),
                        new SiteBQuotesStrategy(),
                        new SiteCCountriesStrategy(),
                        outputDir);
        }
    }

    /** Prints the labelled error (and its cause, if any) to standard error and exits with status 1. */
    private static void reportAndExit(String label, CrawlerException e) {
        System.err.println(label + e.getMessage());
        Throwable cause = e.getCause();
        if (cause != null) {
            System.err.println("原因:" + cause.getMessage());
        }
        System.exit(1);
    }
}

45
src/main/java/com/scraper/command/CrawlAllCommand.java

@ -0,0 +1,45 @@
package com.scraper.command;
import com.scraper.exception.CrawlerException;
import com.scraper.model.Book;
import com.scraper.model.Country;
import com.scraper.model.Quote;
import com.scraper.strategy.SiteABooksStrategy;
import com.scraper.strategy.SiteBQuotesStrategy;
import com.scraper.strategy.SiteCCountriesStrategy;
import com.scraper.view.ConsoleView;
import com.scraper.view.FileSaver;
import java.util.List;
/**
 * Command that crawls books, quotes and countries one after another, printing
 * each result to the console and saving it as a JSON file in the output directory.
 */
public class CrawlAllCommand implements CrawlerCommand {
    private static final String DEFAULT_OUTPUT_DIR = "./output";
    private static final String BOOKS_URL = "http://books.toscrape.com";
    private static final String QUOTES_URL = "http://quotes.toscrape.com";
    private static final String COUNTRIES_URL = "https://www.scrapethissite.com/pages/simple/";

    private final SiteABooksStrategy booksStrategy;
    private final SiteBQuotesStrategy quotesStrategy;
    private final SiteCCountriesStrategy countriesStrategy;
    private final String outputDir;

    /**
     * Creates the command with the default output directory "./output".
     *
     * @param booksStrategy strategy used to crawl books
     * @param quotesStrategy strategy used to crawl quotes
     * @param countriesStrategy strategy used to crawl countries
     */
    public CrawlAllCommand(SiteABooksStrategy booksStrategy, SiteBQuotesStrategy quotesStrategy, SiteCCountriesStrategy countriesStrategy) {
        this(booksStrategy, quotesStrategy, countriesStrategy, DEFAULT_OUTPUT_DIR);
    }

    /**
     * Creates the command with an explicit output directory.
     *
     * @param booksStrategy strategy used to crawl books
     * @param quotesStrategy strategy used to crawl quotes
     * @param countriesStrategy strategy used to crawl countries
     * @param outputDir directory that receives the JSON files
     */
    public CrawlAllCommand(SiteABooksStrategy booksStrategy, SiteBQuotesStrategy quotesStrategy, SiteCCountriesStrategy countriesStrategy, String outputDir) {
        this.booksStrategy = booksStrategy;
        this.quotesStrategy = quotesStrategy;
        this.countriesStrategy = countriesStrategy;
        this.outputDir = outputDir;
    }

    /**
     * Crawls, prints and saves books, then quotes, then countries.
     * A failure in one step aborts the remaining steps.
     *
     * @throws CrawlerException if crawling or saving fails
     */
    @Override
    public void execute() throws CrawlerException {
        processBooks();
        processQuotes();
        processCountries();
    }

    /** Crawls the book site, prints the books and writes books.json. */
    private void processBooks() throws CrawlerException {
        List<Book> crawled = booksStrategy.crawl(BOOKS_URL);
        ConsoleView.printBooks(crawled);
        FileSaver.saveToJson(crawled, outputDir + "/books.json");
    }

    /** Crawls the quote site, prints the quotes and writes quotes.json. */
    private void processQuotes() throws CrawlerException {
        List<Quote> crawled = quotesStrategy.crawl(QUOTES_URL);
        ConsoleView.printQuotes(crawled);
        FileSaver.saveToJson(crawled, outputDir + "/quotes.json");
    }

    /** Crawls the country site, prints the countries and writes countries.json. */
    private void processCountries() throws CrawlerException {
        List<Country> crawled = countriesStrategy.crawl(COUNTRIES_URL);
        ConsoleView.printCountries(crawled);
        FileSaver.saveToJson(crawled, outputDir + "/countries.json");
    }
}

29
src/main/java/com/scraper/command/CrawlBooksCommand.java

@ -0,0 +1,29 @@
package com.scraper.command;
import com.scraper.exception.CrawlerException;
import com.scraper.model.Book;
import com.scraper.strategy.SiteABooksStrategy;
import com.scraper.view.ConsoleView;
import com.scraper.view.FileSaver;
import java.util.List;
/**
 * Command that crawls the book site, prints the books to the console and
 * saves them as books.json in the output directory.
 */
public class CrawlBooksCommand implements CrawlerCommand {
    private static final String DEFAULT_OUTPUT_DIR = "./output";
    private static final String SOURCE_URL = "http://books.toscrape.com";
    private static final String FILE_NAME = "/books.json";

    private final SiteABooksStrategy strategy;
    private final String outputDir;

    /**
     * Creates the command with the default output directory "./output".
     *
     * @param strategy strategy used to crawl books
     */
    public CrawlBooksCommand(SiteABooksStrategy strategy) {
        this(strategy, DEFAULT_OUTPUT_DIR);
    }

    /**
     * Creates the command with an explicit output directory.
     *
     * @param strategy strategy used to crawl books
     * @param outputDir directory that receives books.json
     */
    public CrawlBooksCommand(SiteABooksStrategy strategy, String outputDir) {
        this.strategy = strategy;
        this.outputDir = outputDir;
    }

    /**
     * Crawls, prints and saves the books.
     *
     * @throws CrawlerException if crawling or saving fails
     */
    @Override
    public void execute() throws CrawlerException {
        List<Book> crawled = strategy.crawl(SOURCE_URL);
        ConsoleView.printBooks(crawled);
        String target = outputDir + FILE_NAME;
        FileSaver.saveToJson(crawled, target);
    }
}

29
src/main/java/com/scraper/command/CrawlCountriesCommand.java

@ -0,0 +1,29 @@
package com.scraper.command;
import com.scraper.exception.CrawlerException;
import com.scraper.model.Country;
import com.scraper.strategy.SiteCCountriesStrategy;
import com.scraper.view.ConsoleView;
import com.scraper.view.FileSaver;
import java.util.List;
/**
 * Command that crawls the country site, prints the countries to the console
 * and saves them as countries.json in the output directory.
 */
public class CrawlCountriesCommand implements CrawlerCommand {
    private static final String DEFAULT_OUTPUT_DIR = "./output";
    private static final String SOURCE_URL = "https://www.scrapethissite.com/pages/simple/";
    private static final String FILE_NAME = "/countries.json";

    private final SiteCCountriesStrategy strategy;
    private final String outputDir;

    /**
     * Creates the command with the default output directory "./output".
     *
     * @param strategy strategy used to crawl countries
     */
    public CrawlCountriesCommand(SiteCCountriesStrategy strategy) {
        this(strategy, DEFAULT_OUTPUT_DIR);
    }

    /**
     * Creates the command with an explicit output directory.
     *
     * @param strategy strategy used to crawl countries
     * @param outputDir directory that receives countries.json
     */
    public CrawlCountriesCommand(SiteCCountriesStrategy strategy, String outputDir) {
        this.strategy = strategy;
        this.outputDir = outputDir;
    }

    /**
     * Crawls, prints and saves the countries.
     *
     * @throws CrawlerException if crawling or saving fails
     */
    @Override
    public void execute() throws CrawlerException {
        List<Country> crawled = strategy.crawl(SOURCE_URL);
        ConsoleView.printCountries(crawled);
        String target = outputDir + FILE_NAME;
        FileSaver.saveToJson(crawled, target);
    }
}

29
src/main/java/com/scraper/command/CrawlQuotesCommand.java

@ -0,0 +1,29 @@
package com.scraper.command;
import com.scraper.exception.CrawlerException;
import com.scraper.model.Quote;
import com.scraper.strategy.SiteBQuotesStrategy;
import com.scraper.view.ConsoleView;
import com.scraper.view.FileSaver;
import java.util.List;
/**
 * Command that crawls the quote site, prints the quotes to the console and
 * saves them as quotes.json in the output directory.
 */
public class CrawlQuotesCommand implements CrawlerCommand {
    private static final String DEFAULT_OUTPUT_DIR = "./output";
    private static final String SOURCE_URL = "http://quotes.toscrape.com";
    private static final String FILE_NAME = "/quotes.json";

    private final SiteBQuotesStrategy strategy;
    private final String outputDir;

    /**
     * Creates the command with the default output directory "./output".
     *
     * @param strategy strategy used to crawl quotes
     */
    public CrawlQuotesCommand(SiteBQuotesStrategy strategy) {
        this(strategy, DEFAULT_OUTPUT_DIR);
    }

    /**
     * Creates the command with an explicit output directory.
     *
     * @param strategy strategy used to crawl quotes
     * @param outputDir directory that receives quotes.json
     */
    public CrawlQuotesCommand(SiteBQuotesStrategy strategy, String outputDir) {
        this.strategy = strategy;
        this.outputDir = outputDir;
    }

    /**
     * Crawls, prints and saves the quotes.
     *
     * @throws CrawlerException if crawling or saving fails
     */
    @Override
    public void execute() throws CrawlerException {
        List<Quote> crawled = strategy.crawl(SOURCE_URL);
        ConsoleView.printQuotes(crawled);
        String target = outputDir + FILE_NAME;
        FileSaver.saveToJson(crawled, target);
    }
}

7
src/main/java/com/scraper/command/CrawlerCommand.java

@ -0,0 +1,7 @@
package com.scraper.command;
import com.scraper.exception.CrawlerException;
/**
* A single executable crawl job.
*/
public interface CrawlerCommand {
/**
* Runs the job.
*
* @throws CrawlerException if the job fails
*/
void execute() throws CrawlerException;
}

7
src/main/java/com/scraper/exception/CrawlerException.java

@ -0,0 +1,7 @@
package com.scraper.exception;
/**
* Abstract base class of the scraper's checked exceptions.
*/
public abstract class CrawlerException extends Exception {
/**
* Creates the exception.
*
* @param message description of the failure
* @param cause underlying cause, passed on to {@link Exception}
*/
public CrawlerException(String message, Throwable cause) {
super(message, cause);
}
}

7
src/main/java/com/scraper/exception/NetworkException.java

@ -0,0 +1,7 @@
package com.scraper.exception;
/**
* {@link CrawlerException} subtype for network failures.
*/
public class NetworkException extends CrawlerException {
/**
* Creates the exception.
*
* @param message description of the failure
* @param cause underlying cause, passed on to the superclass
*/
public NetworkException(String message, Throwable cause) {
super(message, cause);
}
}

7
src/main/java/com/scraper/exception/ParseException.java

@ -0,0 +1,7 @@
package com.scraper.exception;
/**
* {@link CrawlerException} subtype for parsing failures.
*/
public class ParseException extends CrawlerException {
/**
* Creates the exception.
*
* @param message description of the failure
* @param cause underlying cause, passed on to the superclass
*/
public ParseException(String message, Throwable cause) {
super(message, cause);
}
}

7
src/main/java/com/scraper/exception/StorageException.java

@ -0,0 +1,7 @@
package com.scraper.exception;
/**
* {@link CrawlerException} subtype for storage failures.
*/
public class StorageException extends CrawlerException {
/**
* Creates the exception.
*
* @param message description of the failure
* @param cause underlying cause, passed on to the superclass
*/
public StorageException(String message, Throwable cause) {
super(message, cause);
}
}

24
src/main/java/com/scraper/model/Book.java

@ -0,0 +1,24 @@
package com.scraper.model;
/**
 * Value object holding the title and price text of a book.
 */
public class Book {
    private final String title;
    private final String price;

    /**
     * Creates a book.
     *
     * @param title book title
     * @param price price as text
     */
    public Book(String title, String price) {
        this.title = title;
        this.price = price;
    }

    /** @return the book title */
    public String getTitle() {
        return this.title;
    }

    /** @return the price as text */
    public String getPrice() {
        return this.price;
    }

    /** @return a debug representation listing both fields */
    @Override
    public String toString() {
        return String.format("Book{title='%s', price='%s'}", title, price);
    }
}

30
src/main/java/com/scraper/model/Country.java

@ -0,0 +1,30 @@
package com.scraper.model;
/**
 * Value object holding the name, capital and population text of a country.
 */
public class Country {
    private final String name;
    private final String capital;
    private final String population;

    /**
     * Creates a country.
     *
     * @param name country name
     * @param capital capital city
     * @param population population as text
     */
    public Country(String name, String capital, String population) {
        this.name = name;
        this.capital = capital;
        this.population = population;
    }

    /** @return the country name */
    public String getName() {
        return this.name;
    }

    /** @return the capital city */
    public String getCapital() {
        return this.capital;
    }

    /** @return the population as text */
    public String getPopulation() {
        return this.population;
    }

    /** @return a debug representation listing all three fields */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Country{name='");
        text.append(name);
        text.append("', capital='").append(capital);
        text.append("', population='").append(population);
        text.append("'}");
        return text.toString();
    }
}

24
src/main/java/com/scraper/model/Quote.java

@ -0,0 +1,24 @@
package com.scraper.model;
/**
 * Value object holding the text and author of a quote.
 */
public class Quote {
    private final String text;
    private final String author;

    /**
     * Creates a quote.
     *
     * @param text quote text
     * @param author author name
     */
    public Quote(String text, String author) {
        this.text = text;
        this.author = author;
    }

    /** @return the quote text */
    public String getText() {
        return this.text;
    }

    /** @return the author name */
    public String getAuthor() {
        return this.author;
    }

    /** @return a debug representation listing both fields */
    @Override
    public String toString() {
        return String.format("Quote{text='%s', author='%s'}", text, author);
    }
}

8
src/main/java/com/scraper/strategy/CrawlStrategy.java

@ -0,0 +1,8 @@
package com.scraper.strategy;
import com.scraper.exception.CrawlerException;
import java.util.List;
/**
* Strategy that crawls a URL and returns the extracted items.
*
* @param <T> type of the extracted items
*/
public interface CrawlStrategy<T> {
/**
* Crawls the given URL.
*
* @param url address to crawl
* @return the extracted items
* @throws CrawlerException if crawling fails
*/
List<T> crawl(String url) throws CrawlerException;
}

51
src/main/java/com/scraper/strategy/SiteABooksStrategy.java

@ -0,0 +1,51 @@
package com.scraper.strategy;
import com.scraper.exception.CrawlerException;
import com.scraper.exception.NetworkException;
import com.scraper.exception.ParseException;
import com.scraper.model.Book;
import org.apache.hc.client5.http.classic.methods.HttpGet;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Crawl strategy that extracts book titles and prices from a catalogue page.
 */
public class SiteABooksStrategy implements CrawlStrategy<Book> {

    /**
     * Downloads the page at {@code url} and builds one {@link Book} per
     * {@code .product_pod} element, taking the title from the {@code title}
     * attribute of {@code h3 > a} and the price from {@code .price_color}
     * with the pound sign removed.
     *
     * @param url address of the page to crawl
     * @return the books found on the page; empty if none match
     * @throws NetworkException if the request fails or the server answers with a non-2xx status
     * @throws ParseException if the response body cannot be read as text
     */
    @Override
    public List<Book> crawl(String url) throws CrawlerException {
        // Report the URL that is actually requested rather than a hard-coded one.
        System.out.println("正在爬取 [" + url + "]...");
        List<Book> books = new ArrayList<>();
        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
            HttpGet httpGet = new HttpGet(url);
            try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
                // An error page would otherwise be parsed into an empty result without any error.
                int status = response.getCode();
                if (status < 200 || status >= 300) {
                    throw new NetworkException("网络请求失败,HTTP 状态码: " + status, null);
                }
                String html = EntityUtils.toString(response.getEntity());
                Document doc = Jsoup.parse(html);
                Elements productPods = doc.select(".product_pod");
                for (Element pod : productPods) {
                    String title = pod.select("h3 > a").attr("title");
                    String priceText = pod.select(".price_color").text();
                    String price = priceText.replace("£", "");
                    books.add(new Book(title, price));
                }
            } catch (org.apache.hc.core5.http.ParseException e) {
                throw new ParseException("解析响应内容失败", e);
            }
        } catch (IOException e) {
            throw new NetworkException("网络请求失败", e);
        }
        return books;
    }
}

50
src/main/java/com/scraper/strategy/SiteBQuotesStrategy.java

@ -0,0 +1,50 @@
package com.scraper.strategy;
import com.scraper.exception.CrawlerException;
import com.scraper.exception.NetworkException;
import com.scraper.exception.ParseException;
import com.scraper.model.Quote;
import org.apache.hc.client5.http.classic.methods.HttpGet;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Crawl strategy that extracts quote texts and authors from a quotes page.
 */
public class SiteBQuotesStrategy implements CrawlStrategy<Quote> {

    /**
     * Downloads the page at {@code url} and builds one {@link Quote} per
     * {@code .quote} element, taking the text from {@code .text} and the
     * author from {@code .author}.
     *
     * @param url address of the page to crawl
     * @return the quotes found on the page; empty if none match
     * @throws NetworkException if the request fails or the server answers with a non-2xx status
     * @throws ParseException if the response body cannot be read as text
     */
    @Override
    public List<Quote> crawl(String url) throws CrawlerException {
        // Report the URL that is actually requested rather than a hard-coded one.
        System.out.println("正在爬取 [" + url + "]...");
        List<Quote> quotes = new ArrayList<>();
        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
            HttpGet httpGet = new HttpGet(url);
            try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
                // An error page would otherwise be parsed into an empty result without any error.
                int status = response.getCode();
                if (status < 200 || status >= 300) {
                    throw new NetworkException("网络请求失败,HTTP 状态码: " + status, null);
                }
                String html = EntityUtils.toString(response.getEntity());
                Document doc = Jsoup.parse(html);
                Elements quoteElements = doc.select(".quote");
                for (Element quoteEl : quoteElements) {
                    String text = quoteEl.select(".text").text();
                    String author = quoteEl.select(".author").text();
                    quotes.add(new Quote(text, author));
                }
            } catch (org.apache.hc.core5.http.ParseException e) {
                throw new ParseException("解析响应内容失败", e);
            }
        } catch (IOException e) {
            throw new NetworkException("网络请求失败", e);
        }
        return quotes;
    }
}

51
src/main/java/com/scraper/strategy/SiteCCountriesStrategy.java

@ -0,0 +1,51 @@
package com.scraper.strategy;
import com.scraper.exception.CrawlerException;
import com.scraper.exception.NetworkException;
import com.scraper.exception.ParseException;
import com.scraper.model.Country;
import org.apache.hc.client5.http.classic.methods.HttpGet;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Crawl strategy that extracts country names, capitals and populations from a
 * country listing page.
 */
public class SiteCCountriesStrategy implements CrawlStrategy<Country> {

    /**
     * Downloads the page at {@code url} and builds one {@link Country} per
     * {@code .country} element, reading the trimmed text of
     * {@code .country-name}, {@code .country-capital} and {@code .country-population}.
     *
     * @param url address of the page to crawl
     * @return the countries found on the page; empty if none match
     * @throws NetworkException if the request fails or the server answers with a non-2xx status
     * @throws ParseException if the response body cannot be read as text
     */
    @Override
    public List<Country> crawl(String url) throws CrawlerException {
        // Report the URL that is actually requested rather than a hard-coded one.
        System.out.println("正在爬取 [" + url + "]...");
        List<Country> countries = new ArrayList<>();
        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
            HttpGet httpGet = new HttpGet(url);
            try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
                // An error page would otherwise be parsed into an empty result without any error.
                int status = response.getCode();
                if (status < 200 || status >= 300) {
                    throw new NetworkException("网络请求失败,HTTP 状态码: " + status, null);
                }
                String html = EntityUtils.toString(response.getEntity());
                Document doc = Jsoup.parse(html);
                Elements countryElements = doc.select(".country");
                for (Element countryEl : countryElements) {
                    String name = countryEl.select(".country-name").text().trim();
                    String capital = countryEl.select(".country-capital").text().trim();
                    String population = countryEl.select(".country-population").text().trim();
                    countries.add(new Country(name, capital, population));
                }
            } catch (org.apache.hc.core5.http.ParseException e) {
                throw new ParseException("解析响应内容失败", e);
            }
        } catch (IOException e) {
            throw new NetworkException("网络请求失败", e);
        }
        return countries;
    }
}

26
src/main/java/com/scraper/view/ConsoleView.java

@ -0,0 +1,26 @@
package com.scraper.view;
import com.scraper.model.Book;
import com.scraper.model.Country;
import com.scraper.model.Quote;
import java.util.List;
/**
 * Prints crawled items to standard output, one line per item.
 */
public class ConsoleView {

    /**
     * Prints the title and price of every book.
     *
     * @param books books to print
     */
    public static void printBooks(List<Book> books) {
        books.forEach(item -> {
            String line = "书名: 《" + item.getTitle() + "》, 价格: £" + item.getPrice();
            System.out.println(line);
        });
    }

    /**
     * Prints the text and author of every quote.
     *
     * @param quotes quotes to print
     */
    public static void printQuotes(List<Quote> quotes) {
        quotes.forEach(item -> {
            String line = "\"" + item.getText() + "\" —— " + item.getAuthor();
            System.out.println(line);
        });
    }

    /**
     * Prints the name, capital and population of every country.
     *
     * @param countries countries to print
     */
    public static void printCountries(List<Country> countries) {
        countries.forEach(item -> {
            String line = "国家: " + item.getName()
                    + ", 首都: " + item.getCapital()
                    + ", 人口: " + item.getPopulation();
            System.out.println(line);
        });
    }
}

31
src/main/java/com/scraper/view/FileSaver.java

@ -0,0 +1,31 @@
package com.scraper.view;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.scraper.exception.StorageException;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.function.Function;
/**
 * Writes crawled data to files.
 */
public class FileSaver {

    /**
     * Serialises {@code data} as pretty-printed JSON into {@code filePath},
     * creating the parent directory first when the path has one.
     *
     * @param data object to serialise
     * @param filePath destination file
     * @throws StorageException if the directory or the file cannot be written
     */
    public static void saveToJson(Object data, String filePath) throws StorageException {
        Path target = Paths.get(filePath);
        try {
            Path directory = target.getParent();
            // A bare file name has no parent, so there is nothing to create.
            if (directory != null) {
                Files.createDirectories(directory);
            }
            new ObjectMapper()
                    .writerWithDefaultPrettyPrinter()
                    .writeValue(new File(filePath), data);
        } catch (IOException e) {
            throw new StorageException("无法写入 JSON 文件: " + filePath, e);
        }
    }

    /**
     * CSV export placeholder; not implemented.
     *
     * @param items rows to export
     * @param filePath destination file
     * @param headers column headers
     * @param rowMapper maps an item to its CSV line
     * @throws StorageException declared for callers; never thrown by the current body
     * @throws UnsupportedOperationException always
     */
    public static void saveToCsv(List<?> items, String filePath, String[] headers, Function<Object, String> rowMapper) throws StorageException {
        throw new UnsupportedOperationException("CSV 保存功能暂未实现");
    }
}

BIN
w2陈全文202529010113/DataCleaner.class

Binary file not shown.

33
w2陈全文202529010113/DataCleaner.java

@ -1,33 +0,0 @@
/**
 * Filters a fixed set of sensor readings and prints the average of the valid ones.
 *
 * <p>A reading of 999 stops processing; readings outside 1..100 are reported and skipped.
 */
public class DataCleaner {

    /**
     * Processes the built-in readings and prints warnings and the resulting average.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        int[] readings = {85, -5, 92, 0, 105, 999, 88, 76};
        int total = 0;     // sum of the accepted readings
        int accepted = 0;  // number of accepted readings

        for (int i = 0; i < readings.length; i++) {
            int reading = readings[i];
            // 999 marks a disconnected sensor: stop before looking at anything else.
            if (reading == 999) {
                System.out.println("致命错误:传感器掉线,终止处理");
                break;
            }
            boolean inRange = reading >= 1 && reading <= 100;
            if (!inRange) {
                // Zero, negative, or above 100.
                System.out.println("警告:发现越界数据 [" + reading + "],已跳过");
                continue;
            }
            total += reading;
            accepted++;
        }

        // Report the result.
        if (accepted == 0) {
            System.out.println("无有效数据");
            return;
        }
        double mean = (double) total / accepted;
        System.out.println("有效数据的平均值为:" + mean);
    }
}
Loading…
Cancel
Save