java/Crawler/BookCrawler.java


								import java.io.FileOutputStream;
								import java.io.IOException;
								import java.io.OutputStreamWriter;
								import java.io.PrintWriter;
								import java.nio.charset.StandardCharsets;
								import java.util.ArrayList;
								import java.util.List;
								import java.util.Locale;
								import java.util.regex.Matcher;
								import java.util.regex.Pattern;

								import org.jsoup.Jsoup;
								import org.jsoup.nodes.Document;
								import org.jsoup.nodes.Element;
								import org.jsoup.select.Elements;

								// Bookable interface: every book must provide a title, price, original price, discount, image URL and author.

								/**
								 * Read-only view of a book record.
								 *
								 * <p>Implementations expose three textual attributes (title, image URL, author)
								 * and three numeric attributes (price, original price, discount).
								 */
								interface Bookable {
								    // ---- textual attributes ----

								    /** @return the book title */
								    String getTitle();

								    /** @return the author text */
								    String getAuthor();

								    /** @return the link to the book's image */
								    String getImageUrl();

								    // ---- numeric attributes ----

								    /** @return the price */
								    double getPrice();

								    /** @return the original price */
								    double getOriginalPrice();

								    /** @return the discount value */
								    double getDiscount();
								}

								// Generic crawler interface; T must be a subtype of Bookable.

								interface Crawler<T extends Bookable> {

								    List<T> crawlPage(String url) throws IOException;  //单页爬取

								    List<T> crawlAllPages(int startPage, int endPage);  //批量爬取

								    default void printResults(List<T> items) {

								        items.forEach(System.out::println);   //打印所有图书信息

								    }  //default能用但不强制用且可以override修改的方法

								}

								// Abstract base class AbstractBook, implementing the Bookable interface.

								/**
								 * Skeletal {@link Bookable} implementation that stores each attribute in a
								 * protected field and exposes it through the matching getter.
								 */
								abstract class AbstractBook implements Bookable {

								    /** Layout used by {@link #toString()}; the image URL is intentionally not part of it. */
								    private static final String TO_STRING_FORMAT =
								            "Book{title='%s', price=%.2f, originalPrice=%.2f, discount=%.1f, author='%s'}";

								    protected String title;
								    protected String imageUrl;
								    protected String author;

								    protected double price;
								    protected double originalPrice;
								    protected double discount;

								    @Override
								    public String getTitle() {
								        return title;
								    }

								    @Override
								    public double getPrice() {
								        return price;
								    }

								    @Override
								    public double getOriginalPrice() {
								        return originalPrice;
								    }

								    @Override
								    public double getDiscount() {
								        return discount;
								    }

								    @Override
								    public String getImageUrl() {
								        return imageUrl;
								    }

								    @Override
								    public String getAuthor() {
								        return author;
								    }

								    /**
								     * Renders the book as a single line suitable for console output.
								     *
								     * @return a string containing title, price, original price, discount and author
								     */
								    @Override
								    public String toString() {
								        return String.format(TO_STRING_FORMAT, title, price, originalPrice, discount, author);
								    }
								}

								// Concrete book class Book; extends AbstractBook and thereby implements Bookable.

								/**
								 * Concrete book record. All state lives in the fields inherited from
								 * {@link AbstractBook}; this class only supplies constructors.
								 */
								class Book extends AbstractBook {

								    /** Creates a book whose fields are all left at their Java default values. */
								    public Book() {
								        super();
								    }

								    /**
								     * Creates a fully populated book.
								     *
								     * @param title         book title
								     * @param price         price
								     * @param originalPrice original price
								     * @param discount      discount value
								     * @param imageUrl      link to the book's image
								     * @param author        author text
								     */
								    public Book(String title, double price, double originalPrice, double discount, String imageUrl, String author) {
								        super();
								        // Textual attributes.
								        this.title = title;
								        this.imageUrl = imageUrl;
								        this.author = author;
								        // Numeric attributes.
								        this.price = price;
								        this.originalPrice = originalPrice;
								        this.discount = discount;
								    }
								}

								// Abstract base crawler AbstractCrawler, implementing the Crawler interface.

								/**
								 * Shared crawler plumbing: iterates over a page range, fetches documents and
								 * converts price/discount text into numbers. Subclasses supply
								 * {@link #crawlPage(String)}.
								 *
								 * @param <T> the concrete item type produced by the crawler
								 */
								abstract class AbstractCrawler<T extends Bookable> implements Crawler<T> {

								    /** URL template for a listing page; {@code %d} is replaced with the page number. */
								    protected static final String BASE_URL = "http://bang.dangdang.com/books/bestsellers/%d";

								    /** Value returned by {@link #parseDiscount(String)} when no discount can be read. */
								    private static final double NO_DISCOUNT = 10.0;

								    /** First decimal token in a string; allows thousands separators and a leading-dot fraction. */
								    private static final Pattern PRICE_PATTERN =
								            Pattern.compile("\\d[\\d,]*(?:\\.\\d+)?|\\.\\d+");

								    /** A decimal number immediately followed (optionally after whitespace) by the "折" marker. */
								    private static final Pattern DISCOUNT_PATTERN =
								            Pattern.compile("(\\d+(?:\\.\\d+)?|\\.\\d+)\\s*折");

								    /**
								     * Crawls every page from {@code startPage} to {@code endPage}, both inclusive.
								     *
								     * <p>A page that fails with an {@link IOException} is reported on standard
								     * error and skipped; the remaining pages are still crawled.
								     *
								     * @param startPage first page number
								     * @param endPage   last page number
								     * @return all items from the pages that could be crawled, in page order
								     */
								    @Override
								    public List<T> crawlAllPages(int startPage, int endPage) {
								        List<T> allItems = new ArrayList<>();
								        for (int page = startPage; page <= endPage; page++) {
								            String url = String.format(BASE_URL, page);
								            try {
								                List<T> items = crawlPage(url);
								                allItems.addAll(items);
								                System.out.println("Page " + page + ": " + items.size() + " items");
								            } catch (IOException e) {
								                // Best effort: name the failing page so the gap in the result is traceable.
								                System.err.println("Error: page " + page + " (" + url + "): " + e.getMessage());
								            }
								        }
								        return allItems;
								    }

								    /**
								     * Downloads and parses the document at {@code url} with jsoup, using a
								     * timeout value of 10000 and a "Mozilla/5.0" user agent.
								     *
								     * @param url address to fetch
								     * @return the parsed document
								     * @throws IOException if the request fails
								     */
								    protected Document fetchDoc(String url) throws IOException {
								        return Jsoup.connect(url).timeout(10000).userAgent("Mozilla/5.0").get();
								    }

								    /**
								     * Extracts the first price-like number from {@code text}.
								     *
								     * <p>Only the first numeric token is used, so text that concatenates several
								     * prices (for example {@code "¥25.00 ¥12.99"}) yields the first one instead
								     * of failing. Thousands separators inside the token are ignored.
								     *
								     * @param text raw price text; may be {@code null} or empty
								     * @return the parsed value, or {@code 0.0} when no number is present
								     */
								    protected double parsePrice(String text) {
								        if (text == null) {
								            return 0.0;
								        }
								        Matcher matcher = PRICE_PATTERN.matcher(text);
								        if (!matcher.find()) {
								            return 0.0;
								        }
								        // The pattern only admits digits, commas and one dot, so after removing
								        // the commas the token is always a valid decimal literal.
								        return Double.parseDouble(matcher.group().replace(",", ""));
								    }

								    /**
								     * Extracts the number that precedes the "折" marker in {@code text}.
								     *
								     * <p>Surrounding characters such as ASCII or full-width parentheses are
								     * ignored because only the number adjacent to the marker is read.
								     *
								     * @param text raw discount text; may be {@code null} or empty
								     * @return the parsed discount, or {@code 10.0} when the text carries no
								     *         readable "N折" value
								     */
								    protected double parseDiscount(String text) {
								        if (text == null) {
								            return NO_DISCOUNT;
								        }
								        Matcher matcher = DISCOUNT_PATTERN.matcher(text);
								        if (!matcher.find()) {
								            return NO_DISCOUNT;
								        }
								        return Double.parseDouble(matcher.group(1));
								    }
								}

								// Concrete crawler BookCrawler; extends AbstractCrawler<Book> and thereby implements Crawler<Book>.

								/**
								 * Crawler that turns the {@code <li>} elements of a listing page into
								 * {@link Book} objects and can dump the result to a CSV file.
								 */
								class BookCrawler extends AbstractCrawler<Book> {

								    /**
								     * List items whose link title is shorter than this are skipped.
								     * NOTE(review): this looks like a heuristic for filtering out non-book
								     * {@code <li>} elements; it also drops books with short titles — confirm.
								     */
								    private static final int MIN_TITLE_LENGTH = 10;

								    /**
								     * Fetches {@code url} and builds one {@link Book} per qualifying
								     * {@code <li>} element.
								     *
								     * <p>For each attribute a specific selector is tried first and broader
								     * selectors are used as fallbacks when it yields no text. A missing original
								     * price falls back to the price text.
								     *
								     * @param url address of the page to crawl
								     * @return the books found on the page, in document order
								     * @throws IOException if the page cannot be fetched
								     */
								    @Override
								    public List<Book> crawlPage(String url) throws IOException {
								        List<Book> books = new ArrayList<>();
								        Document doc = fetchDoc(url);

								        for (Element item : doc.select("li")) {
								            // jsoup returns "" (never null) for a missing attribute, so the
								            // length test also covers the "no title" case.
								            String title = item.select("a[title]").attr("title");
								            if (title.length() < MIN_TITLE_LENGTH) {
								                continue;
								            }

								            String priceText = firstNonEmptyText(item, "span.price_n", ".price", "[class*=price]");
								            if (priceText.isEmpty()) {
								                priceText = "0";
								            }

								            String originalPriceText = item.select("span.price_r").text();
								            if (originalPriceText.isEmpty()) {
								                originalPriceText = priceText;
								            }

								            String discountText = firstNonEmptyText(item, "span.discount", "[class*=discount]");

								            Elements images = item.select("img");
								            String imageUrl = images.attr("src");
								            if (imageUrl.isEmpty()) {
								                imageUrl = images.attr("data-original");
								            }

								            String author = firstNonEmptyText(item, "span.author", "[class*=author]");

								            books.add(new Book(
								                    title,
								                    parsePrice(priceText),
								                    parsePrice(originalPriceText),
								                    parseDiscount(discountText),
								                    imageUrl,
								                    author));
								        }
								        return books;
								    }

								    /**
								     * Program entry point: crawls pages 1 through 8, writes the result to
								     * {@code books_data.txt} and prints the total number of books.
								     *
								     * @param args unused
								     */
								    public static void main(String[] args) {
								        Crawler<Book> crawler = new BookCrawler();
								        List<Book> books = crawler.crawlAllPages(1, 8);
								        saveToFile(books, "books_data.txt");
								        System.out.println("Total: " + books.size() + " books");
								    }

								    /**
								     * Writes {@code books} to {@code filename} as UTF-8 CSV with a header row.
								     *
								     * <p>Text fields are quoted when they contain a comma, a double quote or a
								     * line break, and numbers are formatted with {@link Locale#ROOT} so the
								     * decimal separator is always a dot. Failures are reported on standard
								     * error rather than thrown.
								     *
								     * @param books    the books to write
								     * @param filename path of the output file; an existing file is overwritten
								     */
								    private static void saveToFile(List<Book> books, String filename) {
								        try (PrintWriter w = new PrintWriter(
								                new OutputStreamWriter(new FileOutputStream(filename), StandardCharsets.UTF_8))) {
								            w.println("Title,Price,OriginalPrice,Discount,ImageUrl,Author");
								            for (Book b : books) {
								                w.printf(Locale.ROOT, "%s,%.2f,%.2f,%.1f,%s,%s%n",
								                        csvField(b.getTitle()), b.getPrice(), b.getOriginalPrice(),
								                        b.getDiscount(), csvField(b.getImageUrl()), csvField(b.getAuthor()));
								            }
								            // PrintWriter never throws on write failures; it only records them.
								            if (w.checkError()) {
								                System.err.println("Save error: failed while writing " + filename);
								            }
								        } catch (IOException e) {
								            System.err.println("Save error: " + e.getMessage());
								        }
								    }

								    /** Returns the text of the first selector in {@code selectors} that yields any, or "" if none does. */
								    private static String firstNonEmptyText(Element scope, String... selectors) {
								        for (String selector : selectors) {
								            String text = scope.select(selector).text();
								            if (!text.isEmpty()) {
								                return text;
								            }
								        }
								        return "";
								    }

								    /** Encodes one CSV field: {@code null} becomes empty, and values needing it are quoted with quotes doubled. */
								    private static String csvField(String value) {
								        if (value == null) {
								            return "";
								        }
								        boolean needsQuoting = value.indexOf(',') >= 0
								                || value.indexOf('"') >= 0
								                || value.indexOf('\n') >= 0
								                || value.indexOf('\r') >= 0;
								        if (!needsQuoting) {
								            return value;
								        }
								        return "\"" + value.replace("\"", "\"\"") + "\"";
								    }
								}