commit 8198b8c9b77d37f36b63aace8015142b189d0e6a
Author: 陈全文 <202529010113@innoschool.org>
Date: Sun May 31 16:42:14 2026 +0800
Initial commit: Web Scraper project with Maven
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2a842fc
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,28 @@
+# Maven
+target/
+pom.xml.tag
+pom.xml.releaseBackup
+pom.xml.versionsBackup
+pom.xml.next
+release.properties
+dependency-reduced-pom.xml
+buildNumber.properties
+.mvn/timing.properties
+.mvn/wrapper/maven-wrapper.jar
+
+# IDE
+.idea/
+*.iml
+*.ipr
+*.iws
+.project
+.classpath
+.settings/
+.vscode/
+
+# OS
+.DS_Store
+Thumbs.db
+
+# Logs
+*.log
diff --git a/202529010113-陈全文-期末实验报告.docx b/202529010113-陈全文-期末实验报告.docx
new file mode 100644
index 0000000..9f0a2a5
Binary files /dev/null and b/202529010113-陈全文-期末实验报告.docx differ
diff --git a/output/books.json b/output/books.json
new file mode 100644
index 0000000..398bb97
--- /dev/null
+++ b/output/books.json
@@ -0,0 +1,61 @@
+[ {
+ "title" : "A Light in the Attic",
+ "price" : "51.77"
+}, {
+ "title" : "Tipping the Velvet",
+ "price" : "53.74"
+}, {
+ "title" : "Soumission",
+ "price" : "50.10"
+}, {
+ "title" : "Sharp Objects",
+ "price" : "47.82"
+}, {
+ "title" : "Sapiens: A Brief History of Humankind",
+ "price" : "54.23"
+}, {
+ "title" : "The Requiem Red",
+ "price" : "22.65"
+}, {
+ "title" : "The Dirty Little Secrets of Getting Your Dream Job",
+ "price" : "33.34"
+}, {
+ "title" : "The Coming Woman: A Novel Based on the Life of the Infamous Feminist, Victoria Woodhull",
+ "price" : "17.93"
+}, {
+ "title" : "The Boys in the Boat: Nine Americans and Their Epic Quest for Gold at the 1936 Berlin Olympics",
+ "price" : "22.60"
+}, {
+ "title" : "The Black Maria",
+ "price" : "52.15"
+}, {
+ "title" : "Starving Hearts (Triangular Trade Trilogy, #1)",
+ "price" : "13.99"
+}, {
+ "title" : "Shakespeare's Sonnets",
+ "price" : "20.66"
+}, {
+ "title" : "Set Me Free",
+ "price" : "17.46"
+}, {
+ "title" : "Scott Pilgrim's Precious Little Life (Scott Pilgrim #1)",
+ "price" : "52.29"
+}, {
+ "title" : "Rip it Up and Start Again",
+ "price" : "35.02"
+}, {
+ "title" : "Our Band Could Be Your Life: Scenes from the American Indie Underground, 1981-1991",
+ "price" : "57.25"
+}, {
+ "title" : "Olio",
+ "price" : "23.88"
+}, {
+ "title" : "Mesaerion: The Best Science Fiction Stories 1800-1849",
+ "price" : "37.59"
+}, {
+ "title" : "Libertarianism for Beginners",
+ "price" : "51.33"
+}, {
+ "title" : "It's Only the Himalayas",
+ "price" : "45.17"
+} ]
\ No newline at end of file
diff --git a/output/countries.json b/output/countries.json
new file mode 100644
index 0000000..83484e4
--- /dev/null
+++ b/output/countries.json
@@ -0,0 +1,1001 @@
+[ {
+ "name" : "Andorra",
+ "capital" : "Andorra la Vella",
+ "population" : "84000"
+}, {
+ "name" : "United Arab Emirates",
+ "capital" : "Abu Dhabi",
+ "population" : "4975593"
+}, {
+ "name" : "Afghanistan",
+ "capital" : "Kabul",
+ "population" : "29121286"
+}, {
+ "name" : "Antigua and Barbuda",
+ "capital" : "St. John's",
+ "population" : "86754"
+}, {
+ "name" : "Anguilla",
+ "capital" : "The Valley",
+ "population" : "13254"
+}, {
+ "name" : "Albania",
+ "capital" : "Tirana",
+ "population" : "2986952"
+}, {
+ "name" : "Armenia",
+ "capital" : "Yerevan",
+ "population" : "2968000"
+}, {
+ "name" : "Angola",
+ "capital" : "Luanda",
+ "population" : "13068161"
+}, {
+ "name" : "Antarctica",
+ "capital" : "None",
+ "population" : "0"
+}, {
+ "name" : "Argentina",
+ "capital" : "Buenos Aires",
+ "population" : "41343201"
+}, {
+ "name" : "American Samoa",
+ "capital" : "Pago Pago",
+ "population" : "57881"
+}, {
+ "name" : "Austria",
+ "capital" : "Vienna",
+ "population" : "8205000"
+}, {
+ "name" : "Australia",
+ "capital" : "Canberra",
+ "population" : "21515754"
+}, {
+ "name" : "Aruba",
+ "capital" : "Oranjestad",
+ "population" : "71566"
+}, {
+ "name" : "Åland",
+ "capital" : "Mariehamn",
+ "population" : "26711"
+}, {
+ "name" : "Azerbaijan",
+ "capital" : "Baku",
+ "population" : "8303512"
+}, {
+ "name" : "Bosnia and Herzegovina",
+ "capital" : "Sarajevo",
+ "population" : "4590000"
+}, {
+ "name" : "Barbados",
+ "capital" : "Bridgetown",
+ "population" : "285653"
+}, {
+ "name" : "Bangladesh",
+ "capital" : "Dhaka",
+ "population" : "156118464"
+}, {
+ "name" : "Belgium",
+ "capital" : "Brussels",
+ "population" : "10403000"
+}, {
+ "name" : "Burkina Faso",
+ "capital" : "Ouagadougou",
+ "population" : "16241811"
+}, {
+ "name" : "Bulgaria",
+ "capital" : "Sofia",
+ "population" : "7148785"
+}, {
+ "name" : "Bahrain",
+ "capital" : "Manama",
+ "population" : "738004"
+}, {
+ "name" : "Burundi",
+ "capital" : "Bujumbura",
+ "population" : "9863117"
+}, {
+ "name" : "Benin",
+ "capital" : "Porto-Novo",
+ "population" : "9056010"
+}, {
+ "name" : "Saint Barthélemy",
+ "capital" : "Gustavia",
+ "population" : "8450"
+}, {
+ "name" : "Bermuda",
+ "capital" : "Hamilton",
+ "population" : "65365"
+}, {
+ "name" : "Brunei",
+ "capital" : "Bandar Seri Begawan",
+ "population" : "395027"
+}, {
+ "name" : "Bolivia",
+ "capital" : "Sucre",
+ "population" : "9947418"
+}, {
+ "name" : "Bonaire",
+ "capital" : "Kralendijk",
+ "population" : "18012"
+}, {
+ "name" : "Brazil",
+ "capital" : "Brasília",
+ "population" : "201103330"
+}, {
+ "name" : "Bahamas",
+ "capital" : "Nassau",
+ "population" : "301790"
+}, {
+ "name" : "Bhutan",
+ "capital" : "Thimphu",
+ "population" : "699847"
+}, {
+ "name" : "Bouvet Island",
+ "capital" : "None",
+ "population" : "0"
+}, {
+ "name" : "Botswana",
+ "capital" : "Gaborone",
+ "population" : "2029307"
+}, {
+ "name" : "Belarus",
+ "capital" : "Minsk",
+ "population" : "9685000"
+}, {
+ "name" : "Belize",
+ "capital" : "Belmopan",
+ "population" : "314522"
+}, {
+ "name" : "Canada",
+ "capital" : "Ottawa",
+ "population" : "33679000"
+}, {
+ "name" : "Cocos [Keeling] Islands",
+ "capital" : "West Island",
+ "population" : "628"
+}, {
+ "name" : "Democratic Republic of the Congo",
+ "capital" : "Kinshasa",
+ "population" : "70916439"
+}, {
+ "name" : "Central African Republic",
+ "capital" : "Bangui",
+ "population" : "4844927"
+}, {
+ "name" : "Republic of the Congo",
+ "capital" : "Brazzaville",
+ "population" : "3039126"
+}, {
+ "name" : "Switzerland",
+ "capital" : "Bern",
+ "population" : "7581000"
+}, {
+ "name" : "Ivory Coast",
+ "capital" : "Yamoussoukro",
+ "population" : "21058798"
+}, {
+ "name" : "Cook Islands",
+ "capital" : "Avarua",
+ "population" : "21388"
+}, {
+ "name" : "Chile",
+ "capital" : "Santiago",
+ "population" : "16746491"
+}, {
+ "name" : "Cameroon",
+ "capital" : "Yaoundé",
+ "population" : "19294149"
+}, {
+ "name" : "China",
+ "capital" : "Beijing",
+ "population" : "1330044000"
+}, {
+ "name" : "Colombia",
+ "capital" : "Bogotá",
+ "population" : "47790000"
+}, {
+ "name" : "Costa Rica",
+ "capital" : "San José",
+ "population" : "4516220"
+}, {
+ "name" : "Cuba",
+ "capital" : "Havana",
+ "population" : "11423000"
+}, {
+ "name" : "Cape Verde",
+ "capital" : "Praia",
+ "population" : "508659"
+}, {
+ "name" : "Curacao",
+ "capital" : "Willemstad",
+ "population" : "141766"
+}, {
+ "name" : "Christmas Island",
+ "capital" : "Flying Fish Cove",
+ "population" : "1500"
+}, {
+ "name" : "Cyprus",
+ "capital" : "Nicosia",
+ "population" : "1102677"
+}, {
+ "name" : "Czech Republic",
+ "capital" : "Prague",
+ "population" : "10476000"
+}, {
+ "name" : "Germany",
+ "capital" : "Berlin",
+ "population" : "81802257"
+}, {
+ "name" : "Djibouti",
+ "capital" : "Djibouti",
+ "population" : "740528"
+}, {
+ "name" : "Denmark",
+ "capital" : "Copenhagen",
+ "population" : "5484000"
+}, {
+ "name" : "Dominica",
+ "capital" : "Roseau",
+ "population" : "72813"
+}, {
+ "name" : "Dominican Republic",
+ "capital" : "Santo Domingo",
+ "population" : "9823821"
+}, {
+ "name" : "Algeria",
+ "capital" : "Algiers",
+ "population" : "34586184"
+}, {
+ "name" : "Ecuador",
+ "capital" : "Quito",
+ "population" : "14790608"
+}, {
+ "name" : "Estonia",
+ "capital" : "Tallinn",
+ "population" : "1291170"
+}, {
+ "name" : "Egypt",
+ "capital" : "Cairo",
+ "population" : "80471869"
+}, {
+ "name" : "Western Sahara",
+ "capital" : "Laâyoune / El Aaiún",
+ "population" : "273008"
+}, {
+ "name" : "Eritrea",
+ "capital" : "Asmara",
+ "population" : "5792984"
+}, {
+ "name" : "Spain",
+ "capital" : "Madrid",
+ "population" : "46505963"
+}, {
+ "name" : "Ethiopia",
+ "capital" : "Addis Ababa",
+ "population" : "88013491"
+}, {
+ "name" : "Finland",
+ "capital" : "Helsinki",
+ "population" : "5244000"
+}, {
+ "name" : "Fiji",
+ "capital" : "Suva",
+ "population" : "875983"
+}, {
+ "name" : "Falkland Islands",
+ "capital" : "Stanley",
+ "population" : "2638"
+}, {
+ "name" : "Micronesia",
+ "capital" : "Palikir",
+ "population" : "107708"
+}, {
+ "name" : "Faroe Islands",
+ "capital" : "Tórshavn",
+ "population" : "48228"
+}, {
+ "name" : "France",
+ "capital" : "Paris",
+ "population" : "64768389"
+}, {
+ "name" : "Gabon",
+ "capital" : "Libreville",
+ "population" : "1545255"
+}, {
+ "name" : "United Kingdom",
+ "capital" : "London",
+ "population" : "62348447"
+}, {
+ "name" : "Grenada",
+ "capital" : "St. George's",
+ "population" : "107818"
+}, {
+ "name" : "Georgia",
+ "capital" : "Tbilisi",
+ "population" : "4630000"
+}, {
+ "name" : "French Guiana",
+ "capital" : "Cayenne",
+ "population" : "195506"
+}, {
+ "name" : "Guernsey",
+ "capital" : "St Peter Port",
+ "population" : "65228"
+}, {
+ "name" : "Ghana",
+ "capital" : "Accra",
+ "population" : "24339838"
+}, {
+ "name" : "Gibraltar",
+ "capital" : "Gibraltar",
+ "population" : "27884"
+}, {
+ "name" : "Greenland",
+ "capital" : "Nuuk",
+ "population" : "56375"
+}, {
+ "name" : "Gambia",
+ "capital" : "Bathurst",
+ "population" : "1593256"
+}, {
+ "name" : "Guinea",
+ "capital" : "Conakry",
+ "population" : "10324025"
+}, {
+ "name" : "Guadeloupe",
+ "capital" : "Basse-Terre",
+ "population" : "443000"
+}, {
+ "name" : "Equatorial Guinea",
+ "capital" : "Malabo",
+ "population" : "1014999"
+}, {
+ "name" : "Greece",
+ "capital" : "Athens",
+ "population" : "11000000"
+}, {
+ "name" : "South Georgia and the South Sandwich Islands",
+ "capital" : "Grytviken",
+ "population" : "30"
+}, {
+ "name" : "Guatemala",
+ "capital" : "Guatemala City",
+ "population" : "13550440"
+}, {
+ "name" : "Guam",
+ "capital" : "Hagåtña",
+ "population" : "159358"
+}, {
+ "name" : "Guinea-Bissau",
+ "capital" : "Bissau",
+ "population" : "1565126"
+}, {
+ "name" : "Guyana",
+ "capital" : "Georgetown",
+ "population" : "748486"
+}, {
+ "name" : "Hong Kong",
+ "capital" : "Hong Kong",
+ "population" : "6898686"
+}, {
+ "name" : "Heard Island and McDonald Islands",
+ "capital" : "None",
+ "population" : "0"
+}, {
+ "name" : "Honduras",
+ "capital" : "Tegucigalpa",
+ "population" : "7989415"
+}, {
+ "name" : "Croatia",
+ "capital" : "Zagreb",
+ "population" : "4491000"
+}, {
+ "name" : "Haiti",
+ "capital" : "Port-au-Prince",
+ "population" : "9648924"
+}, {
+ "name" : "Hungary",
+ "capital" : "Budapest",
+ "population" : "9982000"
+}, {
+ "name" : "Indonesia",
+ "capital" : "Jakarta",
+ "population" : "242968342"
+}, {
+ "name" : "Ireland",
+ "capital" : "Dublin",
+ "population" : "4622917"
+}, {
+ "name" : "Israel",
+ "capital" : "None",
+ "population" : "7353985"
+}, {
+ "name" : "Isle of Man",
+ "capital" : "Douglas",
+ "population" : "75049"
+}, {
+ "name" : "India",
+ "capital" : "New Delhi",
+ "population" : "1173108018"
+}, {
+ "name" : "British Indian Ocean Territory",
+ "capital" : "None",
+ "population" : "4000"
+}, {
+ "name" : "Iraq",
+ "capital" : "Baghdad",
+ "population" : "29671605"
+}, {
+ "name" : "Iran",
+ "capital" : "Tehran",
+ "population" : "76923300"
+}, {
+ "name" : "Iceland",
+ "capital" : "Reykjavik",
+ "population" : "308910"
+}, {
+ "name" : "Italy",
+ "capital" : "Rome",
+ "population" : "60340328"
+}, {
+ "name" : "Jersey",
+ "capital" : "Saint Helier",
+ "population" : "90812"
+}, {
+ "name" : "Jamaica",
+ "capital" : "Kingston",
+ "population" : "2847232"
+}, {
+ "name" : "Jordan",
+ "capital" : "Amman",
+ "population" : "6407085"
+}, {
+ "name" : "Japan",
+ "capital" : "Tokyo",
+ "population" : "127288000"
+}, {
+ "name" : "Kenya",
+ "capital" : "Nairobi",
+ "population" : "40046566"
+}, {
+ "name" : "Kyrgyzstan",
+ "capital" : "Bishkek",
+ "population" : "5776500"
+}, {
+ "name" : "Cambodia",
+ "capital" : "Phnom Penh",
+ "population" : "14453680"
+}, {
+ "name" : "Kiribati",
+ "capital" : "Tarawa",
+ "population" : "92533"
+}, {
+ "name" : "Comoros",
+ "capital" : "Moroni",
+ "population" : "773407"
+}, {
+ "name" : "Saint Kitts and Nevis",
+ "capital" : "Basseterre",
+ "population" : "51134"
+}, {
+ "name" : "North Korea",
+ "capital" : "Pyongyang",
+ "population" : "22912177"
+}, {
+ "name" : "South Korea",
+ "capital" : "Seoul",
+ "population" : "48422644"
+}, {
+ "name" : "Kuwait",
+ "capital" : "Kuwait City",
+ "population" : "2789132"
+}, {
+ "name" : "Cayman Islands",
+ "capital" : "George Town",
+ "population" : "44270"
+}, {
+ "name" : "Kazakhstan",
+ "capital" : "Astana",
+ "population" : "15340000"
+}, {
+ "name" : "Laos",
+ "capital" : "Vientiane",
+ "population" : "6368162"
+}, {
+ "name" : "Lebanon",
+ "capital" : "Beirut",
+ "population" : "4125247"
+}, {
+ "name" : "Saint Lucia",
+ "capital" : "Castries",
+ "population" : "160922"
+}, {
+ "name" : "Liechtenstein",
+ "capital" : "Vaduz",
+ "population" : "35000"
+}, {
+ "name" : "Sri Lanka",
+ "capital" : "Colombo",
+ "population" : "21513990"
+}, {
+ "name" : "Liberia",
+ "capital" : "Monrovia",
+ "population" : "3685076"
+}, {
+ "name" : "Lesotho",
+ "capital" : "Maseru",
+ "population" : "1919552"
+}, {
+ "name" : "Lithuania",
+ "capital" : "Vilnius",
+ "population" : "2944459"
+}, {
+ "name" : "Luxembourg",
+ "capital" : "Luxembourg",
+ "population" : "497538"
+}, {
+ "name" : "Latvia",
+ "capital" : "Riga",
+ "population" : "2217969"
+}, {
+ "name" : "Libya",
+ "capital" : "Tripoli",
+ "population" : "6461454"
+}, {
+ "name" : "Morocco",
+ "capital" : "Rabat",
+ "population" : "31627428"
+}, {
+ "name" : "Monaco",
+ "capital" : "Monaco",
+ "population" : "32965"
+}, {
+ "name" : "Moldova",
+ "capital" : "Chişinău",
+ "population" : "4324000"
+}, {
+ "name" : "Montenegro",
+ "capital" : "Podgorica",
+ "population" : "666730"
+}, {
+ "name" : "Saint Martin",
+ "capital" : "Marigot",
+ "population" : "35925"
+}, {
+ "name" : "Madagascar",
+ "capital" : "Antananarivo",
+ "population" : "21281844"
+}, {
+ "name" : "Marshall Islands",
+ "capital" : "Majuro",
+ "population" : "65859"
+}, {
+ "name" : "Macedonia",
+ "capital" : "Skopje",
+ "population" : "2062294"
+}, {
+ "name" : "Mali",
+ "capital" : "Bamako",
+ "population" : "13796354"
+}, {
+ "name" : "Myanmar [Burma]",
+ "capital" : "Naypyitaw",
+ "population" : "53414374"
+}, {
+ "name" : "Mongolia",
+ "capital" : "Ulan Bator",
+ "population" : "3086918"
+}, {
+ "name" : "Macao",
+ "capital" : "Macao",
+ "population" : "449198"
+}, {
+ "name" : "Northern Mariana Islands",
+ "capital" : "Saipan",
+ "population" : "53883"
+}, {
+ "name" : "Martinique",
+ "capital" : "Fort-de-France",
+ "population" : "432900"
+}, {
+ "name" : "Mauritania",
+ "capital" : "Nouakchott",
+ "population" : "3205060"
+}, {
+ "name" : "Montserrat",
+ "capital" : "Plymouth",
+ "population" : "9341"
+}, {
+ "name" : "Malta",
+ "capital" : "Valletta",
+ "population" : "403000"
+}, {
+ "name" : "Mauritius",
+ "capital" : "Port Louis",
+ "population" : "1294104"
+}, {
+ "name" : "Maldives",
+ "capital" : "Malé",
+ "population" : "395650"
+}, {
+ "name" : "Malawi",
+ "capital" : "Lilongwe",
+ "population" : "15447500"
+}, {
+ "name" : "Mexico",
+ "capital" : "Mexico City",
+ "population" : "112468855"
+}, {
+ "name" : "Malaysia",
+ "capital" : "Kuala Lumpur",
+ "population" : "28274729"
+}, {
+ "name" : "Mozambique",
+ "capital" : "Maputo",
+ "population" : "22061451"
+}, {
+ "name" : "Namibia",
+ "capital" : "Windhoek",
+ "population" : "2128471"
+}, {
+ "name" : "New Caledonia",
+ "capital" : "Noumea",
+ "population" : "216494"
+}, {
+ "name" : "Niger",
+ "capital" : "Niamey",
+ "population" : "15878271"
+}, {
+ "name" : "Norfolk Island",
+ "capital" : "Kingston",
+ "population" : "1828"
+}, {
+ "name" : "Nigeria",
+ "capital" : "Abuja",
+ "population" : "154000000"
+}, {
+ "name" : "Nicaragua",
+ "capital" : "Managua",
+ "population" : "5995928"
+}, {
+ "name" : "Netherlands",
+ "capital" : "Amsterdam",
+ "population" : "16645000"
+}, {
+ "name" : "Norway",
+ "capital" : "Oslo",
+ "population" : "5009150"
+}, {
+ "name" : "Nepal",
+ "capital" : "Kathmandu",
+ "population" : "28951852"
+}, {
+ "name" : "Nauru",
+ "capital" : "Yaren",
+ "population" : "10065"
+}, {
+ "name" : "Niue",
+ "capital" : "Alofi",
+ "population" : "2166"
+}, {
+ "name" : "New Zealand",
+ "capital" : "Wellington",
+ "population" : "4252277"
+}, {
+ "name" : "Oman",
+ "capital" : "Muscat",
+ "population" : "2967717"
+}, {
+ "name" : "Panama",
+ "capital" : "Panama City",
+ "population" : "3410676"
+}, {
+ "name" : "Peru",
+ "capital" : "Lima",
+ "population" : "29907003"
+}, {
+ "name" : "French Polynesia",
+ "capital" : "Papeete",
+ "population" : "270485"
+}, {
+ "name" : "Papua New Guinea",
+ "capital" : "Port Moresby",
+ "population" : "6064515"
+}, {
+ "name" : "Philippines",
+ "capital" : "Manila",
+ "population" : "99900177"
+}, {
+ "name" : "Pakistan",
+ "capital" : "Islamabad",
+ "population" : "184404791"
+}, {
+ "name" : "Poland",
+ "capital" : "Warsaw",
+ "population" : "38500000"
+}, {
+ "name" : "Saint Pierre and Miquelon",
+ "capital" : "Saint-Pierre",
+ "population" : "7012"
+}, {
+ "name" : "Pitcairn Islands",
+ "capital" : "Adamstown",
+ "population" : "46"
+}, {
+ "name" : "Puerto Rico",
+ "capital" : "San Juan",
+ "population" : "3916632"
+}, {
+ "name" : "Palestine",
+ "capital" : "None",
+ "population" : "3800000"
+}, {
+ "name" : "Portugal",
+ "capital" : "Lisbon",
+ "population" : "10676000"
+}, {
+ "name" : "Palau",
+ "capital" : "Melekeok",
+ "population" : "19907"
+}, {
+ "name" : "Paraguay",
+ "capital" : "Asunción",
+ "population" : "6375830"
+}, {
+ "name" : "Qatar",
+ "capital" : "Doha",
+ "population" : "840926"
+}, {
+ "name" : "Réunion",
+ "capital" : "Saint-Denis",
+ "population" : "776948"
+}, {
+ "name" : "Romania",
+ "capital" : "Bucharest",
+ "population" : "21959278"
+}, {
+ "name" : "Serbia",
+ "capital" : "Belgrade",
+ "population" : "7344847"
+}, {
+ "name" : "Russia",
+ "capital" : "Moscow",
+ "population" : "140702000"
+}, {
+ "name" : "Rwanda",
+ "capital" : "Kigali",
+ "population" : "11055976"
+}, {
+ "name" : "Saudi Arabia",
+ "capital" : "Riyadh",
+ "population" : "25731776"
+}, {
+ "name" : "Solomon Islands",
+ "capital" : "Honiara",
+ "population" : "559198"
+}, {
+ "name" : "Seychelles",
+ "capital" : "Victoria",
+ "population" : "88340"
+}, {
+ "name" : "Sudan",
+ "capital" : "Khartoum",
+ "population" : "35000000"
+}, {
+ "name" : "Sweden",
+ "capital" : "Stockholm",
+ "population" : "9828655"
+}, {
+ "name" : "Singapore",
+ "capital" : "Singapore",
+ "population" : "4701069"
+}, {
+ "name" : "Saint Helena",
+ "capital" : "Jamestown",
+ "population" : "7460"
+}, {
+ "name" : "Slovenia",
+ "capital" : "Ljubljana",
+ "population" : "2007000"
+}, {
+ "name" : "Svalbard and Jan Mayen",
+ "capital" : "Longyearbyen",
+ "population" : "2550"
+}, {
+ "name" : "Slovakia",
+ "capital" : "Bratislava",
+ "population" : "5455000"
+}, {
+ "name" : "Sierra Leone",
+ "capital" : "Freetown",
+ "population" : "5245695"
+}, {
+ "name" : "San Marino",
+ "capital" : "San Marino",
+ "population" : "31477"
+}, {
+ "name" : "Senegal",
+ "capital" : "Dakar",
+ "population" : "12323252"
+}, {
+ "name" : "Somalia",
+ "capital" : "Mogadishu",
+ "population" : "10112453"
+}, {
+ "name" : "Suriname",
+ "capital" : "Paramaribo",
+ "population" : "492829"
+}, {
+ "name" : "South Sudan",
+ "capital" : "Juba",
+ "population" : "8260490"
+}, {
+ "name" : "São Tomé and Príncipe",
+ "capital" : "São Tomé",
+ "population" : "175808"
+}, {
+ "name" : "El Salvador",
+ "capital" : "San Salvador",
+ "population" : "6052064"
+}, {
+ "name" : "Sint Maarten",
+ "capital" : "Philipsburg",
+ "population" : "37429"
+}, {
+ "name" : "Syria",
+ "capital" : "Damascus",
+ "population" : "22198110"
+}, {
+ "name" : "Swaziland",
+ "capital" : "Mbabane",
+ "population" : "1354051"
+}, {
+ "name" : "Turks and Caicos Islands",
+ "capital" : "Cockburn Town",
+ "population" : "20556"
+}, {
+ "name" : "Chad",
+ "capital" : "N'Djamena",
+ "population" : "10543464"
+}, {
+ "name" : "French Southern Territories",
+ "capital" : "Port-aux-Français",
+ "population" : "140"
+}, {
+ "name" : "Togo",
+ "capital" : "Lomé",
+ "population" : "6587239"
+}, {
+ "name" : "Thailand",
+ "capital" : "Bangkok",
+ "population" : "67089500"
+}, {
+ "name" : "Tajikistan",
+ "capital" : "Dushanbe",
+ "population" : "7487489"
+}, {
+ "name" : "Tokelau",
+ "capital" : "None",
+ "population" : "1466"
+}, {
+ "name" : "East Timor",
+ "capital" : "Dili",
+ "population" : "1154625"
+}, {
+ "name" : "Turkmenistan",
+ "capital" : "Ashgabat",
+ "population" : "4940916"
+}, {
+ "name" : "Tunisia",
+ "capital" : "Tunis",
+ "population" : "10589025"
+}, {
+ "name" : "Tonga",
+ "capital" : "Nuku'alofa",
+ "population" : "122580"
+}, {
+ "name" : "Turkey",
+ "capital" : "Ankara",
+ "population" : "77804122"
+}, {
+ "name" : "Trinidad and Tobago",
+ "capital" : "Port of Spain",
+ "population" : "1228691"
+}, {
+ "name" : "Tuvalu",
+ "capital" : "Funafuti",
+ "population" : "10472"
+}, {
+ "name" : "Taiwan",
+ "capital" : "Taipei",
+ "population" : "22894384"
+}, {
+ "name" : "Tanzania",
+ "capital" : "Dodoma",
+ "population" : "41892895"
+}, {
+ "name" : "Ukraine",
+ "capital" : "Kiev",
+ "population" : "45415596"
+}, {
+ "name" : "Uganda",
+ "capital" : "Kampala",
+ "population" : "33398682"
+}, {
+ "name" : "U.S. Minor Outlying Islands",
+ "capital" : "None",
+ "population" : "0"
+}, {
+ "name" : "United States",
+ "capital" : "Washington",
+ "population" : "310232863"
+}, {
+ "name" : "Uruguay",
+ "capital" : "Montevideo",
+ "population" : "3477000"
+}, {
+ "name" : "Uzbekistan",
+ "capital" : "Tashkent",
+ "population" : "27865738"
+}, {
+ "name" : "Vatican City",
+ "capital" : "Vatican City",
+ "population" : "921"
+}, {
+ "name" : "Saint Vincent and the Grenadines",
+ "capital" : "Kingstown",
+ "population" : "104217"
+}, {
+ "name" : "Venezuela",
+ "capital" : "Caracas",
+ "population" : "27223228"
+}, {
+ "name" : "British Virgin Islands",
+ "capital" : "Road Town",
+ "population" : "21730"
+}, {
+ "name" : "U.S. Virgin Islands",
+ "capital" : "Charlotte Amalie",
+ "population" : "108708"
+}, {
+ "name" : "Vietnam",
+ "capital" : "Hanoi",
+ "population" : "89571130"
+}, {
+ "name" : "Vanuatu",
+ "capital" : "Port Vila",
+ "population" : "221552"
+}, {
+ "name" : "Wallis and Futuna",
+ "capital" : "Mata-Utu",
+ "population" : "16025"
+}, {
+ "name" : "Samoa",
+ "capital" : "Apia",
+ "population" : "192001"
+}, {
+ "name" : "Kosovo",
+ "capital" : "Pristina",
+ "population" : "1800000"
+}, {
+ "name" : "Yemen",
+ "capital" : "Sanaa",
+ "population" : "23495361"
+}, {
+ "name" : "Mayotte",
+ "capital" : "Mamoudzou",
+ "population" : "159042"
+}, {
+ "name" : "South Africa",
+ "capital" : "Pretoria",
+ "population" : "49000000"
+}, {
+ "name" : "Zambia",
+ "capital" : "Lusaka",
+ "population" : "13460305"
+}, {
+ "name" : "Zimbabwe",
+ "capital" : "Harare",
+ "population" : "11651858"
+} ]
\ No newline at end of file
diff --git a/output/quotes.json b/output/quotes.json
new file mode 100644
index 0000000..42788c0
--- /dev/null
+++ b/output/quotes.json
@@ -0,0 +1,31 @@
+[ {
+ "text" : "“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”",
+ "author" : "Albert Einstein"
+}, {
+ "text" : "“It is our choices, Harry, that show what we truly are, far more than our abilities.”",
+ "author" : "J.K. Rowling"
+}, {
+ "text" : "“There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.”",
+ "author" : "Albert Einstein"
+}, {
+ "text" : "“The person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.”",
+ "author" : "Jane Austen"
+}, {
+ "text" : "“Imperfection is beauty, madness is genius and it's better to be absolutely ridiculous than absolutely boring.”",
+ "author" : "Marilyn Monroe"
+}, {
+ "text" : "“Try not to become a man of success. Rather become a man of value.”",
+ "author" : "Albert Einstein"
+}, {
+ "text" : "“It is better to be hated for what you are than to be loved for what you are not.”",
+ "author" : "André Gide"
+}, {
+ "text" : "“I have not failed. I've just found 10,000 ways that won't work.”",
+ "author" : "Thomas A. Edison"
+}, {
+ "text" : "“A woman is like a tea bag; you never know how strong it is until it's in hot water.”",
+ "author" : "Eleanor Roosevelt"
+}, {
+ "text" : "“A day without sunshine is like, you know, night.”",
+ "author" : "Steve Martin"
+} ]
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..50b509b
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,76 @@
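+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>com.scraper</groupId>
+    <artifactId>web-scraper</artifactId>
+    <version>1.0-SNAPSHOT</version>
+    <packaging>jar</packaging>
+
+    <name>Web Scraper</name>
+    <description>A web scraping application</description>
+
+    <properties>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+        <maven.compiler.source>11</maven.compiler.source>
+        <maven.compiler.target>11</maven.compiler.target>
+    </properties>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.jsoup</groupId>
+            <artifactId>jsoup</artifactId>
+            <version>1.17.2</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.httpcomponents.client5</groupId>
+            <artifactId>httpclient5</artifactId>
+            <version>5.4.1</version>
+        </dependency>
+        <dependency>
+            <groupId>info.picocli</groupId>
+            <artifactId>picocli</artifactId>
+            <version>4.7.6</version>
+        </dependency>
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-databind</artifactId>
+            <version>2.17.2</version>
+        </dependency>
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-core</artifactId>
+            <version>2.17.2</version>
+        </dependency>
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-annotations</artifactId>
+            <version>2.17.2</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>3.13.0</version>
+                <configuration>
+                    <source>11</source>
+                    <target>11</target>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>exec-maven-plugin</artifactId>
+                <version>3.1.0</version>
+                <configuration>
+                    <mainClass>com.scraper.Main</mainClass>
+                    <commandlineArgs>--site all --output ./output</commandlineArgs>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+</project>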
\ No newline at end of file
diff --git a/src/main/java/com/scraper/Main.java b/src/main/java/com/scraper/Main.java
new file mode 100644
index 0000000..bea67e3
--- /dev/null
+++ b/src/main/java/com/scraper/Main.java
@@ -0,0 +1,109 @@
+package com.scraper;
+
+import com.scraper.command.CrawlerCommand;
+import com.scraper.command.CrawlAllCommand;
+import com.scraper.command.CrawlBooksCommand;
+import com.scraper.command.CrawlCountriesCommand;
+import com.scraper.command.CrawlQuotesCommand;
+import com.scraper.exception.CrawlerException;
+import com.scraper.exception.NetworkException;
+import com.scraper.exception.ParseException;
+import com.scraper.exception.StorageException;
+import com.scraper.strategy.SiteABooksStrategy;
+import com.scraper.strategy.SiteBQuotesStrategy;
+import com.scraper.strategy.SiteCCountriesStrategy;
+import picocli.CommandLine;
+import picocli.CommandLine.Command;
+import picocli.CommandLine.Option;
+
+
+
+/**
+ * Command-line entry point of the scraper; arguments are parsed with Picocli.
+ * Crawls books, quotes, country data or all three, saving each result as JSON.
+ */
+@Command(name = "webscraper", mixinStandardHelpOptions = true, version = "1.0",
+        description = "网页爬虫程序,支持爬取书籍、名言和国家信息。")
+public class Main implements Runnable {
+
+    /**
+     * Site to crawl: books, quotes, countries or all (case-insensitive).
+     * Defaults to all.
+     */
+    @Option(names = {"-s", "--site"}, description = "要爬取的网站类型:books、quotes、countries、all(默认:all)")
+    private String site = "all";
+
+    /**
+     * Output directory handed to the crawl commands; defaults to "./output".
+     */
+    @Option(names = {"-o", "--output"}, description = "输出目录(默认:./output)")
+    private String outputDir = "./output";
+
+    /**
+     * Program entry point: runs the command line and exits with its exit code.
+     *
+     * @param args command-line arguments
+     */
+    public static void main(String[] args) {
+        int exitCode = new CommandLine(new Main()).execute(args);
+        System.exit(exitCode);
+    }
+
+    /**
+     * Builds the command matching {@code --site} and executes it.
+     * An unrecognised site is rejected with exit code 2 instead of silently
+     * falling back to a full crawl; crawler failures are reported on stderr
+     * and end the process with exit code 1.
+     */
+    @Override
+    public void run() {
+        // Locale.ROOT: the default locale's casing rules (e.g. Turkish dotless i) must not break matching.
+        String target = site.toLowerCase(java.util.Locale.ROOT);
+        try {
+            CrawlerCommand command;
+            switch (target) {
+                case "books":
+                    command = new CrawlBooksCommand(new SiteABooksStrategy(), outputDir);
+                    break;
+                case "quotes":
+                    command = new CrawlQuotesCommand(new SiteBQuotesStrategy(), outputDir);
+                    break;
+                case "countries":
+                    command = new CrawlCountriesCommand(new SiteCCountriesStrategy(), outputDir);
+                    break;
+                case "all":
+                    command = new CrawlAllCommand(new SiteABooksStrategy(), new SiteBQuotesStrategy(), new SiteCCountriesStrategy(), outputDir);
+                    break;
+                default:
+                    // A mistyped site must not trigger a full crawl of every site.
+                    System.err.println("无效的 --site 取值:" + site + "(可选:books、quotes、countries、all)");
+                    System.exit(2);
+                    return;
+            }
+            command.execute();
+        } catch (NetworkException e) {
+            fail("网络错误:", e);
+        } catch (ParseException e) {
+            fail("解析失败:", e);
+        } catch (StorageException e) {
+            fail("存储异常:", e);
+        } catch (CrawlerException e) {
+            fail("爬取异常:", e);
+        }
+    }
+
+    /**
+     * Prints the labelled failure message (and its cause, if any) to stderr,
+     * then terminates the JVM with exit code 1.
+     *
+     * @param label prefix identifying the kind of failure
+     * @param e     the exception being reported
+     */
+    private static void fail(String label, Exception e) {
+        System.err.println(label + e.getMessage());
+        if (e.getCause() != null) {
+            System.err.println("原因:" + e.getCause().getMessage());
+        }
+        System.exit(1);
+    }
+}
diff --git a/src/main/java/com/scraper/command/CrawlAllCommand.java b/src/main/java/com/scraper/command/CrawlAllCommand.java
new file mode 100644
index 0000000..9f59f64
--- /dev/null
+++ b/src/main/java/com/scraper/command/CrawlAllCommand.java
@@ -0,0 +1,45 @@
+package com.scraper.command;
+
+import com.scraper.exception.CrawlerException;
+import com.scraper.model.Book;
+import com.scraper.model.Country;
+import com.scraper.model.Quote;
+import com.scraper.strategy.SiteABooksStrategy;
+import com.scraper.strategy.SiteBQuotesStrategy;
+import com.scraper.strategy.SiteCCountriesStrategy;
+import com.scraper.view.ConsoleView;
+import com.scraper.view.FileSaver;
+import java.util.List;
+
+/**
+ * Command that crawls the books, quotes and countries sites one after another.
+ * Each result is handed to {@link ConsoleView} and then written through
+ * {@link FileSaver} to a JSON file under the configured output directory.
+ */
+public class CrawlAllCommand implements CrawlerCommand {
+    private final SiteABooksStrategy booksStrategy;
+    private final SiteBQuotesStrategy quotesStrategy;
+    private final SiteCCountriesStrategy countriesStrategy;
+    private final String outputDir;
+
+    /**
+     * @param booksStrategy     strategy used for the books site
+     * @param quotesStrategy    strategy used for the quotes site
+     * @param countriesStrategy strategy used for the countries site
+     * @param outputDir         directory that receives books.json, quotes.json and countries.json
+     */
+    public CrawlAllCommand(SiteABooksStrategy booksStrategy, SiteBQuotesStrategy quotesStrategy, SiteCCountriesStrategy countriesStrategy, String outputDir) {
+        this.booksStrategy = booksStrategy;
+        this.quotesStrategy = quotesStrategy;
+        this.countriesStrategy = countriesStrategy;
+        this.outputDir = outputDir;
+    }
+
+    /**
+     * Same as the four-argument constructor, with the output directory fixed to "./output".
+     */
+    public CrawlAllCommand(SiteABooksStrategy booksStrategy, SiteBQuotesStrategy quotesStrategy, SiteCCountriesStrategy countriesStrategy) {
+        this(booksStrategy, quotesStrategy, countriesStrategy, "./output");
+    }
+
+    /**
+     * Crawls, displays and saves the three data sets in the order books, quotes, countries.
+     * The steps run sequentially with no local error handling, so an exception from
+     * any step propagates immediately and the later sites are not crawled.
+     *
+     * @throws CrawlerException if a step fails
+     */
+    @Override
+    public void execute() throws CrawlerException {
+        // Typed lists (instead of raw List) keep element types checked at compile time.
+        List<Book> books = booksStrategy.crawl("http://books.toscrape.com");
+        ConsoleView.printBooks(books);
+        FileSaver.saveToJson(books, outputDir + "/books.json");
+
+        List<Quote> quotes = quotesStrategy.crawl("http://quotes.toscrape.com");
+        ConsoleView.printQuotes(quotes);
+        FileSaver.saveToJson(quotes, outputDir + "/quotes.json");
+
+        List<Country> countries = countriesStrategy.crawl("https://www.scrapethissite.com/pages/simple/");
+        ConsoleView.printCountries(countries);
+        FileSaver.saveToJson(countries, outputDir + "/countries.json");
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/scraper/command/CrawlBooksCommand.java b/src/main/java/com/scraper/command/CrawlBooksCommand.java
new file mode 100644
index 0000000..e6dcd67
--- /dev/null
+++ b/src/main/java/com/scraper/command/CrawlBooksCommand.java
@@ -0,0 +1,29 @@
+package com.scraper.command;
+
+import com.scraper.exception.CrawlerException;
+import com.scraper.model.Book;
+import com.scraper.strategy.SiteABooksStrategy;
+import com.scraper.view.ConsoleView;
+import com.scraper.view.FileSaver;
+import java.util.List;
+
+/** Command that crawls http://books.toscrape.com, prints the books and saves them to {@code books.json}. */
+public class CrawlBooksCommand implements CrawlerCommand {
+    private final SiteABooksStrategy strategy;
+    private final String outputDir;
+    /** Creates a command that writes its JSON file under {@code outputDir}. */
+    public CrawlBooksCommand(SiteABooksStrategy strategy, String outputDir) {
+        this.strategy = strategy;
+        this.outputDir = outputDir;
+    }
+    /** Creates a command that writes its JSON file under the default {@code ./output} directory. */
+    public CrawlBooksCommand(SiteABooksStrategy strategy) {
+        this(strategy, "./output");
+    }
+    @Override
+    public void execute() throws CrawlerException {
+        List<Book> books = strategy.crawl("http://books.toscrape.com");
+        ConsoleView.printBooks(books);
+        FileSaver.saveToJson(books, outputDir + "/books.json");
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/scraper/command/CrawlCountriesCommand.java b/src/main/java/com/scraper/command/CrawlCountriesCommand.java
new file mode 100644
index 0000000..5a3811a
--- /dev/null
+++ b/src/main/java/com/scraper/command/CrawlCountriesCommand.java
@@ -0,0 +1,29 @@
+package com.scraper.command;
+
+import com.scraper.exception.CrawlerException;
+import com.scraper.model.Country;
+import com.scraper.strategy.SiteCCountriesStrategy;
+import com.scraper.view.ConsoleView;
+import com.scraper.view.FileSaver;
+import java.util.List;
+
+/** Command that crawls the scrapethissite.com countries page, prints the rows and saves them to {@code countries.json}. */
+public class CrawlCountriesCommand implements CrawlerCommand {
+    private final SiteCCountriesStrategy strategy;
+    private final String outputDir;
+    /** Creates a command that writes its JSON file under {@code outputDir}. */
+    public CrawlCountriesCommand(SiteCCountriesStrategy strategy, String outputDir) {
+        this.strategy = strategy;
+        this.outputDir = outputDir;
+    }
+    /** Creates a command that writes its JSON file under the default {@code ./output} directory. */
+    public CrawlCountriesCommand(SiteCCountriesStrategy strategy) {
+        this(strategy, "./output");
+    }
+    @Override
+    public void execute() throws CrawlerException {
+        List<Country> countries = strategy.crawl("https://www.scrapethissite.com/pages/simple/");
+        ConsoleView.printCountries(countries);
+        FileSaver.saveToJson(countries, outputDir + "/countries.json");
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/scraper/command/CrawlQuotesCommand.java b/src/main/java/com/scraper/command/CrawlQuotesCommand.java
new file mode 100644
index 0000000..2e4dd6b
--- /dev/null
+++ b/src/main/java/com/scraper/command/CrawlQuotesCommand.java
@@ -0,0 +1,29 @@
+package com.scraper.command;
+
+import com.scraper.exception.CrawlerException;
+import com.scraper.model.Quote;
+import com.scraper.strategy.SiteBQuotesStrategy;
+import com.scraper.view.ConsoleView;
+import com.scraper.view.FileSaver;
+import java.util.List;
+
+/** Command that crawls http://quotes.toscrape.com, prints the quotes and saves them to {@code quotes.json}. */
+public class CrawlQuotesCommand implements CrawlerCommand {
+    private final SiteBQuotesStrategy strategy;
+    private final String outputDir;
+    /** Creates a command that writes its JSON file under {@code outputDir}. */
+    public CrawlQuotesCommand(SiteBQuotesStrategy strategy, String outputDir) {
+        this.strategy = strategy;
+        this.outputDir = outputDir;
+    }
+    /** Creates a command that writes its JSON file under the default {@code ./output} directory. */
+    public CrawlQuotesCommand(SiteBQuotesStrategy strategy) {
+        this(strategy, "./output");
+    }
+    @Override
+    public void execute() throws CrawlerException {
+        List<Quote> quotes = strategy.crawl("http://quotes.toscrape.com");
+        ConsoleView.printQuotes(quotes);
+        FileSaver.saveToJson(quotes, outputDir + "/quotes.json");
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/scraper/command/CrawlerCommand.java b/src/main/java/com/scraper/command/CrawlerCommand.java
new file mode 100644
index 0000000..282940e
--- /dev/null
+++ b/src/main/java/com/scraper/command/CrawlerCommand.java
@@ -0,0 +1,7 @@
+package com.scraper.command;
+
+import com.scraper.exception.CrawlerException;
+/** A runnable crawl task; implementations do their work in {@code execute()} and report failures as {@code CrawlerException}. */
+public interface CrawlerCommand {
+ void execute() throws CrawlerException;
+}
\ No newline at end of file
diff --git a/src/main/java/com/scraper/exception/CrawlerException.java b/src/main/java/com/scraper/exception/CrawlerException.java
new file mode 100644
index 0000000..677702f
--- /dev/null
+++ b/src/main/java/com/scraper/exception/CrawlerException.java
@@ -0,0 +1,7 @@
+package com.scraper.exception;
+/** Abstract base of the scraper's checked exceptions; every instance carries a message and the underlying cause. */
+public abstract class CrawlerException extends Exception {
+ public CrawlerException(String message, Throwable cause) {
+ super(message, cause);
+ }
+}
\ No newline at end of file
diff --git a/src/main/java/com/scraper/exception/NetworkException.java b/src/main/java/com/scraper/exception/NetworkException.java
new file mode 100644
index 0000000..432e072
--- /dev/null
+++ b/src/main/java/com/scraper/exception/NetworkException.java
@@ -0,0 +1,7 @@
+package com.scraper.exception;
+/** Raised when an HTTP request fails; the crawl strategies wrap {@code java.io.IOException} in it. */
+public class NetworkException extends CrawlerException {
+ public NetworkException(String message, Throwable cause) {
+ super(message, cause);
+ }
+}
\ No newline at end of file
diff --git a/src/main/java/com/scraper/exception/ParseException.java b/src/main/java/com/scraper/exception/ParseException.java
new file mode 100644
index 0000000..4a311d2
--- /dev/null
+++ b/src/main/java/com/scraper/exception/ParseException.java
@@ -0,0 +1,7 @@
+package com.scraper.exception;
+/** Raised when a response body cannot be read; the crawl strategies wrap {@code org.apache.hc.core5.http.ParseException} in it. */
+public class ParseException extends CrawlerException {
+ public ParseException(String message, Throwable cause) {
+ super(message, cause);
+ }
+}
\ No newline at end of file
diff --git a/src/main/java/com/scraper/exception/StorageException.java b/src/main/java/com/scraper/exception/StorageException.java
new file mode 100644
index 0000000..b190a21
--- /dev/null
+++ b/src/main/java/com/scraper/exception/StorageException.java
@@ -0,0 +1,7 @@
+package com.scraper.exception;
+/** Raised when results cannot be written to disk; {@code FileSaver.saveToJson} wraps {@code java.io.IOException} in it. */
+public class StorageException extends CrawlerException {
+ public StorageException(String message, Throwable cause) {
+ super(message, cause);
+ }
+}
\ No newline at end of file
diff --git a/src/main/java/com/scraper/model/Book.java b/src/main/java/com/scraper/model/Book.java
new file mode 100644
index 0000000..fd9ba0c
--- /dev/null
+++ b/src/main/java/com/scraper/model/Book.java
@@ -0,0 +1,24 @@
+package com.scraper.model;
+
+/** Immutable value object holding a scraped book's title and price, both kept as text. */
+public class Book {
+    private final String title;
+    private final String price;
+    /** Creates a book from its title and its price text. */
+    public Book(String title, String price) {
+        this.title = title;
+        this.price = price;
+    }
+    /** Returns the book title. */
+    public String getTitle() {
+        return title;
+    }
+    /** Returns the price text exactly as it was passed to the constructor. */
+    public String getPrice() {
+        return price;
+    }
+    @Override
+    public String toString() {
+        return "Book{title='" + title + "', price='" + price + "'}";
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/scraper/model/Country.java b/src/main/java/com/scraper/model/Country.java
new file mode 100644
index 0000000..60249ac
--- /dev/null
+++ b/src/main/java/com/scraper/model/Country.java
@@ -0,0 +1,30 @@
+package com.scraper.model;
+
+/** Immutable value object holding a scraped country's name, capital and population, all kept as text. */
+public class Country {
+    private final String name;
+    private final String capital;
+    private final String population;
+    /** Creates a country from its name, capital and population text. */
+    public Country(String name, String capital, String population) {
+        this.name = name;
+        this.capital = capital;
+        this.population = population;
+    }
+    /** Returns the country name. */
+    public String getName() {
+        return name;
+    }
+    /** Returns the capital city. */
+    public String getCapital() {
+        return capital;
+    }
+    /** Returns the population text exactly as it was passed to the constructor. */
+    public String getPopulation() {
+        return population;
+    }
+    @Override
+    public String toString() {
+        return "Country{name='" + name + "', capital='" + capital + "', population='" + population + "'}";
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/scraper/model/Quote.java b/src/main/java/com/scraper/model/Quote.java
new file mode 100644
index 0000000..c684349
--- /dev/null
+++ b/src/main/java/com/scraper/model/Quote.java
@@ -0,0 +1,24 @@
+package com.scraper.model;
+
+/** Immutable value object holding a scraped quote's text and the name of its author. */
+public class Quote {
+    private final String text;
+    private final String author;
+    /** Creates a quote from its text and author name. */
+    public Quote(String text, String author) {
+        this.text = text;
+        this.author = author;
+    }
+    /** Returns the quote text. */
+    public String getText() {
+        return text;
+    }
+    /** Returns the author name. */
+    public String getAuthor() {
+        return author;
+    }
+    @Override
+    public String toString() {
+        return "Quote{text='" + text + "', author='" + author + "'}";
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/scraper/strategy/CrawlStrategy.java b/src/main/java/com/scraper/strategy/CrawlStrategy.java
new file mode 100644
index 0000000..93e4a95
--- /dev/null
+++ b/src/main/java/com/scraper/strategy/CrawlStrategy.java
@@ -0,0 +1,8 @@
+package com.scraper.strategy;
+
+import com.scraper.exception.CrawlerException;
+import java.util.List;
+/** Strategy for crawling one site: {@code crawl} takes the page URL and returns the list of items extracted from it. */
+public interface CrawlStrategy {
+ List crawl(String url) throws CrawlerException;
+}
\ No newline at end of file
diff --git a/src/main/java/com/scraper/strategy/SiteABooksStrategy.java b/src/main/java/com/scraper/strategy/SiteABooksStrategy.java
new file mode 100644
index 0000000..664e30d
--- /dev/null
+++ b/src/main/java/com/scraper/strategy/SiteABooksStrategy.java
@@ -0,0 +1,51 @@
+package com.scraper.strategy;
+
+import com.scraper.exception.CrawlerException;
+import com.scraper.exception.NetworkException;
+import com.scraper.exception.ParseException;
+import com.scraper.model.Book;
+import org.apache.hc.client5.http.classic.methods.HttpGet;
+import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
+import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
+import org.apache.hc.client5.http.impl.classic.HttpClients;
+
+import org.apache.hc.core5.http.io.entity.EntityUtils;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+public class SiteABooksStrategy implements CrawlStrategy {
+ @Override
+ public List crawl(String url) throws CrawlerException {
+ System.out.println("正在爬取 [http://books.toscrape.com]...");
+ List books = new ArrayList<>();
+
+ try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
+ HttpGet httpGet = new HttpGet(url);
+
+ try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
+ String html = EntityUtils.toString(response.getEntity());
+ Document doc = Jsoup.parse(html);
+ Elements productPods = doc.select(".product_pod");
+
+ for (Element pod : productPods) {
+ String title = pod.select("h3 > a").attr("title");
+ String priceText = pod.select(".price_color").text();
+ String price = priceText.replace("£", "");
+ books.add(new Book(title, price));
+ }
+ } catch (org.apache.hc.core5.http.ParseException e) {
+ throw new ParseException("解析响应内容失败", e);
+ }
+ } catch (IOException e) {
+ throw new NetworkException("网络请求失败", e);
+ }
+
+ return books;
+ }
+}
\ No newline at end of file
diff --git a/src/main/java/com/scraper/strategy/SiteBQuotesStrategy.java b/src/main/java/com/scraper/strategy/SiteBQuotesStrategy.java
new file mode 100644
index 0000000..96ffd15
--- /dev/null
+++ b/src/main/java/com/scraper/strategy/SiteBQuotesStrategy.java
@@ -0,0 +1,50 @@
+package com.scraper.strategy;
+
+import com.scraper.exception.CrawlerException;
+import com.scraper.exception.NetworkException;
+import com.scraper.exception.ParseException;
+import com.scraper.model.Quote;
+import org.apache.hc.client5.http.classic.methods.HttpGet;
+import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
+import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
+import org.apache.hc.client5.http.impl.classic.HttpClients;
+
+import org.apache.hc.core5.http.io.entity.EntityUtils;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+public class SiteBQuotesStrategy implements CrawlStrategy {
+ @Override
+ public List crawl(String url) throws CrawlerException {
+ System.out.println("正在爬取 [http://quotes.toscrape.com]...");
+ List quotes = new ArrayList<>();
+
+ try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
+ HttpGet httpGet = new HttpGet(url);
+
+ try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
+ String html = EntityUtils.toString(response.getEntity());
+ Document doc = Jsoup.parse(html);
+ Elements quoteElements = doc.select(".quote");
+
+ for (Element quoteEl : quoteElements) {
+ String text = quoteEl.select(".text").text();
+ String author = quoteEl.select(".author").text();
+ quotes.add(new Quote(text, author));
+ }
+ } catch (org.apache.hc.core5.http.ParseException e) {
+ throw new ParseException("解析响应内容失败", e);
+ }
+ } catch (IOException e) {
+ throw new NetworkException("网络请求失败", e);
+ }
+
+ return quotes;
+ }
+}
\ No newline at end of file
diff --git a/src/main/java/com/scraper/strategy/SiteCCountriesStrategy.java b/src/main/java/com/scraper/strategy/SiteCCountriesStrategy.java
new file mode 100644
index 0000000..f1d920c
--- /dev/null
+++ b/src/main/java/com/scraper/strategy/SiteCCountriesStrategy.java
@@ -0,0 +1,51 @@
+package com.scraper.strategy;
+
+import com.scraper.exception.CrawlerException;
+import com.scraper.exception.NetworkException;
+import com.scraper.exception.ParseException;
+import com.scraper.model.Country;
+import org.apache.hc.client5.http.classic.methods.HttpGet;
+import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
+import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
+import org.apache.hc.client5.http.impl.classic.HttpClients;
+
+import org.apache.hc.core5.http.io.entity.EntityUtils;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+public class SiteCCountriesStrategy implements CrawlStrategy {
+ @Override
+ public List crawl(String url) throws CrawlerException {
+ System.out.println("正在爬取 [https://www.scrapethissite.com/pages/simple/]...");
+ List countries = new ArrayList<>();
+
+ try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
+ HttpGet httpGet = new HttpGet(url);
+
+ try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
+ String html = EntityUtils.toString(response.getEntity());
+ Document doc = Jsoup.parse(html);
+ Elements countryElements = doc.select(".country");
+
+ for (Element countryEl : countryElements) {
+ String name = countryEl.select(".country-name").text().trim();
+ String capital = countryEl.select(".country-capital").text().trim();
+ String population = countryEl.select(".country-population").text().trim();
+ countries.add(new Country(name, capital, population));
+ }
+ } catch (org.apache.hc.core5.http.ParseException e) {
+ throw new ParseException("解析响应内容失败", e);
+ }
+ } catch (IOException e) {
+ throw new NetworkException("网络请求失败", e);
+ }
+
+ return countries;
+ }
+}
\ No newline at end of file
diff --git a/src/main/java/com/scraper/view/ConsoleView.java b/src/main/java/com/scraper/view/ConsoleView.java
new file mode 100644
index 0000000..e1bf876
--- /dev/null
+++ b/src/main/java/com/scraper/view/ConsoleView.java
@@ -0,0 +1,26 @@
+package com.scraper.view;
+
+import com.scraper.model.Book;
+import com.scraper.model.Country;
+import com.scraper.model.Quote;
+import java.util.List;
+
+/**
+ * Console output helpers that print crawled items to standard output, one line per item.
+ */
+public class ConsoleView {
+    /** Prints the title and price of every book in {@code books}. */
+    public static void printBooks(List<Book> books) {
+        books.forEach(book -> System.out.println("书名: 《" + book.getTitle() + "》, 价格: £" + book.getPrice()));
+    }
+
+    /** Prints every quote in {@code quotes} in double quotes, followed by its author. */
+    public static void printQuotes(List<Quote> quotes) {
+        quotes.forEach(quote -> System.out.println("\"" + quote.getText() + "\" —— " + quote.getAuthor()));
+    }
+
+    /** Prints the name, capital and population of every country in {@code countries}. */
+    public static void printCountries(List<Country> countries) {
+        countries.forEach(country -> System.out.println("国家: " + country.getName() + ", 首都: " + country.getCapital() + ", 人口: " + country.getPopulation()));
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/com/scraper/view/FileSaver.java b/src/main/java/com/scraper/view/FileSaver.java
new file mode 100644
index 0000000..4c42089
--- /dev/null
+++ b/src/main/java/com/scraper/view/FileSaver.java
@@ -0,0 +1,31 @@
+package com.scraper.view;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.scraper.exception.StorageException;
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.List;
+import java.util.function.Function;
+
+public class FileSaver {
+ public static void saveToJson(Object data, String filePath) throws StorageException {
+ try {
+ Path path = Paths.get(filePath);
+ Path parentDir = path.getParent();
+ if (parentDir != null) {
+ Files.createDirectories(parentDir);
+ }
+ ObjectMapper mapper = new ObjectMapper();
+ mapper.writerWithDefaultPrettyPrinter().writeValue(new File(filePath), data);
+ } catch (IOException e) {
+ throw new StorageException("无法写入 JSON 文件: " + filePath, e);
+ }
+ }
+
+ public static void saveToCsv(List> items, String filePath, String[] headers, Function