commit 8198b8c9b77d37f36b63aace8015142b189d0e6a Author: 陈全文 <202529010113@innoschool.org> Date: Sun May 31 16:42:14 2026 +0800 Initial commit: Web Scraper project with Maven diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2a842fc --- /dev/null +++ b/.gitignore @@ -0,0 +1,28 @@ +# Maven +target/ +pom.xml.tag +pom.xml.releaseBackup +pom.xml.versionsBackup +pom.xml.next +release.properties +dependency-reduced-pom.xml +buildNumber.properties +.mvn/timing.properties +.mvn/wrapper/maven-wrapper.jar + +# IDE +.idea/ +*.iml +*.ipr +*.iws +.project +.classpath +.settings/ +.vscode/ + +# OS +.DS_Store +Thumbs.db + +# Logs +*.log diff --git a/202529010113-陈全文-期末实验报告.docx b/202529010113-陈全文-期末实验报告.docx new file mode 100644 index 0000000..9f0a2a5 Binary files /dev/null and b/202529010113-陈全文-期末实验报告.docx differ diff --git a/output/books.json b/output/books.json new file mode 100644 index 0000000..398bb97 --- /dev/null +++ b/output/books.json @@ -0,0 +1,61 @@ +[ { + "title" : "A Light in the Attic", + "price" : "51.77" +}, { + "title" : "Tipping the Velvet", + "price" : "53.74" +}, { + "title" : "Soumission", + "price" : "50.10" +}, { + "title" : "Sharp Objects", + "price" : "47.82" +}, { + "title" : "Sapiens: A Brief History of Humankind", + "price" : "54.23" +}, { + "title" : "The Requiem Red", + "price" : "22.65" +}, { + "title" : "The Dirty Little Secrets of Getting Your Dream Job", + "price" : "33.34" +}, { + "title" : "The Coming Woman: A Novel Based on the Life of the Infamous Feminist, Victoria Woodhull", + "price" : "17.93" +}, { + "title" : "The Boys in the Boat: Nine Americans and Their Epic Quest for Gold at the 1936 Berlin Olympics", + "price" : "22.60" +}, { + "title" : "The Black Maria", + "price" : "52.15" +}, { + "title" : "Starving Hearts (Triangular Trade Trilogy, #1)", + "price" : "13.99" +}, { + "title" : "Shakespeare's Sonnets", + "price" : "20.66" +}, { + "title" : "Set Me Free", + "price" : "17.46" +}, { + "title" : "Scott Pilgrim's Precious Little Life (Scott Pilgrim #1)", + "price" : "52.29" +}, { + "title" : "Rip it Up and Start Again", + "price" : "35.02" +}, { + "title" : "Our Band Could Be Your Life: Scenes from the American Indie Underground, 1981-1991", + "price" : "57.25" +}, { + "title" : "Olio", + "price" : "23.88" +}, { + "title" : "Mesaerion: The Best Science Fiction Stories 1800-1849", + "price" : "37.59" +}, { + "title" : "Libertarianism for Beginners", + "price" : "51.33" +}, { + "title" : "It's Only the Himalayas", + "price" : "45.17" +} ] \ No newline at end of file diff --git a/output/countries.json b/output/countries.json new file mode 100644 index 0000000..83484e4 --- /dev/null +++ b/output/countries.json @@ -0,0 +1,1001 @@ +[ { + "name" : "Andorra", + "capital" : "Andorra la Vella", + "population" : "84000" +}, { + "name" : "United Arab Emirates", + "capital" : "Abu Dhabi", + "population" : "4975593" +}, { + "name" : "Afghanistan", + "capital" : "Kabul", + "population" : "29121286" +}, { + "name" : "Antigua and Barbuda", + "capital" : "St. John's", + "population" : "86754" +}, { + "name" : "Anguilla", + "capital" : "The Valley", + "population" : "13254" +}, { + "name" : "Albania", + "capital" : "Tirana", + "population" : "2986952" +}, { + "name" : "Armenia", + "capital" : "Yerevan", + "population" : "2968000" +}, { + "name" : "Angola", + "capital" : "Luanda", + "population" : "13068161" +}, { + "name" : "Antarctica", + "capital" : "None", + "population" : "0" +}, { + "name" : "Argentina", + "capital" : "Buenos Aires", + "population" : "41343201" +}, { + "name" : "American Samoa", + "capital" : "Pago Pago", + "population" : "57881" +}, { + "name" : "Austria", + "capital" : "Vienna", + "population" : "8205000" +}, { + "name" : "Australia", + "capital" : "Canberra", + "population" : "21515754" +}, { + "name" : "Aruba", + "capital" : "Oranjestad", + "population" : "71566" +}, { + "name" : "Åland", + "capital" : "Mariehamn", + "population" : "26711" +}, { + "name" : "Azerbaijan", + "capital" : "Baku", + "population" : "8303512" +}, { + "name" : "Bosnia and Herzegovina", + "capital" : "Sarajevo", + "population" : "4590000" +}, { + "name" : "Barbados", + "capital" : "Bridgetown", + "population" : "285653" +}, { + "name" : "Bangladesh", + "capital" : "Dhaka", + "population" : "156118464" +}, { + "name" : "Belgium", + "capital" : "Brussels", + "population" : "10403000" +}, { + "name" : "Burkina Faso", + "capital" : "Ouagadougou", + "population" : "16241811" +}, { + "name" : "Bulgaria", + "capital" : "Sofia", + "population" : "7148785" +}, { + "name" : "Bahrain", + "capital" : "Manama", + "population" : "738004" +}, { + "name" : "Burundi", + "capital" : "Bujumbura", + "population" : "9863117" +}, { + "name" : "Benin", + "capital" : "Porto-Novo", + "population" : "9056010" +}, { + "name" : "Saint Barthélemy", + "capital" : "Gustavia", + "population" : "8450" +}, { + "name" : "Bermuda", + "capital" : "Hamilton", + "population" : "65365" +}, { + "name" : "Brunei", + "capital" : "Bandar Seri Begawan", + "population" : "395027" +}, { + "name" : "Bolivia", + "capital" : "Sucre", + "population" : "9947418" +}, { + "name" : "Bonaire", + "capital" : "Kralendijk", + "population" : "18012" +}, { + "name" : "Brazil", + "capital" : "Brasília", + "population" : "201103330" +}, { + "name" : "Bahamas", + "capital" : "Nassau", + "population" : "301790" +}, { + "name" : "Bhutan", + "capital" : "Thimphu", + "population" : "699847" +}, { + "name" : "Bouvet Island", + "capital" : "None", + "population" : "0" +}, { + "name" : "Botswana", + "capital" : "Gaborone", + "population" : "2029307" +}, { + "name" : "Belarus", + "capital" : "Minsk", + "population" : "9685000" +}, { + "name" : "Belize", + "capital" : "Belmopan", + "population" : "314522" +}, { + "name" : "Canada", + "capital" : "Ottawa", + "population" : "33679000" +}, { + "name" : "Cocos [Keeling] Islands", + "capital" : "West Island", + "population" : "628" +}, { + "name" : "Democratic Republic of the Congo", + "capital" : "Kinshasa", + "population" : "70916439" +}, { + "name" : "Central African Republic", + "capital" : "Bangui", + "population" : "4844927" +}, { + "name" : "Republic of the Congo", + "capital" : "Brazzaville", + "population" : "3039126" +}, { + "name" : "Switzerland", + "capital" : "Bern", + "population" : "7581000" +}, { + "name" : "Ivory Coast", + "capital" : "Yamoussoukro", + "population" : "21058798" +}, { + "name" : "Cook Islands", + "capital" : "Avarua", + "population" : "21388" +}, { + "name" : "Chile", + "capital" : "Santiago", + "population" : "16746491" +}, { + "name" : "Cameroon", + "capital" : "Yaoundé", + "population" : "19294149" +}, { + "name" : "China", + "capital" : "Beijing", + "population" : "1330044000" +}, { + "name" : "Colombia", + "capital" : "Bogotá", + "population" : "47790000" +}, { + "name" : "Costa Rica", + "capital" : "San José", + "population" : "4516220" +}, { + "name" : "Cuba", + "capital" : "Havana", + "population" : "11423000" +}, { + "name" : "Cape Verde", + "capital" : "Praia", + "population" : "508659" +}, { + "name" : "Curacao", + "capital" : "Willemstad", + "population" : "141766" +}, { + "name" : "Christmas Island", + "capital" : "Flying Fish Cove", + "population" : "1500" +}, { + "name" : "Cyprus", + "capital" : "Nicosia", + "population" : "1102677" +}, { + "name" : "Czech Republic", + "capital" : "Prague", + "population" : "10476000" +}, { + "name" : "Germany", + "capital" : "Berlin", + "population" : "81802257" +}, { + "name" : "Djibouti", + "capital" : "Djibouti", + "population" : "740528" +}, { + "name" : "Denmark", + "capital" : "Copenhagen", + "population" : "5484000" +}, { + "name" : "Dominica", + "capital" : "Roseau", + "population" : "72813" +}, { + "name" : "Dominican Republic", + "capital" : "Santo Domingo", + "population" : "9823821" +}, { + "name" : "Algeria", + "capital" : "Algiers", + "population" : "34586184" +}, { + "name" : "Ecuador", + "capital" : "Quito", + "population" : "14790608" +}, { + "name" : "Estonia", + "capital" : "Tallinn", + "population" : "1291170" +}, { + "name" : "Egypt", + "capital" : "Cairo", + "population" : "80471869" +}, { + "name" : "Western Sahara", + "capital" : "Laâyoune / El Aaiún", + "population" : "273008" +}, { + "name" : "Eritrea", + "capital" : "Asmara", + "population" : "5792984" +}, { + "name" : "Spain", + "capital" : "Madrid", + "population" : "46505963" +}, { + "name" : "Ethiopia", + "capital" : "Addis Ababa", + "population" : "88013491" +}, { + "name" : "Finland", + "capital" : "Helsinki", + "population" : "5244000" +}, { + "name" : "Fiji", + "capital" : "Suva", + "population" : "875983" +}, { + "name" : "Falkland Islands", + "capital" : "Stanley", + "population" : "2638" +}, { + "name" : "Micronesia", + "capital" : "Palikir", + "population" : "107708" +}, { + "name" : "Faroe Islands", + "capital" : "Tórshavn", + "population" : "48228" +}, { + "name" : "France", + "capital" : "Paris", + "population" : "64768389" +}, { + "name" : "Gabon", + "capital" : "Libreville", + "population" : "1545255" +}, { + "name" : "United Kingdom", + "capital" : "London", + "population" : "62348447" +}, { + "name" : "Grenada", + "capital" : "St. George's", + "population" : "107818" +}, { + "name" : "Georgia", + "capital" : "Tbilisi", + "population" : "4630000" +}, { + "name" : "French Guiana", + "capital" : "Cayenne", + "population" : "195506" +}, { + "name" : "Guernsey", + "capital" : "St Peter Port", + "population" : "65228" +}, { + "name" : "Ghana", + "capital" : "Accra", + "population" : "24339838" +}, { + "name" : "Gibraltar", + "capital" : "Gibraltar", + "population" : "27884" +}, { + "name" : "Greenland", + "capital" : "Nuuk", + "population" : "56375" +}, { + "name" : "Gambia", + "capital" : "Bathurst", + "population" : "1593256" +}, { + "name" : "Guinea", + "capital" : "Conakry", + "population" : "10324025" +}, { + "name" : "Guadeloupe", + "capital" : "Basse-Terre", + "population" : "443000" +}, { + "name" : "Equatorial Guinea", + "capital" : "Malabo", + "population" : "1014999" +}, { + "name" : "Greece", + "capital" : "Athens", + "population" : "11000000" +}, { + "name" : "South Georgia and the South Sandwich Islands", + "capital" : "Grytviken", + "population" : "30" +}, { + "name" : "Guatemala", + "capital" : "Guatemala City", + "population" : "13550440" +}, { + "name" : "Guam", + "capital" : "Hagåtña", + "population" : "159358" +}, { + "name" : "Guinea-Bissau", + "capital" : "Bissau", + "population" : "1565126" +}, { + "name" : "Guyana", + "capital" : "Georgetown", + "population" : "748486" +}, { + "name" : "Hong Kong", + "capital" : "Hong Kong", + "population" : "6898686" +}, { + "name" : "Heard Island and McDonald Islands", + "capital" : "None", + "population" : "0" +}, { + "name" : "Honduras", + "capital" : "Tegucigalpa", + "population" : "7989415" +}, { + "name" : "Croatia", + "capital" : "Zagreb", + "population" : "4491000" +}, { + "name" : "Haiti", + "capital" : "Port-au-Prince", + "population" : "9648924" +}, { + "name" : "Hungary", + "capital" : "Budapest", + "population" : "9982000" +}, { + "name" : "Indonesia", + "capital" : "Jakarta", + "population" : "242968342" +}, { + "name" : "Ireland", + "capital" : "Dublin", + "population" : "4622917" +}, { + "name" : "Israel", + "capital" : "None", + "population" : "7353985" +}, { + "name" : "Isle of Man", + "capital" : "Douglas", + "population" : "75049" +}, { + "name" : "India", + "capital" : "New Delhi", + "population" : "1173108018" +}, { + "name" : "British Indian Ocean Territory", + "capital" : "None", + "population" : "4000" +}, { + "name" : "Iraq", + "capital" : "Baghdad", + "population" : "29671605" +}, { + "name" : "Iran", + "capital" : "Tehran", + "population" : "76923300" +}, { + "name" : "Iceland", + "capital" : "Reykjavik", + "population" : "308910" +}, { + "name" : "Italy", + "capital" : "Rome", + "population" : "60340328" +}, { + "name" : "Jersey", + "capital" : "Saint Helier", + "population" : "90812" +}, { + "name" : "Jamaica", + "capital" : "Kingston", + "population" : "2847232" +}, { + "name" : "Jordan", + "capital" : "Amman", + "population" : "6407085" +}, { + "name" : "Japan", + "capital" : "Tokyo", + "population" : "127288000" +}, { + "name" : "Kenya", + "capital" : "Nairobi", + "population" : "40046566" +}, { + "name" : "Kyrgyzstan", + "capital" : "Bishkek", + "population" : "5776500" +}, { + "name" : "Cambodia", + "capital" : "Phnom Penh", + "population" : "14453680" +}, { + "name" : "Kiribati", + "capital" : "Tarawa", + "population" : "92533" +}, { + "name" : "Comoros", + "capital" : "Moroni", + "population" : "773407" +}, { + "name" : "Saint Kitts and Nevis", + "capital" : "Basseterre", + "population" : "51134" +}, { + "name" : "North Korea", + "capital" : "Pyongyang", + "population" : "22912177" +}, { + "name" : "South Korea", + "capital" : "Seoul", + "population" : "48422644" +}, { + "name" : "Kuwait", + "capital" : "Kuwait City", + "population" : "2789132" +}, { + "name" : "Cayman Islands", + "capital" : "George Town", + "population" : "44270" +}, { + "name" : "Kazakhstan", + "capital" : "Astana", + "population" : "15340000" +}, { + "name" : "Laos", + "capital" : "Vientiane", + "population" : "6368162" +}, { + "name" : "Lebanon", + "capital" : "Beirut", + "population" : "4125247" +}, { + "name" : "Saint Lucia", + "capital" : "Castries", + "population" : "160922" +}, { + "name" : "Liechtenstein", + "capital" : "Vaduz", + "population" : "35000" +}, { + "name" : "Sri Lanka", + "capital" : "Colombo", + "population" : "21513990" +}, { + "name" : "Liberia", + "capital" : "Monrovia", + "population" : "3685076" +}, { + "name" : "Lesotho", + "capital" : "Maseru", + "population" : "1919552" +}, { + "name" : "Lithuania", + "capital" : "Vilnius", + "population" : "2944459" +}, { + "name" : "Luxembourg", + "capital" : "Luxembourg", + "population" : "497538" +}, { + "name" : "Latvia", + "capital" : "Riga", + "population" : "2217969" +}, { + "name" : "Libya", + "capital" : "Tripoli", + "population" : "6461454" +}, { + "name" : "Morocco", + "capital" : "Rabat", + "population" : "31627428" +}, { + "name" : "Monaco", + "capital" : "Monaco", + "population" : "32965" +}, { + "name" : "Moldova", + "capital" : "Chişinău", + "population" : "4324000" +}, { + "name" : "Montenegro", + "capital" : "Podgorica", + "population" : "666730" +}, { + "name" : "Saint Martin", + "capital" : "Marigot", + "population" : "35925" +}, { + "name" : "Madagascar", + "capital" : "Antananarivo", + "population" : "21281844" +}, { + "name" : "Marshall Islands", + "capital" : "Majuro", + "population" : "65859" +}, { + "name" : "Macedonia", + "capital" : "Skopje", + "population" : "2062294" +}, { + "name" : "Mali", + "capital" : "Bamako", + "population" : "13796354" +}, { + "name" : "Myanmar [Burma]", + "capital" : "Naypyitaw", + "population" : "53414374" +}, { + "name" : "Mongolia", + "capital" : "Ulan Bator", + "population" : "3086918" +}, { + "name" : "Macao", + "capital" : "Macao", + "population" : "449198" +}, { + "name" : "Northern Mariana Islands", + "capital" : "Saipan", + "population" : "53883" +}, { + "name" : "Martinique", + "capital" : "Fort-de-France", + "population" : "432900" +}, { + "name" : "Mauritania", + "capital" : "Nouakchott", + "population" : "3205060" +}, { + "name" : "Montserrat", + "capital" : "Plymouth", + "population" : "9341" +}, { + "name" : "Malta", + "capital" : "Valletta", + "population" : "403000" +}, { + "name" : "Mauritius", + "capital" : "Port Louis", + "population" : "1294104" +}, { + "name" : "Maldives", + "capital" : "Malé", + "population" : "395650" +}, { + "name" : "Malawi", + "capital" : "Lilongwe", + "population" : "15447500" +}, { + "name" : "Mexico", + "capital" : "Mexico City", + "population" : "112468855" +}, { + "name" : "Malaysia", + "capital" : "Kuala Lumpur", + "population" : "28274729" +}, { + "name" : "Mozambique", + "capital" : "Maputo", + "population" : "22061451" +}, { + "name" : "Namibia", + "capital" : "Windhoek", + "population" : "2128471" +}, { + "name" : "New Caledonia", + "capital" : "Noumea", + "population" : "216494" +}, { + "name" : "Niger", + "capital" : "Niamey", + "population" : "15878271" +}, { + "name" : "Norfolk Island", + "capital" : "Kingston", + "population" : "1828" +}, { + "name" : "Nigeria", + "capital" : "Abuja", + "population" : "154000000" +}, { + "name" : "Nicaragua", + "capital" : "Managua", + "population" : "5995928" +}, { + "name" : "Netherlands", + "capital" : "Amsterdam", + "population" : "16645000" +}, { + "name" : "Norway", + "capital" : "Oslo", + "population" : "5009150" +}, { + "name" : "Nepal", + "capital" : "Kathmandu", + "population" : "28951852" +}, { + "name" : "Nauru", + "capital" : "Yaren", + "population" : "10065" +}, { + "name" : "Niue", + "capital" : "Alofi", + "population" : "2166" +}, { + "name" : "New Zealand", + "capital" : "Wellington", + "population" : "4252277" +}, { + "name" : "Oman", + "capital" : "Muscat", + "population" : "2967717" +}, { + "name" : "Panama", + "capital" : "Panama City", + "population" : "3410676" +}, { + "name" : "Peru", + "capital" : "Lima", + "population" : "29907003" +}, { + "name" : "French Polynesia", + "capital" : "Papeete", + "population" : "270485" +}, { + "name" : "Papua New Guinea", + "capital" : "Port Moresby", + "population" : "6064515" +}, { + "name" : "Philippines", + "capital" : "Manila", + "population" : "99900177" +}, { + "name" : "Pakistan", + "capital" : "Islamabad", + "population" : "184404791" +}, { + "name" : "Poland", + "capital" : "Warsaw", + "population" : "38500000" +}, { + "name" : "Saint Pierre and Miquelon", + "capital" : "Saint-Pierre", + "population" : "7012" +}, { + "name" : "Pitcairn Islands", + "capital" : "Adamstown", + "population" : "46" +}, { + "name" : "Puerto Rico", + "capital" : "San Juan", + "population" : "3916632" +}, { + "name" : "Palestine", + "capital" : "None", + "population" : "3800000" +}, { + "name" : "Portugal", + "capital" : "Lisbon", + "population" : "10676000" +}, { + "name" : "Palau", + "capital" : "Melekeok", + "population" : "19907" +}, { + "name" : "Paraguay", + "capital" : "Asunción", + "population" : "6375830" +}, { + "name" : "Qatar", + "capital" : "Doha", + "population" : "840926" +}, { + "name" : "Réunion", + "capital" : "Saint-Denis", + "population" : "776948" +}, { + "name" : "Romania", + "capital" : "Bucharest", + "population" : "21959278" +}, { + "name" : "Serbia", + "capital" : "Belgrade", + "population" : "7344847" +}, { + "name" : "Russia", + "capital" : "Moscow", + "population" : "140702000" +}, { + "name" : "Rwanda", + "capital" : "Kigali", + "population" : "11055976" +}, { + "name" : "Saudi Arabia", + "capital" : "Riyadh", + "population" : "25731776" +}, { + "name" : "Solomon Islands", + "capital" : "Honiara", + "population" : "559198" +}, { + "name" : "Seychelles", + "capital" : "Victoria", + "population" : "88340" +}, { + "name" : "Sudan", + "capital" : "Khartoum", + "population" : "35000000" +}, { + "name" : "Sweden", + "capital" : "Stockholm", + "population" : "9828655" +}, { + "name" : "Singapore", + "capital" : "Singapore", + "population" : "4701069" +}, { + "name" : "Saint Helena", + "capital" : "Jamestown", + "population" : "7460" +}, { + "name" : "Slovenia", + "capital" : "Ljubljana", + "population" : "2007000" +}, { + "name" : "Svalbard and Jan Mayen", + "capital" : "Longyearbyen", + "population" : "2550" +}, { + "name" : "Slovakia", + "capital" : "Bratislava", + "population" : "5455000" +}, { + "name" : "Sierra Leone", + "capital" : "Freetown", + "population" : "5245695" +}, { + "name" : "San Marino", + "capital" : "San Marino", + "population" : "31477" +}, { + "name" : "Senegal", + "capital" : "Dakar", + "population" : "12323252" +}, { + "name" : "Somalia", + "capital" : "Mogadishu", + "population" : "10112453" +}, { + "name" : "Suriname", + "capital" : "Paramaribo", + "population" : "492829" +}, { + "name" : "South Sudan", + "capital" : "Juba", + "population" : "8260490" +}, { + "name" : "São Tomé and Príncipe", + "capital" : "São Tomé", + "population" : "175808" +}, { + "name" : "El Salvador", + "capital" : "San Salvador", + "population" : "6052064" +}, { + "name" : "Sint Maarten", + "capital" : "Philipsburg", + "population" : "37429" +}, { + "name" : "Syria", + "capital" : "Damascus", + "population" : "22198110" +}, { + "name" : "Swaziland", + "capital" : "Mbabane", + "population" : "1354051" +}, { + "name" : "Turks and Caicos Islands", + "capital" : "Cockburn Town", + "population" : "20556" +}, { + "name" : "Chad", + "capital" : "N'Djamena", + "population" : "10543464" +}, { + "name" : "French Southern Territories", + "capital" : "Port-aux-Français", + "population" : "140" +}, { + "name" : "Togo", + "capital" : "Lomé", + "population" : "6587239" +}, { + "name" : "Thailand", + "capital" : "Bangkok", + "population" : "67089500" +}, { + "name" : "Tajikistan", + "capital" : "Dushanbe", + "population" : "7487489" +}, { + "name" : "Tokelau", + "capital" : "None", + "population" : "1466" +}, { + "name" : "East Timor", + "capital" : "Dili", + "population" : "1154625" +}, { + "name" : "Turkmenistan", + "capital" : "Ashgabat", + "population" : "4940916" +}, { + "name" : "Tunisia", + "capital" : "Tunis", + "population" : "10589025" +}, { + "name" : "Tonga", + "capital" : "Nuku'alofa", + "population" : "122580" +}, { + "name" : "Turkey", + "capital" : "Ankara", + "population" : "77804122" +}, { + "name" : "Trinidad and Tobago", + "capital" : "Port of Spain", + "population" : "1228691" +}, { + "name" : "Tuvalu", + "capital" : "Funafuti", + "population" : "10472" +}, { + "name" : "Taiwan", + "capital" : "Taipei", + "population" : "22894384" +}, { + "name" : "Tanzania", + "capital" : "Dodoma", + "population" : "41892895" +}, { + "name" : "Ukraine", + "capital" : "Kiev", + "population" : "45415596" +}, { + "name" : "Uganda", + "capital" : "Kampala", + "population" : "33398682" +}, { + "name" : "U.S. Minor Outlying Islands", + "capital" : "None", + "population" : "0" +}, { + "name" : "United States", + "capital" : "Washington", + "population" : "310232863" +}, { + "name" : "Uruguay", + "capital" : "Montevideo", + "population" : "3477000" +}, { + "name" : "Uzbekistan", + "capital" : "Tashkent", + "population" : "27865738" +}, { + "name" : "Vatican City", + "capital" : "Vatican City", + "population" : "921" +}, { + "name" : "Saint Vincent and the Grenadines", + "capital" : "Kingstown", + "population" : "104217" +}, { + "name" : "Venezuela", + "capital" : "Caracas", + "population" : "27223228" +}, { + "name" : "British Virgin Islands", + "capital" : "Road Town", + "population" : "21730" +}, { + "name" : "U.S. Virgin Islands", + "capital" : "Charlotte Amalie", + "population" : "108708" +}, { + "name" : "Vietnam", + "capital" : "Hanoi", + "population" : "89571130" +}, { + "name" : "Vanuatu", + "capital" : "Port Vila", + "population" : "221552" +}, { + "name" : "Wallis and Futuna", + "capital" : "Mata-Utu", + "population" : "16025" +}, { + "name" : "Samoa", + "capital" : "Apia", + "population" : "192001" +}, { + "name" : "Kosovo", + "capital" : "Pristina", + "population" : "1800000" +}, { + "name" : "Yemen", + "capital" : "Sanaa", + "population" : "23495361" +}, { + "name" : "Mayotte", + "capital" : "Mamoudzou", + "population" : "159042" +}, { + "name" : "South Africa", + "capital" : "Pretoria", + "population" : "49000000" +}, { + "name" : "Zambia", + "capital" : "Lusaka", + "population" : "13460305" +}, { + "name" : "Zimbabwe", + "capital" : "Harare", + "population" : "11651858" +} ] \ No newline at end of file diff --git a/output/quotes.json b/output/quotes.json new file mode 100644 index 0000000..42788c0 --- /dev/null +++ b/output/quotes.json @@ -0,0 +1,31 @@ +[ { + "text" : "“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”", + "author" : "Albert Einstein" +}, { + "text" : "“It is our choices, Harry, that show what we truly are, far more than our abilities.”", + "author" : "J.K. Rowling" +}, { + "text" : "“There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.”", + "author" : "Albert Einstein" +}, { + "text" : "“The person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.”", + "author" : "Jane Austen" +}, { + "text" : "“Imperfection is beauty, madness is genius and it's better to be absolutely ridiculous than absolutely boring.”", + "author" : "Marilyn Monroe" +}, { + "text" : "“Try not to become a man of success. Rather become a man of value.”", + "author" : "Albert Einstein" +}, { + "text" : "“It is better to be hated for what you are than to be loved for what you are not.”", + "author" : "André Gide" +}, { + "text" : "“I have not failed. I've just found 10,000 ways that won't work.”", + "author" : "Thomas A. Edison" +}, { + "text" : "“A woman is like a tea bag; you never know how strong it is until it's in hot water.”", + "author" : "Eleanor Roosevelt" +}, { + "text" : "“A day without sunshine is like, you know, night.”", + "author" : "Steve Martin" +} ] \ No newline at end of file diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..50b509b --- /dev/null +++ b/pom.xml @@ -0,0 +1,76 @@ + + + 4.0.0 + + com.scraper + web-scraper + 1.0-SNAPSHOT + jar + + Web Scraper + A web scraping application + + + UTF-8 + 11 + 11 + + + + + org.jsoup + jsoup + 1.17.2 + + + org.apache.httpcomponents.client5 + httpclient5 + 5.4.1 + + + info.picocli + picocli + 4.7.6 + + + com.fasterxml.jackson.core + jackson-databind + 2.17.2 + + + com.fasterxml.jackson.core + jackson-core + 2.17.2 + + + com.fasterxml.jackson.core + jackson-annotations + 2.17.2 + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.13.0 + + 11 + 11 + + + + org.codehaus.mojo + exec-maven-plugin + 3.1.0 + + com.scraper.Main + --site all --output ./output + + + + + \ No newline at end of file diff --git a/src/main/java/com/scraper/Main.java b/src/main/java/com/scraper/Main.java new file mode 100644 index 0000000..bea67e3 --- /dev/null +++ b/src/main/java/com/scraper/Main.java @@ -0,0 +1,109 @@ +package com.scraper; + +import com.scraper.command.CrawlerCommand; +import com.scraper.command.CrawlAllCommand; +import com.scraper.command.CrawlBooksCommand; +import com.scraper.command.CrawlCountriesCommand; +import com.scraper.command.CrawlQuotesCommand; +import com.scraper.exception.CrawlerException; +import com.scraper.exception.NetworkException; +import com.scraper.exception.ParseException; +import com.scraper.exception.StorageException; +import com.scraper.strategy.SiteABooksStrategy; +import com.scraper.strategy.SiteBQuotesStrategy; +import com.scraper.strategy.SiteCCountriesStrategy; +import picocli.CommandLine; +import picocli.CommandLine.Command; +import picocli.CommandLine.Option; + + + +/** + * 爬虫程序的主入口类,使用 Picocli 实现命令行解析。 + * 支持爬取书籍、名言、国家信息或全部内容,并保存为 JSON 文件。 + */ +@Command(name = "webscraper", mixinStandardHelpOptions = true, version = "1.0", + description = "网页爬虫程序,支持爬取书籍、名言和国家信息。") +public class Main implements Runnable { + + /** + * 要爬取的网站类型,可选值:books、quotes、countries、all(不区分大小写),默认为 all + */ + @Option(names = {"-s", "--site"}, description = "要爬取的网站类型:books、quotes、countries、all(默认:all)") + private String site = "all"; + + /** + * 输出目录,默认为 "./output" + */ + @Option(names = {"-o", "--output"}, description = "输出目录(默认:./output)") + private String outputDir = "./output"; + + /** + * 主方法,程序入口 + * @param args 命令行参数 + */ + public static void main(String[] args) { + int exitCode = new CommandLine(new Main()).execute(args); + System.exit(exitCode); + } + + /** + * 执行业务逻辑 + */ + @Override + public void run() { + try { + // 创建策略实例 + SiteABooksStrategy booksStrategy = new SiteABooksStrategy(); + SiteBQuotesStrategy quotesStrategy = new SiteBQuotesStrategy(); + SiteCCountriesStrategy countriesStrategy = new SiteCCountriesStrategy(); + + CrawlerCommand command; + + // 根据 --site 选项选择对应的命令 + switch (site.toLowerCase()) { + case "books": + command = new CrawlBooksCommand(booksStrategy, outputDir); + break; + case "quotes": + command = new CrawlQuotesCommand(quotesStrategy, outputDir); + break; + case "countries": + command = new CrawlCountriesCommand(countriesStrategy, outputDir); + break; + case "all": + default: + command = new CrawlAllCommand(booksStrategy, quotesStrategy, countriesStrategy, outputDir); + break; + } + + // 执行命令 + command.execute(); + + } catch (NetworkException e) { + System.err.println("网络错误:" + e.getMessage()); + if (e.getCause() != null) { + System.err.println("原因:" + e.getCause().getMessage()); + } + System.exit(1); + } catch (ParseException e) { + System.err.println("解析失败:" + e.getMessage()); + if (e.getCause() != null) { + System.err.println("原因:" + e.getCause().getMessage()); + } + System.exit(1); + } catch (StorageException e) { + System.err.println("存储异常:" + e.getMessage()); + if (e.getCause() != null) { + System.err.println("原因:" + e.getCause().getMessage()); + } + System.exit(1); + } catch (CrawlerException e) { + System.err.println("爬取异常:" + e.getMessage()); + if (e.getCause() != null) { + System.err.println("原因:" + e.getCause().getMessage()); + } + System.exit(1); + } + } +} diff --git a/src/main/java/com/scraper/command/CrawlAllCommand.java b/src/main/java/com/scraper/command/CrawlAllCommand.java new file mode 100644 index 0000000..9f59f64 --- /dev/null +++ b/src/main/java/com/scraper/command/CrawlAllCommand.java @@ -0,0 +1,45 @@ +package com.scraper.command; + +import com.scraper.exception.CrawlerException; +import com.scraper.model.Book; +import com.scraper.model.Country; +import com.scraper.model.Quote; +import com.scraper.strategy.SiteABooksStrategy; +import com.scraper.strategy.SiteBQuotesStrategy; +import com.scraper.strategy.SiteCCountriesStrategy; +import com.scraper.view.ConsoleView; +import com.scraper.view.FileSaver; +import java.util.List; + +public class CrawlAllCommand implements CrawlerCommand { + private SiteABooksStrategy booksStrategy; + private SiteBQuotesStrategy quotesStrategy; + private SiteCCountriesStrategy countriesStrategy; + private String outputDir; + + public CrawlAllCommand(SiteABooksStrategy booksStrategy, SiteBQuotesStrategy quotesStrategy, SiteCCountriesStrategy countriesStrategy, String outputDir) { + this.booksStrategy = booksStrategy; + this.quotesStrategy = quotesStrategy; + this.countriesStrategy = countriesStrategy; + this.outputDir = outputDir; + } + + public CrawlAllCommand(SiteABooksStrategy booksStrategy, SiteBQuotesStrategy quotesStrategy, SiteCCountriesStrategy countriesStrategy) { + this(booksStrategy, quotesStrategy, countriesStrategy, "./output"); + } + + @Override + public void execute() throws CrawlerException { + List books = booksStrategy.crawl("http://books.toscrape.com"); + ConsoleView.printBooks(books); + FileSaver.saveToJson(books, outputDir + "/books.json"); + + List quotes = quotesStrategy.crawl("http://quotes.toscrape.com"); + ConsoleView.printQuotes(quotes); + FileSaver.saveToJson(quotes, outputDir + "/quotes.json"); + + List countries = countriesStrategy.crawl("https://www.scrapethissite.com/pages/simple/"); + ConsoleView.printCountries(countries); + FileSaver.saveToJson(countries, outputDir + "/countries.json"); + } +} \ No newline at end of file diff --git a/src/main/java/com/scraper/command/CrawlBooksCommand.java b/src/main/java/com/scraper/command/CrawlBooksCommand.java new file mode 100644 index 0000000..e6dcd67 --- /dev/null +++ b/src/main/java/com/scraper/command/CrawlBooksCommand.java @@ -0,0 +1,29 @@ +package com.scraper.command; + +import com.scraper.exception.CrawlerException; +import com.scraper.model.Book; +import com.scraper.strategy.SiteABooksStrategy; +import com.scraper.view.ConsoleView; +import com.scraper.view.FileSaver; +import java.util.List; + +public class CrawlBooksCommand implements CrawlerCommand { + private SiteABooksStrategy strategy; + private String outputDir; + + public CrawlBooksCommand(SiteABooksStrategy strategy, String outputDir) { + this.strategy = strategy; + this.outputDir = outputDir; + } + + public CrawlBooksCommand(SiteABooksStrategy strategy) { + this(strategy, "./output"); + } + + @Override + public void execute() throws CrawlerException { + List books = strategy.crawl("http://books.toscrape.com"); + ConsoleView.printBooks(books); + FileSaver.saveToJson(books, outputDir + "/books.json"); + } +} \ No newline at end of file diff --git a/src/main/java/com/scraper/command/CrawlCountriesCommand.java b/src/main/java/com/scraper/command/CrawlCountriesCommand.java new file mode 100644 index 0000000..5a3811a --- /dev/null +++ b/src/main/java/com/scraper/command/CrawlCountriesCommand.java @@ -0,0 +1,29 @@ +package com.scraper.command; + +import com.scraper.exception.CrawlerException; +import com.scraper.model.Country; +import com.scraper.strategy.SiteCCountriesStrategy; +import com.scraper.view.ConsoleView; +import com.scraper.view.FileSaver; +import java.util.List; + +public class CrawlCountriesCommand implements CrawlerCommand { + private SiteCCountriesStrategy strategy; + private String outputDir; + + public CrawlCountriesCommand(SiteCCountriesStrategy strategy, String outputDir) { + this.strategy = strategy; + this.outputDir = outputDir; + } + + public CrawlCountriesCommand(SiteCCountriesStrategy strategy) { + this(strategy, "./output"); + } + + @Override + public void execute() throws CrawlerException { + List countries = strategy.crawl("https://www.scrapethissite.com/pages/simple/"); + ConsoleView.printCountries(countries); + FileSaver.saveToJson(countries, outputDir + "/countries.json"); + } +} \ No newline at end of file diff --git a/src/main/java/com/scraper/command/CrawlQuotesCommand.java b/src/main/java/com/scraper/command/CrawlQuotesCommand.java new file mode 100644 index 0000000..2e4dd6b --- /dev/null +++ b/src/main/java/com/scraper/command/CrawlQuotesCommand.java @@ -0,0 +1,29 @@ +package com.scraper.command; + +import com.scraper.exception.CrawlerException; +import com.scraper.model.Quote; +import com.scraper.strategy.SiteBQuotesStrategy; +import com.scraper.view.ConsoleView; +import com.scraper.view.FileSaver; +import java.util.List; + +public class CrawlQuotesCommand implements CrawlerCommand { + private SiteBQuotesStrategy strategy; + private String outputDir; + + public CrawlQuotesCommand(SiteBQuotesStrategy strategy, String outputDir) { + this.strategy = strategy; + this.outputDir = outputDir; + } + + public CrawlQuotesCommand(SiteBQuotesStrategy strategy) { + this(strategy, "./output"); + } + + @Override + public void execute() throws CrawlerException { + List quotes = strategy.crawl("http://quotes.toscrape.com"); + ConsoleView.printQuotes(quotes); + FileSaver.saveToJson(quotes, outputDir + "/quotes.json"); + } +} \ No newline at end of file diff --git a/src/main/java/com/scraper/command/CrawlerCommand.java b/src/main/java/com/scraper/command/CrawlerCommand.java new file mode 100644 index 0000000..282940e --- /dev/null +++ b/src/main/java/com/scraper/command/CrawlerCommand.java @@ -0,0 +1,7 @@ +package com.scraper.command; + +import com.scraper.exception.CrawlerException; + +public interface CrawlerCommand { + void execute() throws CrawlerException; +} \ No newline at end of file diff --git a/src/main/java/com/scraper/exception/CrawlerException.java b/src/main/java/com/scraper/exception/CrawlerException.java new file mode 100644 index 0000000..677702f --- /dev/null +++ b/src/main/java/com/scraper/exception/CrawlerException.java @@ -0,0 +1,7 @@ +package com.scraper.exception; + +public abstract class CrawlerException extends Exception { + public CrawlerException(String message, Throwable cause) { + super(message, cause); + } +} \ No newline at end of file diff --git a/src/main/java/com/scraper/exception/NetworkException.java b/src/main/java/com/scraper/exception/NetworkException.java new file mode 100644 index 0000000..432e072 --- /dev/null +++ b/src/main/java/com/scraper/exception/NetworkException.java @@ -0,0 +1,7 @@ +package com.scraper.exception; + +public class NetworkException extends CrawlerException { + public NetworkException(String message, Throwable cause) { + super(message, cause); + } +} \ No newline at end of file diff --git a/src/main/java/com/scraper/exception/ParseException.java b/src/main/java/com/scraper/exception/ParseException.java new file mode 100644 index 0000000..4a311d2 --- /dev/null +++ b/src/main/java/com/scraper/exception/ParseException.java @@ -0,0 +1,7 @@ +package com.scraper.exception; + +public class ParseException extends CrawlerException { + public ParseException(String message, Throwable cause) { + super(message, cause); + } +} \ No newline at end of file diff --git a/src/main/java/com/scraper/exception/StorageException.java b/src/main/java/com/scraper/exception/StorageException.java new file mode 100644 index 0000000..b190a21 --- /dev/null +++ b/src/main/java/com/scraper/exception/StorageException.java @@ -0,0 +1,7 @@ +package com.scraper.exception; + +public class StorageException extends CrawlerException { + public StorageException(String message, Throwable cause) { + super(message, cause); + } +} \ No newline at end of file diff --git a/src/main/java/com/scraper/model/Book.java b/src/main/java/com/scraper/model/Book.java new file mode 100644 index 0000000..fd9ba0c --- /dev/null +++ b/src/main/java/com/scraper/model/Book.java @@ -0,0 +1,24 @@ +package com.scraper.model; + +public class Book { + private String title; + private String price; + + public Book(String title, String price) { + this.title = title; + this.price = price; + } + + public String getTitle() { + return title; + } + + public String getPrice() { + return price; + } + + @Override + public String toString() { + return "Book{title='" + title + "', price='" + price + "'}"; + } +} \ No newline at end of file diff --git a/src/main/java/com/scraper/model/Country.java b/src/main/java/com/scraper/model/Country.java new file mode 100644 index 0000000..60249ac --- /dev/null +++ b/src/main/java/com/scraper/model/Country.java @@ -0,0 +1,30 @@ +package com.scraper.model; + +public class Country { + private String name; + private String capital; + private String population; + + public Country(String name, String capital, String population) { + this.name = name; + this.capital = capital; + this.population = population; + } + + public String getName() { + return name; + } + + public String getCapital() { + return capital; + } + + public String getPopulation() { + return population; + } + + @Override + public String toString() { + return "Country{name='" + name + "', capital='" + capital + "', population='" + population + "'}"; + } +} \ No newline at end of file diff --git a/src/main/java/com/scraper/model/Quote.java b/src/main/java/com/scraper/model/Quote.java new file mode 100644 index 0000000..c684349 --- /dev/null +++ b/src/main/java/com/scraper/model/Quote.java @@ -0,0 +1,24 @@ +package com.scraper.model; + +public class Quote { + private String text; + private String author; + + public Quote(String text, String author) { + this.text = text; + this.author = author; + } + + public String getText() { + return text; + } + + public String getAuthor() { + return author; + } + + @Override + public String toString() { + return "Quote{text='" + text + "', author='" + author + "'}"; + } +} \ No newline at end of file diff --git a/src/main/java/com/scraper/strategy/CrawlStrategy.java b/src/main/java/com/scraper/strategy/CrawlStrategy.java new file mode 100644 index 0000000..93e4a95 --- /dev/null +++ b/src/main/java/com/scraper/strategy/CrawlStrategy.java @@ -0,0 +1,8 @@ +package com.scraper.strategy; + +import com.scraper.exception.CrawlerException; +import java.util.List; + +public interface CrawlStrategy { + List crawl(String url) throws CrawlerException; +} \ No newline at end of file diff --git a/src/main/java/com/scraper/strategy/SiteABooksStrategy.java b/src/main/java/com/scraper/strategy/SiteABooksStrategy.java new file mode 100644 index 0000000..664e30d --- /dev/null +++ b/src/main/java/com/scraper/strategy/SiteABooksStrategy.java @@ -0,0 +1,51 @@ +package com.scraper.strategy; + +import com.scraper.exception.CrawlerException; +import com.scraper.exception.NetworkException; +import com.scraper.exception.ParseException; +import com.scraper.model.Book; +import org.apache.hc.client5.http.classic.methods.HttpGet; +import org.apache.hc.client5.http.impl.classic.CloseableHttpClient; +import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse; +import org.apache.hc.client5.http.impl.classic.HttpClients; + +import org.apache.hc.core5.http.io.entity.EntityUtils; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class SiteABooksStrategy implements CrawlStrategy { + @Override + public List crawl(String url) throws CrawlerException { + System.out.println("正在爬取 [http://books.toscrape.com]..."); + List books = new ArrayList<>(); + + try (CloseableHttpClient httpClient = HttpClients.createDefault()) { + HttpGet httpGet = new HttpGet(url); + + try (CloseableHttpResponse response = httpClient.execute(httpGet)) { + String html = EntityUtils.toString(response.getEntity()); + Document doc = Jsoup.parse(html); + Elements productPods = doc.select(".product_pod"); + + for (Element pod : productPods) { + String title = pod.select("h3 > a").attr("title"); + String priceText = pod.select(".price_color").text(); + String price = priceText.replace("£", ""); + books.add(new Book(title, price)); + } + } catch (org.apache.hc.core5.http.ParseException e) { + throw new ParseException("解析响应内容失败", e); + } + } catch (IOException e) { + throw new NetworkException("网络请求失败", e); + } + + return books; + } +} \ No newline at end of file diff --git a/src/main/java/com/scraper/strategy/SiteBQuotesStrategy.java b/src/main/java/com/scraper/strategy/SiteBQuotesStrategy.java new file mode 100644 index 0000000..96ffd15 --- /dev/null +++ b/src/main/java/com/scraper/strategy/SiteBQuotesStrategy.java @@ -0,0 +1,50 @@ +package com.scraper.strategy; + +import com.scraper.exception.CrawlerException; +import com.scraper.exception.NetworkException; +import com.scraper.exception.ParseException; +import com.scraper.model.Quote; +import org.apache.hc.client5.http.classic.methods.HttpGet; +import org.apache.hc.client5.http.impl.classic.CloseableHttpClient; +import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse; +import org.apache.hc.client5.http.impl.classic.HttpClients; + +import org.apache.hc.core5.http.io.entity.EntityUtils; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class SiteBQuotesStrategy implements CrawlStrategy { + @Override + public List crawl(String url) throws CrawlerException { + System.out.println("正在爬取 [http://quotes.toscrape.com]..."); + List quotes = new ArrayList<>(); + + try (CloseableHttpClient httpClient = HttpClients.createDefault()) { + HttpGet httpGet = new HttpGet(url); + + try (CloseableHttpResponse response = httpClient.execute(httpGet)) { + String html = EntityUtils.toString(response.getEntity()); + Document doc = Jsoup.parse(html); + Elements quoteElements = doc.select(".quote"); + + for (Element quoteEl : quoteElements) { + String text = quoteEl.select(".text").text(); + String author = quoteEl.select(".author").text(); + quotes.add(new Quote(text, author)); + } + } catch (org.apache.hc.core5.http.ParseException e) { + throw new ParseException("解析响应内容失败", e); + } + } catch (IOException e) { + throw new NetworkException("网络请求失败", e); + } + + return quotes; + } +} \ No newline at end of file diff --git a/src/main/java/com/scraper/strategy/SiteCCountriesStrategy.java b/src/main/java/com/scraper/strategy/SiteCCountriesStrategy.java new file mode 100644 index 0000000..f1d920c --- /dev/null +++ b/src/main/java/com/scraper/strategy/SiteCCountriesStrategy.java @@ -0,0 +1,51 @@ +package com.scraper.strategy; + +import com.scraper.exception.CrawlerException; +import com.scraper.exception.NetworkException; +import com.scraper.exception.ParseException; +import com.scraper.model.Country; +import org.apache.hc.client5.http.classic.methods.HttpGet; +import org.apache.hc.client5.http.impl.classic.CloseableHttpClient; +import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse; +import org.apache.hc.client5.http.impl.classic.HttpClients; + +import org.apache.hc.core5.http.io.entity.EntityUtils; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class SiteCCountriesStrategy implements CrawlStrategy { + @Override + public List crawl(String url) throws CrawlerException { + System.out.println("正在爬取 [https://www.scrapethissite.com/pages/simple/]..."); + List countries = new ArrayList<>(); + + try (CloseableHttpClient httpClient = HttpClients.createDefault()) { + HttpGet httpGet = new HttpGet(url); + + try (CloseableHttpResponse response = httpClient.execute(httpGet)) { + String html = EntityUtils.toString(response.getEntity()); + Document doc = Jsoup.parse(html); + Elements countryElements = doc.select(".country"); + + for (Element countryEl : countryElements) { + String name = countryEl.select(".country-name").text().trim(); + String capital = countryEl.select(".country-capital").text().trim(); + String population = countryEl.select(".country-population").text().trim(); + countries.add(new Country(name, capital, population)); + } + } catch (org.apache.hc.core5.http.ParseException e) { + throw new ParseException("解析响应内容失败", e); + } + } catch (IOException e) { + throw new NetworkException("网络请求失败", e); + } + + return countries; + } +} \ No newline at end of file diff --git a/src/main/java/com/scraper/view/ConsoleView.java b/src/main/java/com/scraper/view/ConsoleView.java new file mode 100644 index 0000000..e1bf876 --- /dev/null +++ b/src/main/java/com/scraper/view/ConsoleView.java @@ -0,0 +1,26 @@ +package com.scraper.view; + +import com.scraper.model.Book; +import com.scraper.model.Country; +import com.scraper.model.Quote; +import java.util.List; + +public class ConsoleView { + public static void printBooks(List books) { + for (Book book : books) { + System.out.println("书名: 《" + book.getTitle() + "》, 价格: £" + book.getPrice()); + } + } + + public static void printQuotes(List quotes) { + for (Quote quote : quotes) { + System.out.println("\"" + quote.getText() + "\" —— " + quote.getAuthor()); + } + } + + public static void printCountries(List countries) { + for (Country country : countries) { + System.out.println("国家: " + country.getName() + ", 首都: " + country.getCapital() + ", 人口: " + country.getPopulation()); + } + } +} \ No newline at end of file diff --git a/src/main/java/com/scraper/view/FileSaver.java b/src/main/java/com/scraper/view/FileSaver.java new file mode 100644 index 0000000..4c42089 --- /dev/null +++ b/src/main/java/com/scraper/view/FileSaver.java @@ -0,0 +1,31 @@ +package com.scraper.view; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.scraper.exception.StorageException; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.List; +import java.util.function.Function; + +public class FileSaver { + public static void saveToJson(Object data, String filePath) throws StorageException { + try { + Path path = Paths.get(filePath); + Path parentDir = path.getParent(); + if (parentDir != null) { + Files.createDirectories(parentDir); + } + ObjectMapper mapper = new ObjectMapper(); + mapper.writerWithDefaultPrettyPrinter().writeValue(new File(filePath), data); + } catch (IOException e) { + throw new StorageException("无法写入 JSON 文件: " + filePath, e); + } + } + + public static void saveToCsv(List items, String filePath, String[] headers, Function rowMapper) throws StorageException { + throw new UnsupportedOperationException("CSV 保存功能暂未实现"); + } +} \ No newline at end of file